/*
 *  libata-eh.c - libata error handling
 *
 *  Maintained by:  Jeff Garzik <jgarzik@pobox.com>
 *		    Please ALWAYS copy linux-ide@vger.kernel.org
 *		    on emails.
 *
 *  Copyright 2006 Tejun Heo <htejun@gmail.com>
 *
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License as
 *  published by the Free Software Foundation; either version 2, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
 *  USA.
 *
 *
 *  libata documentation is available via 'make {ps|pdf}docs',
 *  as Documentation/DocBook/libata.*
 *
 *  Hardware documentation available from http://www.t13.org/ and
 *  http://www.sata-io.org/
 *
 */

#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/export.h>
#include <linux/pci.h>
#include <scsi/scsi.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_dbg.h>
#include "../scsi/scsi_transport_api.h"

#include <linux/libata.h>

#include "libata.h"

enum {
	/* speed down verdicts */
	ATA_EH_SPDN_NCQ_OFF		= (1 << 0),
	ATA_EH_SPDN_SPEED_DOWN		= (1 << 1),
	ATA_EH_SPDN_FALLBACK_TO_PIO	= (1 << 2),
	ATA_EH_SPDN_KEEP_ERRORS		= (1 << 3),

	/* error flags */
	ATA_EFLAG_IS_IO			= (1 << 0),
	ATA_EFLAG_DUBIOUS_XFER		= (1 << 1),
	ATA_EFLAG_OLD_ER		= (1 << 31),

	/* error categories */
	ATA_ECAT_NONE			= 0,
	ATA_ECAT_ATA_BUS		= 1,
	ATA_ECAT_TOUT_HSM		= 2,
	ATA_ECAT_UNK_DEV		= 3,
	ATA_ECAT_DUBIOUS_NONE		= 4,
	ATA_ECAT_DUBIOUS_ATA_BUS	= 5,
	ATA_ECAT_DUBIOUS_TOUT_HSM	= 6,
	ATA_ECAT_DUBIOUS_UNK_DEV	= 7,
	ATA_ECAT_NR			= 8,

	ATA_EH_CMD_DFL_TIMEOUT		= 5000,

	/* always put at least this amount of time between resets */
	ATA_EH_RESET_COOL_DOWN		= 5000,

	/* Waiting in ->prereset can never be reliable.  It's
	 * sometimes nice to wait there but it can't be depended upon;
	 * otherwise, we wouldn't be resetting.  Just give it enough
	 * time for most drives to spin up.
	 */
	ATA_EH_PRERESET_TIMEOUT		= 10000,
	ATA_EH_FASTDRAIN_INTERVAL	= 3000,

	ATA_EH_UA_TRIES			= 5,

	/* probe speed down parameters, see ata_eh_schedule_probe() */
	ATA_EH_PROBE_TRIAL_INTERVAL	= 60000,	/* 1 min */
	ATA_EH_PROBE_TRIALS		= 2,
};

/* The following table determines how we sequence resets.  Each entry
 * represents timeout for that try.  The first try can be soft or
 * hardreset.  All others are hardreset if available.  In most cases
 * the first reset w/ 10sec timeout should succeed.  Following entries
 * are mostly for error handling, hotplug and retarded devices.
 */
static const unsigned long ata_eh_reset_timeouts[] = {
	10000,	/* most drives spin up by 10sec */
	10000,	/* > 99% working drives spin up before 20sec */
	35000,	/* give > 30 secs of idleness for retarded devices */
	 5000,	/* and sweet one last chance */
	ULONG_MAX, /* > 1 min has elapsed, give up */
};

static const unsigned long ata_eh_identify_timeouts[] = {
	 5000,	/* covers > 99% of successes and not too boring on failures */
	10000,	/* combined time till here is enough even for media access */
	30000,	/* for true idiots */
	ULONG_MAX,
};

static const unsigned long ata_eh_flush_timeouts[] = {
	15000,	/* be generous with flush */
	15000,	/* ditto */
	30000,	/* and even more generous */
	ULONG_MAX,
};

static const unsigned long ata_eh_other_timeouts[] = {
	 5000,	/* same rationale as identify timeout */
	10000,	/* ditto */
	/* but no merciful 30sec for other commands, it just isn't worth it */
	ULONG_MAX,
};

struct ata_eh_cmd_timeout_ent {
	const u8		*commands;
	const unsigned long	*timeouts;
};

/* The following table determines timeouts to use for EH internal
 * commands.  Each table entry is a command class and matches the
 * commands the entry applies to and the timeout table to use.
 *
 * On the retry after a command timed out, the next timeout value from
 * the table is used.  If the table doesn't contain further entries,
 * the last value is used.
 *
 * ehc->cmd_timeout_idx keeps track of which timeout to use per
 * command class, so if SET_FEATURES times out on the first try, the
 * next try will use the second timeout value only for that class.
 */
#define CMDS(cmds...)	(const u8 []){ cmds, 0 }
static const struct ata_eh_cmd_timeout_ent
ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = {
	{ .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI),
	  .timeouts = ata_eh_identify_timeouts, },
	{ .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_SET_FEATURES),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_FLUSH, ATA_CMD_FLUSH_EXT),
	  .timeouts = ata_eh_flush_timeouts },
};
#undef CMDS

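/* Illustrative sketch (not part of the driver): how the table above
 * plays out for an EH-internal SET_FEATURES that keeps timing out.
 * Values follow ata_eh_other_timeouts[]; the helpers named here are
 * the real ones defined further down in this file.
 *
 *	timeout = ata_internal_cmd_timeout(dev, ATA_CMD_SET_FEATURES);
 *						// 1st try:  5000ms
 *	// ... command times out ...
 *	ata_internal_cmd_timed_out(dev, ATA_CMD_SET_FEATURES);
 *	timeout = ata_internal_cmd_timeout(dev, ATA_CMD_SET_FEATURES);
 *						// 2nd try: 10000ms
 *	// further timeouts keep returning the last entry before
 *	// ULONG_MAX, i.e. 10000ms for this command class.
 */
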
static void __ata_port_freeze(struct ata_port *ap);
#ifdef CONFIG_PM
static void ata_eh_handle_port_suspend(struct ata_port *ap);
static void ata_eh_handle_port_resume(struct ata_port *ap);
#else /* CONFIG_PM */
static void ata_eh_handle_port_suspend(struct ata_port *ap)
{ }

static void ata_eh_handle_port_resume(struct ata_port *ap)
{ }
#endif /* CONFIG_PM */

static void __ata_ehi_pushv_desc(struct ata_eh_info *ehi, const char *fmt,
				 va_list args)
{
	ehi->desc_len += vscnprintf(ehi->desc + ehi->desc_len,
				    ATA_EH_DESC_LEN - ehi->desc_len,
				    fmt, args);
}

/**
 *	__ata_ehi_push_desc - push error description without adding separator
 *	@ehi: target EHI
 *	@fmt: printf format string
 *
 *	Format string according to @fmt and append it to @ehi->desc.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	__ata_ehi_pushv_desc(ehi, fmt, args);
	va_end(args);
}

/**
 *	ata_ehi_push_desc - push error description with separator
 *	@ehi: target EHI
 *	@fmt: printf format string
 *
 *	Format string according to @fmt and append it to @ehi->desc.
 *	If @ehi->desc is not empty, ", " is added in-between.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
{
	va_list args;

	if (ehi->desc_len)
		__ata_ehi_push_desc(ehi, ", ");

	va_start(args, fmt);
	__ata_ehi_pushv_desc(ehi, fmt, args);
	va_end(args);
}

/**
 *	ata_ehi_clear_desc - clear error description
 *	@ehi: target EHI
 *
 *	Clear @ehi->desc.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_ehi_clear_desc(struct ata_eh_info *ehi)
{
	ehi->desc[0] = '\0';
	ehi->desc_len = 0;
}

/**
 *	ata_port_desc - append port description
 *	@ap: target ATA port
 *	@fmt: printf format string
 *
 *	Format string according to @fmt and append it to port
 *	description.  If port description is not empty, " " is added
 *	in-between.  This function is to be used while initializing
 *	ata_host.  The description is printed on host registration.
 *
 *	LOCKING:
 *	None.
 */
void ata_port_desc(struct ata_port *ap, const char *fmt, ...)
{
	va_list args;

	WARN_ON(!(ap->pflags & ATA_PFLAG_INITIALIZING));

	if (ap->link.eh_info.desc_len)
		__ata_ehi_push_desc(&ap->link.eh_info, " ");

	va_start(args, fmt);
	__ata_ehi_pushv_desc(&ap->link.eh_info, fmt, args);
	va_end(args);
}

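/* Illustrative sketch (not part of the driver): building an EH
 * description with the helpers above.  A hypothetical caller holding
 * the host lock might do:
 *
 *	ata_ehi_clear_desc(ehi);
 *	ata_ehi_push_desc(ehi, "SError=0x%08x", serror);
 *	ata_ehi_push_desc(ehi, "hard resetting link");
 *	// ehi->desc is now "SError=0x........, hard resetting link";
 *	// __ata_ehi_push_desc() would have skipped the ", " separator.
 */
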
#ifdef CONFIG_PCI

/**
 *	ata_port_pbar_desc - append PCI BAR description
 *	@ap: target ATA port
 *	@bar: target PCI BAR
 *	@offset: offset into PCI BAR
 *	@name: name of the area
 *
 *	If @offset is negative, this function formats a string which
 *	contains the name, address, size and type of the BAR and
 *	appends it to the port description.  If @offset is zero or
 *	positive, only name and offsetted address is appended.
 *
 *	LOCKING:
 *	None.
 */
void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset,
			const char *name)
{
	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
	char *type = "";
	unsigned long long start, len;

	if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM)
		type = "m";
	else if (pci_resource_flags(pdev, bar) & IORESOURCE_IO)
		type = "i";

	start = (unsigned long long)pci_resource_start(pdev, bar);
	len = (unsigned long long)pci_resource_len(pdev, bar);

	if (offset < 0)
		ata_port_desc(ap, "%s %s%llu@0x%llx", name, type, len, start);
	else
		ata_port_desc(ap, "%s 0x%llx", name,
				start + (unsigned long long)offset);
}

#endif /* CONFIG_PCI */

static int ata_lookup_timeout_table(u8 cmd)
{
	int i;

	for (i = 0; i < ATA_EH_CMD_TIMEOUT_TABLE_SIZE; i++) {
		const u8 *cur;

		for (cur = ata_eh_cmd_timeout_table[i].commands; *cur; cur++)
			if (*cur == cmd)
				return i;
	}

	return -1;
}

/**
 *	ata_internal_cmd_timeout - determine timeout for an internal command
 *	@dev: target device
 *	@cmd: internal command to be issued
 *
 *	Determine timeout for internal command @cmd for @dev.
 *
 *	LOCKING:
 *	EH context.
 *
 *	RETURNS:
 *	Determined timeout.
 */
unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;
	int ent = ata_lookup_timeout_table(cmd);
	int idx;

	if (ent < 0)
		return ATA_EH_CMD_DFL_TIMEOUT;

	idx = ehc->cmd_timeout_idx[dev->devno][ent];
	return ata_eh_cmd_timeout_table[ent].timeouts[idx];
}

/**
 *	ata_internal_cmd_timed_out - notification for internal command timeout
 *	@dev: target device
 *	@cmd: internal command which timed out
 *
 *	Notify EH that internal command @cmd for @dev timed out.  This
 *	function should be called only for commands whose timeouts are
 *	determined using ata_internal_cmd_timeout().
 *
 *	LOCKING:
 *	EH context.
 */
void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;
	int ent = ata_lookup_timeout_table(cmd);
	int idx;

	if (ent < 0)
		return;

	idx = ehc->cmd_timeout_idx[dev->devno][ent];
	if (ata_eh_cmd_timeout_table[ent].timeouts[idx + 1] != ULONG_MAX)
		ehc->cmd_timeout_idx[dev->devno][ent]++;
}

static void ata_ering_record(struct ata_ering *ering, unsigned int eflags,
			     unsigned int err_mask)
{
	struct ata_ering_entry *ent;

	WARN_ON(!err_mask);

	ering->cursor++;
	ering->cursor %= ATA_ERING_SIZE;

	ent = &ering->ring[ering->cursor];
	ent->eflags = eflags;
	ent->err_mask = err_mask;
	ent->timestamp = get_jiffies_64();
}

static struct ata_ering_entry *ata_ering_top(struct ata_ering *ering)
{
	struct ata_ering_entry *ent = &ering->ring[ering->cursor];

	if (ent->err_mask)
		return ent;
	return NULL;
}

int ata_ering_map(struct ata_ering *ering,
		  int (*map_fn)(struct ata_ering_entry *, void *),
		  void *arg)
{
	int idx, rc = 0;
	struct ata_ering_entry *ent;

	idx = ering->cursor;
	do {
		ent = &ering->ring[idx];
		if (!ent->err_mask)
			break;
		rc = map_fn(ent, arg);
		if (rc)
			break;
		idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE;
	} while (idx != ering->cursor);

	return rc;
}

int ata_ering_clear_cb(struct ata_ering_entry *ent, void *void_arg)
{
	ent->eflags |= ATA_EFLAG_OLD_ER;
	return 0;
}

static void ata_ering_clear(struct ata_ering *ering)
{
	ata_ering_map(ering, ata_ering_clear_cb, NULL);
}

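/* Illustrative sketch (not part of the driver): ata_ering_map() walks
 * the ring newest-first, starting at the cursor and stepping
 * backwards.  A hypothetical counting callback:
 *
 *	static int count_cb(struct ata_ering_entry *ent, void *arg)
 *	{
 *		(*(int *)arg)++;
 *		return 0;	// non-zero would stop the walk early
 *	}
 *
 *	int nr = 0;
 *	ata_ering_map(&dev->ering, count_cb, &nr);
 *
 * The walk also stops at the first entry with a zero err_mask, i.e.
 * at the unused part of a ring that has never wrapped.
 */
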
static unsigned int ata_eh_dev_action(struct ata_device *dev)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;

	return ehc->i.action | ehc->i.dev_action[dev->devno];
}

static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev,
				struct ata_eh_info *ehi, unsigned int action)
{
	struct ata_device *tdev;

	if (!dev) {
		ehi->action &= ~action;
		ata_for_each_dev(tdev, link, ALL)
			ehi->dev_action[tdev->devno] &= ~action;
	} else {
		/* doesn't make sense for port-wide EH actions */
		WARN_ON(!(action & ATA_EH_PERDEV_MASK));

		/* break ehi->action into ehi->dev_action */
		if (ehi->action & action) {
			ata_for_each_dev(tdev, link, ALL)
				ehi->dev_action[tdev->devno] |=
					ehi->action & action;
			ehi->action &= ~action;
		}

		/* turn off the specified per-dev action */
		ehi->dev_action[dev->devno] &= ~action;
	}
}

/**
 *	ata_eh_acquire - acquire EH ownership
 *	@ap: ATA port to acquire EH ownership for
 *
 *	Acquire EH ownership for @ap.  This is the basic exclusion
 *	mechanism for ports sharing a host.  Only one port hanging off
 *	the same host can claim the ownership of EH.
 *
 *	LOCKING:
 *	EH context.
 */
void ata_eh_acquire(struct ata_port *ap)
{
	mutex_lock(&ap->host->eh_mutex);
	WARN_ON_ONCE(ap->host->eh_owner);
	ap->host->eh_owner = current;
}

/**
 *	ata_eh_release - release EH ownership
 *	@ap: ATA port to release EH ownership for
 *
 *	Release EH ownership for @ap.  The caller must have acquired
 *	EH ownership using ata_eh_acquire() previously.
 *
 *	LOCKING:
 *	EH context.
 */
void ata_eh_release(struct ata_port *ap)
{
	WARN_ON_ONCE(ap->host->eh_owner != current);
	ap->host->eh_owner = NULL;
	mutex_unlock(&ap->host->eh_mutex);
}

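/* Illustrative sketch (not part of the driver): EH ownership brackets
 * a port's whole EH pass; helpers that sleep for long stretches (e.g.
 * ata_msleep() in this codebase) drop and re-take it so sibling ports
 * on the same host can make progress:
 *
 *	ata_eh_acquire(ap);
 *	// ... run recovery for ap, possibly sleeping ...
 *	ata_eh_release(ap);
 *
 * Only one port per host may own EH at a time; eh_owner records the
 * owning task purely for the WARN_ON_ONCE() sanity checks above.
 */
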
/**
 *	ata_scsi_timed_out - SCSI layer time out callback
 *	@cmd: timed out SCSI command
 *
 *	Handles SCSI layer timeout.  We race with normal completion of
 *	the qc for @cmd.  If the qc is already gone, we lose and let
 *	the scsi command finish (EH_HANDLED).  Otherwise, the qc has
 *	timed out and EH should be invoked.  Prevent ata_qc_complete()
 *	from finishing it by setting EH_SCHEDULED and return
 *	EH_NOT_HANDLED.
 *
 *	TODO: kill this function once old EH is gone.
 *
 *	LOCKING:
 *	Called from timer context
 *
 *	RETURNS:
 *	BLK_EH_HANDLED or BLK_EH_NOT_HANDLED
 */
enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
{
	struct Scsi_Host *host = cmd->device->host;
	struct ata_port *ap = ata_shost_to_port(host);
	unsigned long flags;
	struct ata_queued_cmd *qc;
	enum blk_eh_timer_return ret;

	DPRINTK("ENTER\n");

	if (ap->ops->error_handler) {
		ret = BLK_EH_NOT_HANDLED;
		goto out;
	}

	ret = BLK_EH_HANDLED;
	spin_lock_irqsave(ap->lock, flags);
	qc = ata_qc_from_tag(ap, ap->link.active_tag);
	if (qc) {
		WARN_ON(qc->scsicmd != cmd);
		qc->flags |= ATA_QCFLAG_EH_SCHEDULED;
		qc->err_mask |= AC_ERR_TIMEOUT;
		ret = BLK_EH_NOT_HANDLED;
	}
	spin_unlock_irqrestore(ap->lock, flags);

 out:
	DPRINTK("EXIT, ret=%d\n", ret);
	return ret;
}

static void ata_eh_unload(struct ata_port *ap)
{
	struct ata_link *link;
	struct ata_device *dev;
	unsigned long flags;

	/* Restore SControl IPM and SPD for the next driver and
	 * disable attached devices.
	 */
	ata_for_each_link(link, ap, PMP_FIRST) {
		sata_scr_write(link, SCR_CONTROL, link->saved_scontrol & 0xff0);
		ata_for_each_dev(dev, link, ALL)
			ata_dev_disable(dev);
	}

	/* freeze and set UNLOADED */
	spin_lock_irqsave(ap->lock, flags);

	ata_port_freeze(ap);			/* won't be thawed */
	ap->pflags &= ~ATA_PFLAG_EH_PENDING;	/* clear pending from freeze */
	ap->pflags |= ATA_PFLAG_UNLOADED;

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_scsi_error - SCSI layer error handler callback
 *	@host: SCSI host on which error occurred
 *
 *	Handles SCSI-layer-thrown error events.
 *
 *	LOCKING:
 *	Inherited from SCSI layer (none, can sleep)
 */
void ata_scsi_error(struct Scsi_Host *host)
{
	struct ata_port *ap = ata_shost_to_port(host);
	unsigned long flags;
	LIST_HEAD(eh_work_q);

	DPRINTK("ENTER\n");

	spin_lock_irqsave(host->host_lock, flags);
	list_splice_init(&host->eh_cmd_q, &eh_work_q);
	spin_unlock_irqrestore(host->host_lock, flags);

	ata_scsi_cmd_error_handler(host, ap, &eh_work_q);

	/* If we raced with normal completion and there is nothing to
	   recover (nr_timedout == 0), why exactly are we doing error
	   recovery? */
	ata_scsi_port_error_handler(host, ap);

	/* finish or retry handled scmd's and clean up */
	WARN_ON(host->host_failed || !list_empty(&eh_work_q));

	DPRINTK("EXIT\n");
}

/**
 * ata_scsi_cmd_error_handler - error callback for a list of commands
 * @host:	scsi host containing the port
 * @ap:		ATA port within the host
 * @eh_work_q:	list of commands to process
 *
 * process the given list of commands and return those finished to the
 * ap->eh_done_q.  This function is the first part of the libata error
 * handler which processes a given list of failed commands.
 */
void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap,
				struct list_head *eh_work_q)
{
	int i;
	unsigned long flags;

	/* make sure sff pio task is not running */
	ata_sff_flush_pio_task(ap);

	/* synchronize with host lock and sort out timeouts */

	/* For new EH, all qcs are finished in one of three ways -
	 * normal completion, error completion, and SCSI timeout.
	 * Both completions can race against SCSI timeout.  When normal
	 * completion wins, the qc never reaches EH.  When error
	 * completion wins, the qc has ATA_QCFLAG_FAILED set.
	 *
	 * When SCSI timeout wins, things are a bit more complex.
	 * Normal or error completion can occur after the timeout but
	 * before this point.  In such cases, both types of
	 * completions are honored.  A scmd is determined to have
	 * timed out iff its associated qc is active and not failed.
	 */
	if (ap->ops->error_handler) {
		struct scsi_cmnd *scmd, *tmp;
		int nr_timedout = 0;

		spin_lock_irqsave(ap->lock, flags);

		/* This must occur under the ap->lock as we don't want
		   a polled recovery to race the real interrupt handler

		   The lost_interrupt handler checks for any completed but
		   non-notified command and completes much like an IRQ handler.

		   We then fall into the error recovery code which will treat
		   this as if normal completion won the race */

		if (ap->ops->lost_interrupt)
			ap->ops->lost_interrupt(ap);

		list_for_each_entry_safe(scmd, tmp, eh_work_q, eh_entry) {
			struct ata_queued_cmd *qc;

			for (i = 0; i < ATA_MAX_QUEUE; i++) {
				qc = __ata_qc_from_tag(ap, i);
				if (qc->flags & ATA_QCFLAG_ACTIVE &&
				    qc->scsicmd == scmd)
					break;
			}

			if (i < ATA_MAX_QUEUE) {
				/* the scmd has an associated qc */
				if (!(qc->flags & ATA_QCFLAG_FAILED)) {
					/* which hasn't failed yet, timeout */
					qc->err_mask |= AC_ERR_TIMEOUT;
					qc->flags |= ATA_QCFLAG_FAILED;
					nr_timedout++;
				}
			} else {
				/* Normal completion occurred after
				 * SCSI timeout but before this point.
				 * Successfully complete it.
				 */
				scmd->retries = scmd->allowed;
				scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
			}
		}

		/* If we have timed out qcs, they belong to EH from
		 * this point but the state of the controller is
		 * unknown.  Freeze the port to make sure the IRQ
		 * handler doesn't diddle with those qcs.  This must
		 * be done atomically w.r.t. setting QCFLAG_FAILED.
		 */
		if (nr_timedout)
			__ata_port_freeze(ap);

		spin_unlock_irqrestore(ap->lock, flags);

		/* initialize eh_tries */
		ap->eh_tries = ATA_EH_MAX_TRIES;
	} else
		spin_unlock_wait(ap->lock);

}
EXPORT_SYMBOL(ata_scsi_cmd_error_handler);

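/* Illustrative sketch (not part of the driver): the race matrix the
 * loop above implements for each scmd on eh_work_q.
 *
 *	qc lookup result			treatment
 *	---------------------------------	---------------------------
 *	active qc, ATA_QCFLAG_FAILED clear	genuine timeout: mark
 *						AC_ERR_TIMEOUT + FAILED
 *	active qc, ATA_QCFLAG_FAILED set	error completion already
 *						claimed it; leave as-is
 *	no matching qc				normal completion won the
 *						race; finish the scmd
 */
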
/**
 * ata_scsi_port_error_handler - recover the port after the commands
 * @host:	SCSI host containing the port
 * @ap:		the ATA port
 *
 * Handle the recovery of the port @ap after all the commands
 * have been recovered.
 */
void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
{
	unsigned long flags;

	/* invoke error handler */
	if (ap->ops->error_handler) {
		struct ata_link *link;

		/* acquire EH ownership */
		ata_eh_acquire(ap);
 repeat:
		/* kill fast drain timer */
		del_timer_sync(&ap->fastdrain_timer);

		/* process port resume request */
		ata_eh_handle_port_resume(ap);

		/* fetch & clear EH info */
		spin_lock_irqsave(ap->lock, flags);

		ata_for_each_link(link, ap, HOST_FIRST) {
			struct ata_eh_context *ehc = &link->eh_context;
			struct ata_device *dev;

			memset(&link->eh_context, 0, sizeof(link->eh_context));
			link->eh_context.i = link->eh_info;
			memset(&link->eh_info, 0, sizeof(link->eh_info));

			ata_for_each_dev(dev, link, ENABLED) {
				int devno = dev->devno;

				ehc->saved_xfer_mode[devno] = dev->xfer_mode;
				if (ata_ncq_enabled(dev))
					ehc->saved_ncq_enabled |= 1 << devno;
			}
		}

		ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
		ap->pflags &= ~ATA_PFLAG_EH_PENDING;
		ap->excl_link = NULL;	/* don't maintain exclusion over EH */

		spin_unlock_irqrestore(ap->lock, flags);

		/* invoke EH, skip if unloading or suspended */
		if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
			ap->ops->error_handler(ap);
		else {
			/* if unloading, commence suicide */
			if ((ap->pflags & ATA_PFLAG_UNLOADING) &&
			    !(ap->pflags & ATA_PFLAG_UNLOADED))
				ata_eh_unload(ap);
			ata_eh_finish(ap);
		}

		/* process port suspend request */
		ata_eh_handle_port_suspend(ap);

		/* Exception might have happened after ->error_handler
		 * recovered the port but before this point.  Repeat
		 * EH in such case.
		 */
		spin_lock_irqsave(ap->lock, flags);

		if (ap->pflags & ATA_PFLAG_EH_PENDING) {
			if (--ap->eh_tries) {
				spin_unlock_irqrestore(ap->lock, flags);
				goto repeat;
			}
			ata_port_err(ap,
				     "EH pending after %d tries, giving up\n",
				     ATA_EH_MAX_TRIES);
			ap->pflags &= ~ATA_PFLAG_EH_PENDING;
		}

		/* this run is complete, make sure EH info is clear */
		ata_for_each_link(link, ap, HOST_FIRST)
			memset(&link->eh_info, 0, sizeof(link->eh_info));

		/* Clear host_eh_scheduled while holding ap->lock such
		 * that if exception occurs after this point but
		 * before EH completion, SCSI midlayer will
		 * re-initiate EH.
		 */
		host->host_eh_scheduled = 0;

		spin_unlock_irqrestore(ap->lock, flags);
		ata_eh_release(ap);
	} else {
		WARN_ON(ata_qc_from_tag(ap, ap->link.active_tag) == NULL);
		ap->ops->eng_timeout(ap);
	}

	scsi_eh_flush_done_q(&ap->eh_done_q);

	/* clean up */
	spin_lock_irqsave(ap->lock, flags);

	if (ap->pflags & ATA_PFLAG_LOADING)
		ap->pflags &= ~ATA_PFLAG_LOADING;
	else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG)
		schedule_delayed_work(&ap->hotplug_task, 0);

	if (ap->pflags & ATA_PFLAG_RECOVERED)
		ata_port_info(ap, "EH complete\n");

	ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED);

	/* tell wait_eh that we're done */
	ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS;
	wake_up_all(&ap->eh_wait_q);

	spin_unlock_irqrestore(ap->lock, flags);
}
EXPORT_SYMBOL_GPL(ata_scsi_port_error_handler);

/**
 *	ata_port_wait_eh - Wait for the currently pending EH to complete
 *	@ap: Port to wait EH for
 *
 *	Wait until the currently pending EH is complete.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
void ata_port_wait_eh(struct ata_port *ap)
{
	unsigned long flags;
	DEFINE_WAIT(wait);

 retry:
	spin_lock_irqsave(ap->lock, flags);

	while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) {
		prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irqrestore(ap->lock, flags);
		schedule();
		spin_lock_irqsave(ap->lock, flags);
	}
	finish_wait(&ap->eh_wait_q, &wait);

	spin_unlock_irqrestore(ap->lock, flags);

	/* make sure SCSI EH is complete */
	if (scsi_host_in_recovery(ap->scsi_host)) {
		ata_msleep(ap, 10);
		goto retry;
	}
}
EXPORT_SYMBOL_GPL(ata_port_wait_eh);

static int ata_eh_nr_in_flight(struct ata_port *ap)
{
	unsigned int tag;
	int nr = 0;

	/* count only non-internal commands */
	for (tag = 0; tag < ATA_MAX_QUEUE - 1; tag++)
		if (ata_qc_from_tag(ap, tag))
			nr++;

	return nr;
}

void ata_eh_fastdrain_timerfn(unsigned long arg)
{
	struct ata_port *ap = (void *)arg;
	unsigned long flags;
	int cnt;

	spin_lock_irqsave(ap->lock, flags);

	cnt = ata_eh_nr_in_flight(ap);

	/* are we done? */
	if (!cnt)
		goto out_unlock;

	if (cnt == ap->fastdrain_cnt) {
		unsigned int tag;

		/* No progress during the last interval, tag all
		 * in-flight qcs as timed out and freeze the port.
		 */
		for (tag = 0; tag < ATA_MAX_QUEUE - 1; tag++) {
			struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);
			if (qc)
				qc->err_mask |= AC_ERR_TIMEOUT;
		}

		ata_port_freeze(ap);
	} else {
		/* some qcs have finished, give it another chance */
		ap->fastdrain_cnt = cnt;
		ap->fastdrain_timer.expires =
			ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
		add_timer(&ap->fastdrain_timer);
	}

 out_unlock:
	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_set_pending - set ATA_PFLAG_EH_PENDING and activate fast drain
 *	@ap: target ATA port
 *	@fastdrain: activate fast drain
 *
 *	Set ATA_PFLAG_EH_PENDING and activate fast drain if @fastdrain
 *	is non-zero and EH wasn't pending before.  Fast drain ensures
 *	that EH kicks in in a timely manner.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
static void ata_eh_set_pending(struct ata_port *ap, int fastdrain)
{
	int cnt;

	/* already scheduled? */
	if (ap->pflags & ATA_PFLAG_EH_PENDING)
		return;

	ap->pflags |= ATA_PFLAG_EH_PENDING;

	if (!fastdrain)
		return;

	/* do we have in-flight qcs? */
	cnt = ata_eh_nr_in_flight(ap);
	if (!cnt)
		return;

	/* activate fast drain */
	ap->fastdrain_cnt = cnt;
	ap->fastdrain_timer.expires =
		ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
	add_timer(&ap->fastdrain_timer);
}

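/* Illustrative sketch (not part of the driver): fast drain timeline
 * with ATA_EH_FASTDRAIN_INTERVAL = 3000ms and 4 qcs in flight when
 * EH is marked pending.
 *
 *	t=0ms     ata_eh_set_pending(ap, 1): fastdrain_cnt = 4,
 *	          timer armed for t+3000
 *	t=3000ms  timerfn: 2 qcs left (progress) -> fastdrain_cnt = 2,
 *	          timer re-armed for t+6000
 *	t=6000ms  timerfn: still 2 qcs (no progress) -> mark both
 *	          AC_ERR_TIMEOUT and ata_port_freeze(ap)
 *
 * A stalled drain is thus cut short after one interval without
 * progress rather than waiting out each command's own block-layer
 * timeout.
 */
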
/**
 *	ata_qc_schedule_eh - schedule qc for error handling
 *	@qc: command to schedule error handling for
 *
 *	Schedule error handling for @qc.  EH will kick in as soon as
 *	other commands are drained.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	struct request_queue *q = qc->scsicmd->device->request_queue;
	unsigned long flags;

	WARN_ON(!ap->ops->error_handler);

	qc->flags |= ATA_QCFLAG_FAILED;
	ata_eh_set_pending(ap, 1);

	/* The following will fail if timeout has already expired.
	 * ata_scsi_error() takes care of such scmds on EH entry.
	 * Note that ATA_QCFLAG_FAILED is unconditionally set after
	 * this function completes.
	 */
	spin_lock_irqsave(q->queue_lock, flags);
	blk_abort_request(qc->scsicmd->request);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

/**
 *	ata_port_schedule_eh - schedule error handling without a qc
 *	@ap: ATA port to schedule EH for
 *
 *	Schedule error handling for @ap.  EH will kick in as soon as
 *	all commands are drained.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_port_schedule_eh(struct ata_port *ap)
{
	WARN_ON(!ap->ops->error_handler);

	if (ap->pflags & ATA_PFLAG_INITIALIZING)
		return;

	ata_eh_set_pending(ap, 1);
	scsi_schedule_eh(ap->scsi_host);

	DPRINTK("port EH scheduled\n");
}

static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link)
{
	int tag, nr_aborted = 0;

	WARN_ON(!ap->ops->error_handler);

	/* we're gonna abort all commands, no need for fast drain */
	ata_eh_set_pending(ap, 0);

	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);

		if (qc && (!link || qc->dev->link == link)) {
			qc->flags |= ATA_QCFLAG_FAILED;
			ata_qc_complete(qc);
			nr_aborted++;
		}
	}

	if (!nr_aborted)
		ata_port_schedule_eh(ap);

	return nr_aborted;
}

/**
 *	ata_link_abort - abort all qc's on the link
 *	@link: ATA link to abort qc's for
 *
 *	Abort all active qc's on @link and schedule EH.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	Number of aborted qc's.
 */
int ata_link_abort(struct ata_link *link)
{
	return ata_do_link_abort(link->ap, link);
}

/**
 *	ata_port_abort - abort all qc's on the port
 *	@ap: ATA port to abort qc's for
 *
 *	Abort all active qc's of @ap and schedule EH.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	Number of aborted qc's.
 */
int ata_port_abort(struct ata_port *ap)
{
	return ata_do_link_abort(ap, NULL);
}

/**
 *	__ata_port_freeze - freeze port
 *	@ap: ATA port to freeze
 *
 *	This function is called when HSM violation or some other
 *	condition disrupts normal operation of the port.  Frozen port
 *	is not allowed to perform any operation until the port is
 *	thawed, which usually follows a successful reset.
 *
 *	ap->ops->freeze() callback can be used for freezing the port
 *	hardware-wise (e.g. mask interrupt and stop DMA engine).  If a
 *	port cannot be frozen hardware-wise, the interrupt handler
 *	must ack and clear interrupts unconditionally while the port
 *	is frozen.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
static void __ata_port_freeze(struct ata_port *ap)
{
	WARN_ON(!ap->ops->error_handler);

	if (ap->ops->freeze)
		ap->ops->freeze(ap);

	ap->pflags |= ATA_PFLAG_FROZEN;

	DPRINTK("ata%u port frozen\n", ap->print_id);
}

/**
 *	ata_port_freeze - abort & freeze port
 *	@ap: ATA port to freeze
 *
 *	Abort and freeze @ap.  The freeze operation must be called
 *	first, because some hardware requires special operations
 *	before the taskfile registers are accessible.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	Number of aborted commands.
 */
int ata_port_freeze(struct ata_port *ap)
{
	int nr_aborted;

	WARN_ON(!ap->ops->error_handler);

	__ata_port_freeze(ap);
	nr_aborted = ata_port_abort(ap);

	return nr_aborted;
}

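/* Illustrative sketch (not part of the driver): the freeze/thaw life
 * cycle as an LLDD typically experiences it.  Interrupt-context code
 * freezes on trouble; EH thaws only after recovery succeeds.
 *
 *	irq/completion path:
 *		ata_port_freeze(ap);	// aborts qcs, ->freeze(), FROZEN
 *
 *	EH path (helpers defined later in this file):
 *		ata_eh_freeze_port(ap);	// explicit freeze under ap->lock
 *		// ... reset the link ...
 *		ata_eh_thaw_port(ap);	// clears FROZEN, ->thaw()
 *
 * While ATA_PFLAG_FROZEN is set, drivers that can't mask interrupts
 * in hardware must ack and discard them, as noted above.
 */
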
/**
 *	sata_async_notification - SATA async notification handler
 *	@ap: ATA port where async notification is received
 *
 *	Handler to be called when async notification via SDB FIS is
 *	received.  This function schedules EH if necessary.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	1 if EH is scheduled, 0 otherwise.
 */
int sata_async_notification(struct ata_port *ap)
{
	u32 sntf;
	int rc;

	if (!(ap->flags & ATA_FLAG_AN))
		return 0;

	rc = sata_scr_read(&ap->link, SCR_NOTIFICATION, &sntf);
	if (rc == 0)
		sata_scr_write(&ap->link, SCR_NOTIFICATION, sntf);

	if (!sata_pmp_attached(ap) || rc) {
		/* PMP is not attached or SNTF is not available */
		if (!sata_pmp_attached(ap)) {
			/* PMP is not attached.  Check whether ATAPI
			 * AN is configured.  If so, notify media
			 * change.
			 */
			struct ata_device *dev = ap->link.device;

			if ((dev->class == ATA_DEV_ATAPI) &&
			    (dev->flags & ATA_DFLAG_AN))
				ata_scsi_media_change_notify(dev);
			return 0;
		} else {
			/* PMP is attached but SNTF is not available.
			 * ATAPI async media change notification is
			 * not used.  The PMP must be reporting PHY
			 * status change, schedule EH.
			 */
			ata_port_schedule_eh(ap);
			return 1;
		}
	} else {
		/* PMP is attached and SNTF is available */
		struct ata_link *link;

		/* check and notify ATAPI AN */
		ata_for_each_link(link, ap, EDGE) {
			if (!(sntf & (1 << link->pmp)))
				continue;

			if ((link->device->class == ATA_DEV_ATAPI) &&
			    (link->device->flags & ATA_DFLAG_AN))
				ata_scsi_media_change_notify(link->device);
		}

		/* If PMP is reporting that PHY status of some
		 * downstream ports has changed, schedule EH.
		 */
		if (sntf & (1 << SATA_PMP_CTRL_PORT)) {
			ata_port_schedule_eh(ap);
			return 1;
		}

		return 0;
	}
}

/**
 *	ata_eh_freeze_port - EH helper to freeze port
 *	@ap: ATA port to freeze
 *
 *	Freeze @ap.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_freeze_port(struct ata_port *ap)
{
	unsigned long flags;

	if (!ap->ops->error_handler)
		return;

	spin_lock_irqsave(ap->lock, flags);
	__ata_port_freeze(ap);
	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_thaw_port - EH helper to thaw port
 *	@ap: ATA port to thaw
 *
 *	Thaw frozen port @ap.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_thaw_port(struct ata_port *ap)
{
	unsigned long flags;

	if (!ap->ops->error_handler)
		return;

	spin_lock_irqsave(ap->lock, flags);

	ap->pflags &= ~ATA_PFLAG_FROZEN;

	if (ap->ops->thaw)
		ap->ops->thaw(ap);

	spin_unlock_irqrestore(ap->lock, flags);

	DPRINTK("ata%u port thawed\n", ap->print_id);
}

static void ata_eh_scsidone(struct scsi_cmnd *scmd)
{
	/* nada */
}

static void __ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	struct scsi_cmnd *scmd = qc->scsicmd;
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);
	qc->scsidone = ata_eh_scsidone;
	__ata_qc_complete(qc);
	WARN_ON(ata_tag_valid(qc->tag));
	spin_unlock_irqrestore(ap->lock, flags);

	scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
}

/**
 *	ata_eh_qc_complete - Complete an active ATA command from EH
 *	@qc: Command to complete
 *
 *	Indicate to the mid and upper layers that an ATA command has
 *	completed.  To be used from EH.
 */
void ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *scmd = qc->scsicmd;
	scmd->retries = scmd->allowed;
	__ata_eh_qc_complete(qc);
}

/**
 *	ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH
 *	@qc: Command to retry
 *
 *	Indicate to the mid and upper layers that an ATA command
 *	should be retried.  To be used from EH.
 *
 *	SCSI midlayer limits the number of retries to scmd->allowed.
 *	scmd->retries is decremented for commands which get retried
 *	due to unrelated failures (qc->err_mask is zero).
 */
void ata_eh_qc_retry(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *scmd = qc->scsicmd;
	if (!qc->err_mask && scmd->retries)
		scmd->retries--;
	__ata_eh_qc_complete(qc);
}

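/* Illustrative sketch (not part of the driver): the two EH completion
 * flavors above differ only in how they charge the SCSI retry budget.
 *
 *	ata_eh_qc_complete(qc);	// done for good: retries = allowed,
 *				// so the midlayer won't reissue it
 *	ata_eh_qc_retry(qc);	// reissue; if the failure wasn't the
 *				// command's fault (err_mask == 0) a
 *				// retry credit is given back first
 *
 * Both funnel through __ata_eh_qc_complete(), which detaches the qc
 * under ap->lock and parks the scmd on ap->eh_done_q.
 */
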
/**
 *	ata_dev_disable - disable ATA device
 *	@dev: ATA device to disable
 *
 *	Disable @dev.
 *
 *	Locking:
 *	EH context.
 */
void ata_dev_disable(struct ata_device *dev)
{
	if (!ata_dev_enabled(dev))
		return;

	if (ata_msg_drv(dev->link->ap))
		ata_dev_warn(dev, "disabled\n");
	ata_acpi_on_disable(dev);
	ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO0 | ATA_DNXFER_QUIET);
	dev->class++;

	/* From now till the next successful probe, ering is used to
	 * track probe failures.  Clear accumulated device error info.
	 */
	ata_ering_clear(&dev->ering);
}

/**
 *	ata_eh_detach_dev - detach ATA device
 *	@dev: ATA device to detach
 *
 *	Detach @dev.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_detach_dev(struct ata_device *dev)
{
	struct ata_link *link = dev->link;
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	unsigned long flags;

	ata_dev_disable(dev);

	spin_lock_irqsave(ap->lock, flags);

	dev->flags &= ~ATA_DFLAG_DETACH;

	if (ata_scsi_offline_dev(dev)) {
		dev->flags |= ATA_DFLAG_DETACHED;
		ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
	}

	/* clear per-dev EH info */
	ata_eh_clear_action(link, dev, &link->eh_info, ATA_EH_PERDEV_MASK);
	ata_eh_clear_action(link, dev, &link->eh_context.i, ATA_EH_PERDEV_MASK);
	ehc->saved_xfer_mode[dev->devno] = 0;
	ehc->saved_ncq_enabled &= ~(1 << dev->devno);

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_about_to_do - about to perform eh_action
 *	@link: target ATA link
 *	@dev: target ATA dev for per-dev action (can be NULL)
 *	@action: action about to be performed
 *
 *	Called just before performing EH actions to clear related bits
 *	in @link->eh_info such that eh actions are not unnecessarily
 *	repeated.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev,
			unsigned int action)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_info *ehi = &link->eh_info;
	struct ata_eh_context *ehc = &link->eh_context;
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);

	ata_eh_clear_action(link, dev, ehi, action);

	/* About to take EH action, set RECOVERED.  Ignore actions on
	 * slave links as master will do them again.
	 */
	if (!(ehc->i.flags & ATA_EHI_QUIET) && link != ap->slave_link)
		ap->pflags |= ATA_PFLAG_RECOVERED;

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_done - EH action complete
 *	@link: target ATA link
 *	@dev: target ATA dev for per-dev action (can be NULL)
 *	@action: action just completed
 *
 *	Called right after performing EH actions to clear related bits
 *	in @link->eh_context.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_done(struct ata_link *link, struct ata_device *dev,
		 unsigned int action)
{
	struct ata_eh_context *ehc = &link->eh_context;

	ata_eh_clear_action(link, dev, &ehc->i, action);
}

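/* Illustrative sketch (not part of the driver): recovery code
 * brackets each action between the two helpers above so that a
 * repeated EH pass doesn't redo work that already happened:
 *
 *	ata_eh_about_to_do(link, NULL, ATA_EH_RESET);
 *	rc = perform_reset(link);	// hypothetical stand-in for the
 *					// actual reset sequence
 *	if (rc == 0)
 *		ata_eh_done(link, NULL, ATA_EH_RESET);
 *
 * about_to_do clears the bit in eh_info (where new requests
 * accumulate); done clears it in eh_context (the copy this EH pass
 * is executing from).
 */
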
/**
 *	ata_err_string - convert err_mask to descriptive string
 *	@err_mask: error mask to convert to string
 *
 *	Convert @err_mask to descriptive string.  Errors are
 *	prioritized according to severity and only the most severe
 *	error is reported.
 *
 *	LOCKING:
 *	None.
 *
 *	RETURNS:
 *	Descriptive string for @err_mask
 */
static const char *ata_err_string(unsigned int err_mask)
{
	if (err_mask & AC_ERR_HOST_BUS)
		return "host bus error";
	if (err_mask & AC_ERR_ATA_BUS)
		return "ATA bus error";
	if (err_mask & AC_ERR_TIMEOUT)
		return "timeout";
	if (err_mask & AC_ERR_HSM)
		return "HSM violation";
	if (err_mask & AC_ERR_SYSTEM)
		return "internal error";
	if (err_mask & AC_ERR_MEDIA)
		return "media error";
	if (err_mask & AC_ERR_INVALID)
		return "invalid argument";
	if (err_mask & AC_ERR_DEV)
		return "device error";
	return "unknown error";
}

/**
 *	ata_read_log_page - read a specific log page
 *	@dev: target device
 *	@page: page to read
 *	@buf: buffer to store read page
 *	@sectors: number of sectors to read
 *
 *	Read log page using READ_LOG_EXT command.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, AC_ERR_* mask otherwise.
 */
static unsigned int ata_read_log_page(struct ata_device *dev,
				      u8 page, void *buf, unsigned int sectors)
{
	struct ata_taskfile tf;
	unsigned int err_mask;

	DPRINTK("read log page - page %d\n", page);

	ata_tf_init(dev, &tf);
	tf.command = ATA_CMD_READ_LOG_EXT;
	tf.lbal = page;
	tf.nsect = sectors;
	tf.hob_nsect = sectors >> 8;
	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_LBA48 | ATA_TFLAG_DEVICE;
	tf.protocol = ATA_PROT_PIO;

	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE,
				     buf, sectors * ATA_SECT_SIZE, 0);

	DPRINTK("EXIT, err_mask=%x\n", err_mask);
	return err_mask;
}

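/* Illustrative sketch (not part of the driver): how the taskfile
 * above maps onto READ LOG EXT for a one-sector read of the NCQ
 * error log (page 10h), the only caller in this file:
 *
 *	err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, buf, 1);
 *
 * lbal carries the log page number, nsect/hob_nsect the 16-bit
 * sector count, and the data comes back via PIO into buf.
 */
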
/**
 *	ata_eh_read_log_10h - Read log page 10h for NCQ error details
 *	@dev: Device to read log page 10h from
 *	@tag: Resulting tag of the failed command
 *	@tf: Resulting taskfile registers of the failed command
 *
 *	Read log page 10h to obtain NCQ error details and clear error
 *	condition.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, -errno otherwise.
 */
static int ata_eh_read_log_10h(struct ata_device *dev,
			       int *tag, struct ata_taskfile *tf)
{
	u8 *buf = dev->link->ap->sector_buf;
	unsigned int err_mask;
	u8 csum;
	int i;

	err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, buf, 1);
	if (err_mask)
		return -EIO;

	csum = 0;
	for (i = 0; i < ATA_SECT_SIZE; i++)
		csum += buf[i];
	if (csum)
		ata_dev_warn(dev, "invalid checksum 0x%x on log page 10h\n",
			     csum);

	if (buf[0] & 0x80)
		return -ENOENT;

	*tag = buf[0] & 0x1f;

	tf->command = buf[2];
	tf->feature = buf[3];
	tf->lbal = buf[4];
	tf->lbam = buf[5];
	tf->lbah = buf[6];
	tf->device = buf[7];
	tf->hob_lbal = buf[8];
	tf->hob_lbam = buf[9];
	tf->hob_lbah = buf[10];
	tf->nsect = buf[12];
	tf->hob_nsect = buf[13];

	return 0;
}

/**
 *	atapi_eh_tur - perform ATAPI TEST_UNIT_READY
 *	@dev: target ATAPI device
 *	@r_sense_key: out parameter for sense_key
 *
 *	Perform ATAPI TEST_UNIT_READY.
 *
 *	LOCKING:
 *	EH context (may sleep).
 *
 *	RETURNS:
 *	0 on success, AC_ERR_* mask on failure.
 */
static unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key)
{
	u8 cdb[ATAPI_CDB_LEN] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 };
	struct ata_taskfile tf;
	unsigned int err_mask;

	ata_tf_init(dev, &tf);

	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.command = ATA_CMD_PACKET;
	tf.protocol = ATAPI_PROT_NODATA;

	err_mask = ata_exec_internal(dev, &tf, cdb, DMA_NONE, NULL, 0, 0);
	if (err_mask == AC_ERR_DEV)
		*r_sense_key = tf.feature >> 4;
	return err_mask;
}

/**
 *	atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
 *	@dev: device to perform REQUEST_SENSE to
 *	@sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
 *	@dfl_sense_key: default sense key to use
 *
 *	Perform ATAPI REQUEST_SENSE after the device reported CHECK
 *	SENSE.  This function is an EH helper.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, AC_ERR_* mask on failure
 */
static unsigned int atapi_eh_request_sense(struct ata_device *dev,
					   u8 *sense_buf, u8 dfl_sense_key)
{
	u8 cdb[ATAPI_CDB_LEN] =
		{ REQUEST_SENSE, 0, 0, 0, SCSI_SENSE_BUFFERSIZE, 0 };
	struct ata_port *ap = dev->link->ap;
	struct ata_taskfile tf;

	DPRINTK("ATAPI request sense\n");

	/* FIXME: is this needed? */
	memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);

	/* initialize sense_buf with the error register,
	 * for the case where they are -not- overwritten
	 */
	sense_buf[0] = 0x70;
	sense_buf[2] = dfl_sense_key;

	/* some devices time out if garbage left in tf */
	ata_tf_init(dev, &tf);

	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.command = ATA_CMD_PACKET;

	/* is it pointless to prefer PIO for "safety reasons"? */
	if (ap->flags & ATA_FLAG_PIO_DMA) {
		tf.protocol = ATAPI_PROT_DMA;
		tf.feature |= ATAPI_PKT_DMA;
	} else {
		tf.protocol = ATAPI_PROT_PIO;
		tf.lbam = SCSI_SENSE_BUFFERSIZE;
		tf.lbah = 0;
	}

	return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
				 sense_buf, SCSI_SENSE_BUFFERSIZE, 0);
}

/**
 *	ata_eh_analyze_serror - analyze SError for a failed port
 *	@link: ATA link to analyze SError for
 *
 *	Analyze SError if available and further determine cause of
 *	failure.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_analyze_serror(struct ata_link *link)
{
	struct ata_eh_context *ehc = &link->eh_context;
	u32 serror = ehc->i.serror;
	unsigned int err_mask = 0, action = 0;
	u32 hotplug_mask;

	if (serror & (SERR_PERSISTENT | SERR_DATA)) {
		err_mask |= AC_ERR_ATA_BUS;
		action |= ATA_EH_RESET;
	}
	if (serror & SERR_PROTOCOL) {
		err_mask |= AC_ERR_HSM;
		action |= ATA_EH_RESET;
	}
	if (serror & SERR_INTERNAL) {
		err_mask |= AC_ERR_SYSTEM;
		action |= ATA_EH_RESET;
	}

	/* Determine whether a hotplug event has occurred.  Both
	 * SError.N/X are considered hotplug events for enabled or
	 * host links.  For disabled PMP links, only N bit is
	 * considered as X bit is left at 1 for link plugging.
	 */
	if (link->lpm_policy > ATA_LPM_MAX_POWER)
		hotplug_mask = 0;	/* hotplug doesn't work w/ LPM */
	else if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link))
		hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG;
	else
		hotplug_mask = SERR_PHYRDY_CHG;

	if (serror & hotplug_mask)
		ata_ehi_hotplugged(&ehc->i);

	ehc->i.err_mask |= err_mask;
	ehc->i.action |= action;
}

/**
 *	ata_eh_analyze_ncq_error - analyze NCQ error
 *	@link: ATA link to analyze NCQ error for
 *
 *	Read log page 10h, determine the offending qc and acquire
 *	error status TF.  For NCQ device errors, all an LLDD has to do
 *	is set AC_ERR_DEV in ehi->err_mask.  This function takes
 *	care of the rest.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
void ata_eh_analyze_ncq_error(struct ata_link *link)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_device *dev = link->device;
	struct ata_queued_cmd *qc;
	struct ata_taskfile tf;
	int tag, rc;

	/* if frozen, we can't do much */
	if (ap->pflags & ATA_PFLAG_FROZEN)
		return;

	/* is it NCQ device error? */
	if (!link->sactive || !(ehc->i.err_mask & AC_ERR_DEV))
		return;

	/* has LLDD analyzed already? */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		if (qc->err_mask)
			return;
	}

	/* okay, this error is ours */
	memset(&tf, 0, sizeof(tf));
	rc = ata_eh_read_log_10h(dev, &tag, &tf);
	if (rc) {
		ata_link_err(link, "failed to read log page 10h (errno=%d)\n",
			     rc);
		return;
	}

	if (!(link->sactive & (1 << tag))) {
		ata_link_err(link, "log page 10h reported inactive tag %d\n",
			     tag);
		return;
	}

	/* we've got the perpetrator, condemn it */
	qc = __ata_qc_from_tag(ap, tag);
	memcpy(&qc->result_tf, &tf, sizeof(tf));
	qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48;
	qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ;
	ehc->i.err_mask &= ~AC_ERR_DEV;
}

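/* Illustrative sketch (not part of the driver): the division of labor
 * for an NCQ device error, seen from a hypothetical LLDD's interrupt
 * handler.  The LLDD only flags the condition:
 *
 *	ehi->err_mask |= AC_ERR_DEV;	// SDB FIS with ERR set
 *	ata_link_abort(link);		// kick the failed qcs into EH
 *
 * EH then calls ata_eh_analyze_ncq_error(), which reads log page 10h
 * to find the one failed tag, fills that qc's result_tf, and moves
 * the AC_ERR_DEV blame from the link to the single offending command.
 */
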
/**
 *	ata_eh_analyze_tf - analyze taskfile of a failed qc
 *	@qc: qc to analyze
 *	@tf: Taskfile registers to analyze
 *
 *	Analyze taskfile of @qc and further determine cause of
 *	failure.  This function also requests ATAPI sense data if
 *	available.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	Determined recovery action
 */
static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
				      const struct ata_taskfile *tf)
{
	unsigned int tmp, action = 0;
	u8 stat = tf->command, err = tf->feature;

	if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) {
		qc->err_mask |= AC_ERR_HSM;
		return ATA_EH_RESET;
	}

	if (stat & (ATA_ERR | ATA_DF))
		qc->err_mask |= AC_ERR_DEV;
	else
		return 0;

	switch (qc->dev->class) {
	case ATA_DEV_ATA:
		if (err & ATA_ICRC)
			qc->err_mask |= AC_ERR_ATA_BUS;
		if (err & ATA_UNC)
			qc->err_mask |= AC_ERR_MEDIA;
		if (err & ATA_IDNF)
			qc->err_mask |= AC_ERR_INVALID;
		break;

	case ATA_DEV_ATAPI:
		if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) {
			tmp = atapi_eh_request_sense(qc->dev,
						qc->scsicmd->sense_buffer,
						qc->result_tf.feature >> 4);
			if (!tmp) {
				/* ATA_QCFLAG_SENSE_VALID is used to
				 * tell atapi_qc_complete() that sense
				 * data is already valid.
				 *
				 * TODO: interpret sense data and set
				 * appropriate err_mask.
				 */
				qc->flags |= ATA_QCFLAG_SENSE_VALID;
			} else
				qc->err_mask |= tmp;
		}
	}

	if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS))
		action |= ATA_EH_RESET;

	return action;
}

static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask,
				   int *xfer_ok)
{
	int base = 0;

	if (!(eflags & ATA_EFLAG_DUBIOUS_XFER))
		*xfer_ok = 1;

	if (!*xfer_ok)
		base = ATA_ECAT_DUBIOUS_NONE;

	if (err_mask & AC_ERR_ATA_BUS)
		return base + ATA_ECAT_ATA_BUS;

	if (err_mask & AC_ERR_TIMEOUT)
		return base + ATA_ECAT_TOUT_HSM;

	if (eflags & ATA_EFLAG_IS_IO) {
		if (err_mask & AC_ERR_HSM)
			return base + ATA_ECAT_TOUT_HSM;
		if ((err_mask &
		     (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
			return base + ATA_ECAT_UNK_DEV;
	}

	return 0;
}

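/* Illustrative sketch (not part of the driver): sample classifications
 * produced by ata_eh_categorize_error(), assuming a fresh *xfer_ok.
 *
 *	eflags			err_mask	category
 *	---------------------	------------	------------------------
 *	IS_IO			ATA_BUS		ATA_ECAT_ATA_BUS
 *	IS_IO			TIMEOUT		ATA_ECAT_TOUT_HSM
 *	IS_IO			DEV only	ATA_ECAT_UNK_DEV
 *	IS_IO | DUBIOUS_XFER	DEV only	ATA_ECAT_DUBIOUS_UNK_DEV
 *	(none)			DEV only	ATA_ECAT_NONE (category 0)
 *
 * DUBIOUS_* variants apply while no transfer has been verified yet
 * (*xfer_ok still 0), e.g. right after a transfer mode change.
 */
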
struct speed_down_verdict_arg {
	u64 since;
	int xfer_ok;
	int nr_errors[ATA_ECAT_NR];
};

static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
{
	struct speed_down_verdict_arg *arg = void_arg;
	int cat;

	if ((ent->eflags & ATA_EFLAG_OLD_ER) || (ent->timestamp < arg->since))
		return -1;

	cat = ata_eh_categorize_error(ent->eflags, ent->err_mask,
				      &arg->xfer_ok);
	arg->nr_errors[cat]++;

	return 0;
}

/**
 *	ata_eh_speed_down_verdict - Determine speed down verdict
 *	@dev: Device of interest
 *
 *	This function examines error ring of @dev and determines
 *	whether NCQ needs to be turned off, transfer speed should be
 *	stepped down, or falling back to PIO is necessary.
 *
 *	ECAT_ATA_BUS	: ATA_BUS error for any command
 *
 *	ECAT_TOUT_HSM	: TIMEOUT for any command or HSM violation for
 *			  IO commands
 *
 *	ECAT_UNK_DEV	: Unknown DEV error for IO commands
 *
 *	ECAT_DUBIOUS_*	: Identical to above three but occurred while
 *			  data transfer hasn't been verified.
 *
 *	Verdicts are
 *
 *	NCQ_OFF		: Turn off NCQ.
 *
 *	SPEED_DOWN	: Speed down transfer speed but don't fall back
 *			  to PIO.
 *
 *	FALLBACK_TO_PIO	: Fall back to PIO.
 *
 *	Even if multiple verdicts are returned, only one action is
 *	taken per error.  An action triggered by non-DUBIOUS errors
 *	clears ering, while one triggered by DUBIOUS_* errors doesn't.
 *	This is to expedite speed down decisions right after device is
 *	initially configured.
 *
 *	The following are the speed down rules.  #1 and #2 deal with
 *	DUBIOUS errors.
 *
 *	1 : If more than one DUBIOUS_ATA_BUS or DUBIOUS_TOUT_HSM errors
 *	    occurred during last 5 mins, SPEED_DOWN and FALLBACK_TO_PIO.
 *
 *	2 : If more than one DUBIOUS_TOUT_HSM or DUBIOUS_UNK_DEV errors
 *	    occurred during last 5 mins, NCQ_OFF.
 *
 *	3 : If more than 6 ATA_BUS, TOUT_HSM or UNK_DEV errors
 *	    occurred during last 5 mins, FALLBACK_TO_PIO.
 *
 *	4 : If more than 3 TOUT_HSM or UNK_DEV errors occurred
 *	    during last 10 mins, NCQ_OFF.
 *
 *	5 : If more than 3 ATA_BUS or TOUT_HSM errors, or more than 6
 *	    UNK_DEV errors occurred during last 10 mins, SPEED_DOWN.
 *
 *	LOCKING:
 *	Inherited from caller.
 *
 *	RETURNS:
 *	OR of ATA_EH_SPDN_* flags.
 */
static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev)
{
	const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ;
	u64 j64 = get_jiffies_64();
	struct speed_down_verdict_arg arg;
	unsigned int verdict = 0;

	/* scan past 5 mins of error history */
	memset(&arg, 0, sizeof(arg));
	arg.since = j64 - min(j64, j5mins);
	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);

	if (arg.nr_errors[ATA_ECAT_DUBIOUS_ATA_BUS] +
	    arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] > 1)
		verdict |= ATA_EH_SPDN_SPEED_DOWN |
			ATA_EH_SPDN_FALLBACK_TO_PIO | ATA_EH_SPDN_KEEP_ERRORS;

	if (arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] +
	    arg.nr_errors[ATA_ECAT_DUBIOUS_UNK_DEV] > 1)
		verdict |= ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_KEEP_ERRORS;

	if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
	    arg.nr_errors[ATA_ECAT_TOUT_HSM] +
	    arg.nr_errors[ATA_ECAT_UNK_DEV] > 6)
		verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO;

	/* scan past 10 mins of error history */
	memset(&arg, 0, sizeof(arg));
	arg.since = j64 - min(j64, j10mins);
	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);

	if (arg.nr_errors[ATA_ECAT_TOUT_HSM] +
	    arg.nr_errors[ATA_ECAT_UNK_DEV] > 3)
		verdict |= ATA_EH_SPDN_NCQ_OFF;

	if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
	    arg.nr_errors[ATA_ECAT_TOUT_HSM] > 3 ||
	    arg.nr_errors[ATA_ECAT_UNK_DEV] > 6)
		verdict |= ATA_EH_SPDN_SPEED_DOWN;

	return verdict;
}

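/* Illustrative worked example (not part of the driver): a drive that
 * hits three ATA bus errors on I/O within five minutes, all after
 * transfers had already been verified.
 *
 *	5-min scan:  nr_errors[ATA_ECAT_ATA_BUS] = 3
 *		     -> rules 1-3 not met (3 + 0 + 0 <= 6)
 *	10-min scan: same three errors
 *		     -> rule 5: ATA_BUS + TOUT_HSM = 3, not > 3
 *
 * One more ATA bus error within ten minutes tips rule 5 and returns
 * ATA_EH_SPDN_SPEED_DOWN, which ata_eh_speed_down() below turns into
 * a SATA PHY speed reduction or a transfer mode step-down.
 */
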
/**
 *	ata_eh_speed_down - record error and speed down if necessary
 *	@dev: Failed device
 *	@eflags: mask of ATA_EFLAG_* flags
 *	@err_mask: err_mask of the error
 *
 *	Record error and examine error history to determine whether
 *	adjusting transmission speed is necessary.  It also sets
 *	transmission limits appropriately if such adjustment is
 *	necessary.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	Determined recovery action.
 */
static unsigned int ata_eh_speed_down(struct ata_device *dev,
				unsigned int eflags, unsigned int err_mask)
{
	struct ata_link *link = ata_dev_phys_link(dev);
	int xfer_ok = 0;
	unsigned int verdict;
	unsigned int action = 0;

	/* don't bother if Cat-0 error */
	if (ata_eh_categorize_error(eflags, err_mask, &xfer_ok) == 0)
		return 0;

	/* record error and determine whether speed down is necessary */
	ata_ering_record(&dev->ering, eflags, err_mask);
	verdict = ata_eh_speed_down_verdict(dev);

	/* turn off NCQ? */
	if ((verdict & ATA_EH_SPDN_NCQ_OFF) &&
	    (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ |
			   ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) {
		dev->flags |= ATA_DFLAG_NCQ_OFF;
		ata_dev_warn(dev, "NCQ disabled due to excessive errors\n");
		goto done;
	}

	/* speed down? */
	if (verdict & ATA_EH_SPDN_SPEED_DOWN) {
		/* speed down SATA link speed if possible */
		if (sata_down_spd_limit(link, 0) == 0) {
			action |= ATA_EH_RESET;
			goto done;
		}

		/* lower transfer mode */
		if (dev->spdn_cnt < 2) {
			static const int dma_dnxfer_sel[] =
				{ ATA_DNXFER_DMA, ATA_DNXFER_40C };
			static const int pio_dnxfer_sel[] =
				{ ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 };
			int sel;

			if (dev->xfer_shift != ATA_SHIFT_PIO)
				sel = dma_dnxfer_sel[dev->spdn_cnt];
			else
				sel = pio_dnxfer_sel[dev->spdn_cnt];

			dev->spdn_cnt++;

			if (ata_down_xfermask_limit(dev, sel) == 0) {
				action |= ATA_EH_RESET;
				goto done;
			}
		}
	}

	/* Fall back to PIO?  Slowing down to PIO is meaningless for
	 * SATA ATA devices.  Consider it only for PATA and SATAPI.
	 */
	if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) &&
	    (link->ap->cbl != ATA_CBL_SATA || dev->class == ATA_DEV_ATAPI) &&
	    (dev->xfer_shift != ATA_SHIFT_PIO)) {
		if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) {
			dev->spdn_cnt = 0;
			action |= ATA_EH_RESET;
			goto done;
		}
	}

	return 0;
 done:
	/* device has been slowed down, blow error history */
	if (!(verdict & ATA_EH_SPDN_KEEP_ERRORS))
		ata_ering_clear(&dev->ering);
	return action;
}

/**
 *	ata_eh_worth_retry - analyze error and decide whether to retry
 *	@qc: qc to possibly retry
 *
 *	Look at the cause of the error and decide if a retry
 *	might be useful or not.  We don't want to retry media errors
 *	because the drive itself has probably already taken 10-30 seconds
 *	doing its own internal retries before reporting the failure.
 */
static inline int ata_eh_worth_retry(struct ata_queued_cmd *qc)
{
	if (qc->err_mask & AC_ERR_MEDIA)
		return 0;	/* don't retry media errors */
	if (qc->flags & ATA_QCFLAG_IO)
		return 1;	/* otherwise retry anything from fs stack */
	if (qc->err_mask & AC_ERR_INVALID)
		return 0;	/* don't retry these */
	return qc->err_mask != AC_ERR_DEV;  /* retry if not dev error */
}

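/* Illustrative sketch (not part of the driver): outcomes of the retry
 * policy above for a few representative failures.
 *
 *	err_mask		qc->flags	worth retrying?
 *	---------------------	-----------	----------------------
 *	AC_ERR_MEDIA		any		no (drive already tried)
 *	AC_ERR_ATA_BUS		QCFLAG_IO	yes (fs I/O, not media)
 *	AC_ERR_INVALID		!QCFLAG_IO	no (would fail again)
 *	AC_ERR_DEV (alone)	!QCFLAG_IO	no (device rejected it)
 *	AC_ERR_TIMEOUT		!QCFLAG_IO	yes (err_mask != AC_ERR_DEV)
 */
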
2079 */ 2080 static void ata_eh_link_autopsy(struct ata_link *link) 2081 { 2082 struct ata_port *ap = link->ap; 2083 struct ata_eh_context *ehc = &link->eh_context; 2084 struct ata_device *dev; 2085 unsigned int all_err_mask = 0, eflags = 0; 2086 int tag; 2087 u32 serror; 2088 int rc; 2089 2090 DPRINTK("ENTER\n"); 2091 2092 if (ehc->i.flags & ATA_EHI_NO_AUTOPSY) 2093 return; 2094 2095 /* obtain and analyze SError */ 2096 rc = sata_scr_read(link, SCR_ERROR, &serror); 2097 if (rc == 0) { 2098 ehc->i.serror |= serror; 2099 ata_eh_analyze_serror(link); 2100 } else if (rc != -EOPNOTSUPP) { 2101 /* SError read failed, force reset and probing */ 2102 ehc->i.probe_mask |= ATA_ALL_DEVICES; 2103 ehc->i.action |= ATA_EH_RESET; 2104 ehc->i.err_mask |= AC_ERR_OTHER; 2105 } 2106 2107 /* analyze NCQ failure */ 2108 ata_eh_analyze_ncq_error(link); 2109 2110 /* any real error trumps AC_ERR_OTHER */ 2111 if (ehc->i.err_mask & ~AC_ERR_OTHER) 2112 ehc->i.err_mask &= ~AC_ERR_OTHER; 2113 2114 all_err_mask |= ehc->i.err_mask; 2115 2116 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 2117 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 2118 2119 if (!(qc->flags & ATA_QCFLAG_FAILED) || 2120 ata_dev_phys_link(qc->dev) != link) 2121 continue; 2122 2123 /* inherit upper level err_mask */ 2124 qc->err_mask |= ehc->i.err_mask; 2125 2126 /* analyze TF */ 2127 ehc->i.action |= ata_eh_analyze_tf(qc, &qc->result_tf); 2128 2129 /* DEV errors are probably spurious in case of ATA_BUS error */ 2130 if (qc->err_mask & AC_ERR_ATA_BUS) 2131 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA | 2132 AC_ERR_INVALID); 2133 2134 /* any real error trumps unknown error */ 2135 if (qc->err_mask & ~AC_ERR_OTHER) 2136 qc->err_mask &= ~AC_ERR_OTHER; 2137 2138 /* SENSE_VALID trumps dev/unknown error and revalidation */ 2139 if (qc->flags & ATA_QCFLAG_SENSE_VALID) 2140 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER); 2141 2142 /* determine whether the command is worth retrying */ 2143 if (ata_eh_worth_retry(qc)) 2144 qc->flags |= ATA_QCFLAG_RETRY; 2145 2146 /* accumulate error info */ 2147 ehc->i.dev = qc->dev; 2148 all_err_mask |= qc->err_mask; 2149 if (qc->flags & ATA_QCFLAG_IO) 2150 eflags |= ATA_EFLAG_IS_IO; 2151 } 2152 2153 /* enforce default EH actions */ 2154 if (ap->pflags & ATA_PFLAG_FROZEN || 2155 all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT)) 2156 ehc->i.action |= ATA_EH_RESET; 2157 else if (((eflags & ATA_EFLAG_IS_IO) && all_err_mask) || 2158 (!(eflags & ATA_EFLAG_IS_IO) && (all_err_mask & ~AC_ERR_DEV))) 2159 ehc->i.action |= ATA_EH_REVALIDATE; 2160 2161 /* If we have offending qcs and the associated failed device, 2162 * perform per-dev EH action only on the offending device. 
2163 */ 2164 if (ehc->i.dev) { 2165 ehc->i.dev_action[ehc->i.dev->devno] |= 2166 ehc->i.action & ATA_EH_PERDEV_MASK; 2167 ehc->i.action &= ~ATA_EH_PERDEV_MASK; 2168 } 2169 2170 /* propagate timeout to host link */ 2171 if ((all_err_mask & AC_ERR_TIMEOUT) && !ata_is_host_link(link)) 2172 ap->link.eh_context.i.err_mask |= AC_ERR_TIMEOUT; 2173 2174 /* record error and consider speeding down */ 2175 dev = ehc->i.dev; 2176 if (!dev && ((ata_link_max_devices(link) == 1 && 2177 ata_dev_enabled(link->device)))) 2178 dev = link->device; 2179 2180 if (dev) { 2181 if (dev->flags & ATA_DFLAG_DUBIOUS_XFER) 2182 eflags |= ATA_EFLAG_DUBIOUS_XFER; 2183 ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask); 2184 } 2185 2186 DPRINTK("EXIT\n"); 2187 } 2188 2189 /** 2190 * ata_eh_autopsy - analyze error and determine recovery action 2191 * @ap: host port to perform autopsy on 2192 * 2193 * Analyze all links of @ap and determine why they failed and 2194 * which recovery actions are needed. 2195 * 2196 * LOCKING: 2197 * Kernel thread context (may sleep). 2198 */ 2199 void ata_eh_autopsy(struct ata_port *ap) 2200 { 2201 struct ata_link *link; 2202 2203 ata_for_each_link(link, ap, EDGE) 2204 ata_eh_link_autopsy(link); 2205 2206 /* Handle the frigging slave link. Autopsy is done similarly 2207 * but actions and flags are transferred over to the master 2208 * link and handled from there. 2209 */ 2210 if (ap->slave_link) { 2211 struct ata_eh_context *mehc = &ap->link.eh_context; 2212 struct ata_eh_context *sehc = &ap->slave_link->eh_context; 2213 2214 /* transfer control flags from master to slave */ 2215 sehc->i.flags |= mehc->i.flags & ATA_EHI_TO_SLAVE_MASK; 2216 2217 /* perform autopsy on the slave link */ 2218 ata_eh_link_autopsy(ap->slave_link); 2219 2220 /* transfer actions from slave to master and clear slave */ 2221 ata_eh_about_to_do(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS); 2222 mehc->i.action |= sehc->i.action; 2223 mehc->i.dev_action[1] |= sehc->i.dev_action[1]; 2224 mehc->i.flags |= sehc->i.flags; 2225 ata_eh_done(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS); 2226 } 2227 2228 /* Autopsy of fanout ports can affect host link autopsy. 2229 * Perform host link autopsy last. 2230 */ 2231 if (sata_pmp_attached(ap)) 2232 ata_eh_link_autopsy(&ap->link); 2233 } 2234 2235 /** 2236 * ata_get_cmd_descript - get description for ATA command 2237 * @command: ATA command code to get description for 2238 * 2239 * Return a textual description of the given command, or NULL if the 2240 * command is not known. 
 *
 * LOCKING:
 *	None
 */
const char *ata_get_cmd_descript(u8 command)
{
#ifdef CONFIG_ATA_VERBOSE_ERROR
	static const struct
	{
		u8 command;
		const char *text;
	} cmd_descr[] = {
		{ ATA_CMD_DEV_RESET,		"DEVICE RESET" },
		{ ATA_CMD_CHK_POWER,		"CHECK POWER MODE" },
		{ ATA_CMD_STANDBY,		"STANDBY" },
		{ ATA_CMD_IDLE,			"IDLE" },
		{ ATA_CMD_EDD,			"EXECUTE DEVICE DIAGNOSTIC" },
		{ ATA_CMD_DOWNLOAD_MICRO,	"DOWNLOAD MICROCODE" },
		{ ATA_CMD_NOP,			"NOP" },
		{ ATA_CMD_FLUSH,		"FLUSH CACHE" },
		{ ATA_CMD_FLUSH_EXT,		"FLUSH CACHE EXT" },
		{ ATA_CMD_ID_ATA,		"IDENTIFY DEVICE" },
		{ ATA_CMD_ID_ATAPI,		"IDENTIFY PACKET DEVICE" },
		{ ATA_CMD_SERVICE,		"SERVICE" },
		{ ATA_CMD_READ,			"READ DMA" },
		{ ATA_CMD_READ_EXT,		"READ DMA EXT" },
		{ ATA_CMD_READ_QUEUED,		"READ DMA QUEUED" },
		{ ATA_CMD_READ_STREAM_EXT,	"READ STREAM EXT" },
		{ ATA_CMD_READ_STREAM_DMA_EXT,	"READ STREAM DMA EXT" },
		{ ATA_CMD_WRITE,		"WRITE DMA" },
		{ ATA_CMD_WRITE_EXT,		"WRITE DMA EXT" },
		{ ATA_CMD_WRITE_QUEUED,		"WRITE DMA QUEUED" },
		{ ATA_CMD_WRITE_STREAM_EXT,	"WRITE STREAM EXT" },
		{ ATA_CMD_WRITE_STREAM_DMA_EXT,	"WRITE STREAM DMA EXT" },
		{ ATA_CMD_WRITE_FUA_EXT,	"WRITE DMA FUA EXT" },
		{ ATA_CMD_WRITE_QUEUED_FUA_EXT,	"WRITE DMA QUEUED FUA EXT" },
		{ ATA_CMD_FPDMA_READ,		"READ FPDMA QUEUED" },
		{ ATA_CMD_FPDMA_WRITE,		"WRITE FPDMA QUEUED" },
		{ ATA_CMD_PIO_READ,		"READ SECTOR(S)" },
		{ ATA_CMD_PIO_READ_EXT,		"READ SECTOR(S) EXT" },
		{ ATA_CMD_PIO_WRITE,		"WRITE SECTOR(S)" },
		{ ATA_CMD_PIO_WRITE_EXT,	"WRITE SECTOR(S) EXT" },
		{ ATA_CMD_READ_MULTI,		"READ MULTIPLE" },
		{ ATA_CMD_READ_MULTI_EXT,	"READ MULTIPLE EXT" },
		{ ATA_CMD_WRITE_MULTI,		"WRITE MULTIPLE" },
		{ ATA_CMD_WRITE_MULTI_EXT,	"WRITE MULTIPLE EXT" },
		{ ATA_CMD_WRITE_MULTI_FUA_EXT,	"WRITE MULTIPLE FUA EXT" },
		{ ATA_CMD_SET_FEATURES,		"SET FEATURES" },
		{ ATA_CMD_SET_MULTI,		"SET MULTIPLE MODE" },
		{ ATA_CMD_VERIFY,		"READ VERIFY SECTOR(S)" },
		{ ATA_CMD_VERIFY_EXT,		"READ VERIFY SECTOR(S) EXT" },
		{ ATA_CMD_WRITE_UNCORR_EXT,	"WRITE UNCORRECTABLE EXT" },
		{ ATA_CMD_STANDBYNOW1,		"STANDBY IMMEDIATE" },
		{ ATA_CMD_IDLEIMMEDIATE,	"IDLE IMMEDIATE" },
		{ ATA_CMD_SLEEP,		"SLEEP" },
		{ ATA_CMD_INIT_DEV_PARAMS,	"INITIALIZE DEVICE PARAMETERS" },
		{ ATA_CMD_READ_NATIVE_MAX,	"READ NATIVE MAX ADDRESS" },
		{ ATA_CMD_READ_NATIVE_MAX_EXT,	"READ NATIVE MAX ADDRESS EXT" },
		{ ATA_CMD_SET_MAX,		"SET MAX ADDRESS" },
		{ ATA_CMD_SET_MAX_EXT,		"SET MAX ADDRESS EXT" },
		{ ATA_CMD_READ_LOG_EXT,		"READ LOG EXT" },
		{ ATA_CMD_WRITE_LOG_EXT,	"WRITE LOG EXT" },
		{ ATA_CMD_READ_LOG_DMA_EXT,	"READ LOG DMA EXT" },
		{ ATA_CMD_WRITE_LOG_DMA_EXT,	"WRITE LOG DMA EXT" },
		{ ATA_CMD_TRUSTED_RCV,		"TRUSTED RECEIVE" },
		{ ATA_CMD_TRUSTED_RCV_DMA,	"TRUSTED RECEIVE DMA" },
		{ ATA_CMD_TRUSTED_SND,		"TRUSTED SEND" },
		{ ATA_CMD_TRUSTED_SND_DMA,	"TRUSTED SEND DMA" },
		{ ATA_CMD_PMP_READ,		"READ BUFFER" },
		{ ATA_CMD_PMP_WRITE,		"WRITE BUFFER" },
		{ ATA_CMD_CONF_OVERLAY,		"DEVICE CONFIGURATION OVERLAY" },
		{ ATA_CMD_SEC_SET_PASS,		"SECURITY SET PASSWORD" },
		{ ATA_CMD_SEC_UNLOCK,		"SECURITY UNLOCK" },
		{ ATA_CMD_SEC_ERASE_PREP,	"SECURITY ERASE PREPARE" },
		{ ATA_CMD_SEC_ERASE_UNIT,	"SECURITY ERASE UNIT" },
		{ ATA_CMD_SEC_FREEZE_LOCK,	"SECURITY FREEZE LOCK" },
		{ ATA_CMD_SEC_DISABLE_PASS,	"SECURITY DISABLE PASSWORD" },
		{
ATA_CMD_CONFIG_STREAM, "CONFIGURE STREAM" }, 2319 { ATA_CMD_SMART, "SMART" }, 2320 { ATA_CMD_MEDIA_LOCK, "DOOR LOCK" }, 2321 { ATA_CMD_MEDIA_UNLOCK, "DOOR UNLOCK" }, 2322 { ATA_CMD_DSM, "DATA SET MANAGEMENT" }, 2323 { ATA_CMD_CHK_MED_CRD_TYP, "CHECK MEDIA CARD TYPE" }, 2324 { ATA_CMD_CFA_REQ_EXT_ERR, "CFA REQUEST EXTENDED ERROR" }, 2325 { ATA_CMD_CFA_WRITE_NE, "CFA WRITE SECTORS WITHOUT ERASE" }, 2326 { ATA_CMD_CFA_TRANS_SECT, "CFA TRANSLATE SECTOR" }, 2327 { ATA_CMD_CFA_ERASE, "CFA ERASE SECTORS" }, 2328 { ATA_CMD_CFA_WRITE_MULT_NE, "CFA WRITE MULTIPLE WITHOUT ERASE" }, 2329 { ATA_CMD_READ_LONG, "READ LONG (with retries)" }, 2330 { ATA_CMD_READ_LONG_ONCE, "READ LONG (without retries)" }, 2331 { ATA_CMD_WRITE_LONG, "WRITE LONG (with retries)" }, 2332 { ATA_CMD_WRITE_LONG_ONCE, "WRITE LONG (without retries)" }, 2333 { ATA_CMD_RESTORE, "RECALIBRATE" }, 2334 { 0, NULL } /* terminate list */ 2335 }; 2336 2337 unsigned int i; 2338 for (i = 0; cmd_descr[i].text; i++) 2339 if (cmd_descr[i].command == command) 2340 return cmd_descr[i].text; 2341 #endif 2342 2343 return NULL; 2344 } 2345 2346 /** 2347 * ata_eh_link_report - report error handling to user 2348 * @link: ATA link EH is going on 2349 * 2350 * Report EH to user. 2351 * 2352 * LOCKING: 2353 * None. 2354 */ 2355 static void ata_eh_link_report(struct ata_link *link) 2356 { 2357 struct ata_port *ap = link->ap; 2358 struct ata_eh_context *ehc = &link->eh_context; 2359 const char *frozen, *desc; 2360 char tries_buf[6]; 2361 int tag, nr_failed = 0; 2362 2363 if (ehc->i.flags & ATA_EHI_QUIET) 2364 return; 2365 2366 desc = NULL; 2367 if (ehc->i.desc[0] != '\0') 2368 desc = ehc->i.desc; 2369 2370 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 2371 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 2372 2373 if (!(qc->flags & ATA_QCFLAG_FAILED) || 2374 ata_dev_phys_link(qc->dev) != link || 2375 ((qc->flags & ATA_QCFLAG_QUIET) && 2376 qc->err_mask == AC_ERR_DEV)) 2377 continue; 2378 if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask) 2379 continue; 2380 2381 nr_failed++; 2382 } 2383 2384 if (!nr_failed && !ehc->i.err_mask) 2385 return; 2386 2387 frozen = ""; 2388 if (ap->pflags & ATA_PFLAG_FROZEN) 2389 frozen = " frozen"; 2390 2391 memset(tries_buf, 0, sizeof(tries_buf)); 2392 if (ap->eh_tries < ATA_EH_MAX_TRIES) 2393 snprintf(tries_buf, sizeof(tries_buf) - 1, " t%d", 2394 ap->eh_tries); 2395 2396 if (ehc->i.dev) { 2397 ata_dev_err(ehc->i.dev, "exception Emask 0x%x " 2398 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n", 2399 ehc->i.err_mask, link->sactive, ehc->i.serror, 2400 ehc->i.action, frozen, tries_buf); 2401 if (desc) 2402 ata_dev_err(ehc->i.dev, "%s\n", desc); 2403 } else { 2404 ata_link_err(link, "exception Emask 0x%x " 2405 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n", 2406 ehc->i.err_mask, link->sactive, ehc->i.serror, 2407 ehc->i.action, frozen, tries_buf); 2408 if (desc) 2409 ata_link_err(link, "%s\n", desc); 2410 } 2411 2412 #ifdef CONFIG_ATA_VERBOSE_ERROR 2413 if (ehc->i.serror) 2414 ata_link_err(link, 2415 "SError: { %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s}\n", 2416 ehc->i.serror & SERR_DATA_RECOVERED ? "RecovData " : "", 2417 ehc->i.serror & SERR_COMM_RECOVERED ? "RecovComm " : "", 2418 ehc->i.serror & SERR_DATA ? "UnrecovData " : "", 2419 ehc->i.serror & SERR_PERSISTENT ? "Persist " : "", 2420 ehc->i.serror & SERR_PROTOCOL ? "Proto " : "", 2421 ehc->i.serror & SERR_INTERNAL ? "HostInt " : "", 2422 ehc->i.serror & SERR_PHYRDY_CHG ? "PHYRdyChg " : "", 2423 ehc->i.serror & SERR_PHY_INT_ERR ? "PHYInt " : "", 2424 ehc->i.serror & SERR_COMM_WAKE ? 
"CommWake " : "", 2425 ehc->i.serror & SERR_10B_8B_ERR ? "10B8B " : "", 2426 ehc->i.serror & SERR_DISPARITY ? "Dispar " : "", 2427 ehc->i.serror & SERR_CRC ? "BadCRC " : "", 2428 ehc->i.serror & SERR_HANDSHAKE ? "Handshk " : "", 2429 ehc->i.serror & SERR_LINK_SEQ_ERR ? "LinkSeq " : "", 2430 ehc->i.serror & SERR_TRANS_ST_ERROR ? "TrStaTrns " : "", 2431 ehc->i.serror & SERR_UNRECOG_FIS ? "UnrecFIS " : "", 2432 ehc->i.serror & SERR_DEV_XCHG ? "DevExch " : ""); 2433 #endif 2434 2435 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 2436 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 2437 struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf; 2438 const u8 *cdb = qc->cdb; 2439 char data_buf[20] = ""; 2440 char cdb_buf[70] = ""; 2441 2442 if (!(qc->flags & ATA_QCFLAG_FAILED) || 2443 ata_dev_phys_link(qc->dev) != link || !qc->err_mask) 2444 continue; 2445 2446 if (qc->dma_dir != DMA_NONE) { 2447 static const char *dma_str[] = { 2448 [DMA_BIDIRECTIONAL] = "bidi", 2449 [DMA_TO_DEVICE] = "out", 2450 [DMA_FROM_DEVICE] = "in", 2451 }; 2452 static const char *prot_str[] = { 2453 [ATA_PROT_PIO] = "pio", 2454 [ATA_PROT_DMA] = "dma", 2455 [ATA_PROT_NCQ] = "ncq", 2456 [ATAPI_PROT_PIO] = "pio", 2457 [ATAPI_PROT_DMA] = "dma", 2458 }; 2459 2460 snprintf(data_buf, sizeof(data_buf), " %s %u %s", 2461 prot_str[qc->tf.protocol], qc->nbytes, 2462 dma_str[qc->dma_dir]); 2463 } 2464 2465 if (ata_is_atapi(qc->tf.protocol)) { 2466 if (qc->scsicmd) 2467 scsi_print_command(qc->scsicmd); 2468 else 2469 snprintf(cdb_buf, sizeof(cdb_buf), 2470 "cdb %02x %02x %02x %02x %02x %02x %02x %02x " 2471 "%02x %02x %02x %02x %02x %02x %02x %02x\n ", 2472 cdb[0], cdb[1], cdb[2], cdb[3], 2473 cdb[4], cdb[5], cdb[6], cdb[7], 2474 cdb[8], cdb[9], cdb[10], cdb[11], 2475 cdb[12], cdb[13], cdb[14], cdb[15]); 2476 } else { 2477 const char *descr = ata_get_cmd_descript(cmd->command); 2478 if (descr) 2479 ata_dev_err(qc->dev, "failed command: %s\n", 2480 descr); 2481 } 2482 2483 ata_dev_err(qc->dev, 2484 "cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 2485 "tag %d%s\n %s" 2486 "res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 2487 "Emask 0x%x (%s)%s\n", 2488 cmd->command, cmd->feature, cmd->nsect, 2489 cmd->lbal, cmd->lbam, cmd->lbah, 2490 cmd->hob_feature, cmd->hob_nsect, 2491 cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah, 2492 cmd->device, qc->tag, data_buf, cdb_buf, 2493 res->command, res->feature, res->nsect, 2494 res->lbal, res->lbam, res->lbah, 2495 res->hob_feature, res->hob_nsect, 2496 res->hob_lbal, res->hob_lbam, res->hob_lbah, 2497 res->device, qc->err_mask, ata_err_string(qc->err_mask), 2498 qc->err_mask & AC_ERR_NCQ ? " <F>" : ""); 2499 2500 #ifdef CONFIG_ATA_VERBOSE_ERROR 2501 if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | 2502 ATA_ERR)) { 2503 if (res->command & ATA_BUSY) 2504 ata_dev_err(qc->dev, "status: { Busy }\n"); 2505 else 2506 ata_dev_err(qc->dev, "status: { %s%s%s%s}\n", 2507 res->command & ATA_DRDY ? "DRDY " : "", 2508 res->command & ATA_DF ? "DF " : "", 2509 res->command & ATA_DRQ ? "DRQ " : "", 2510 res->command & ATA_ERR ? "ERR " : ""); 2511 } 2512 2513 if (cmd->command != ATA_CMD_PACKET && 2514 (res->feature & (ATA_ICRC | ATA_UNC | ATA_IDNF | 2515 ATA_ABORTED))) 2516 ata_dev_err(qc->dev, "error: { %s%s%s%s}\n", 2517 res->feature & ATA_ICRC ? "ICRC " : "", 2518 res->feature & ATA_UNC ? "UNC " : "", 2519 res->feature & ATA_IDNF ? "IDNF " : "", 2520 res->feature & ATA_ABORTED ? 
"ABRT " : ""); 2521 #endif 2522 } 2523 } 2524 2525 /** 2526 * ata_eh_report - report error handling to user 2527 * @ap: ATA port to report EH about 2528 * 2529 * Report EH to user. 2530 * 2531 * LOCKING: 2532 * None. 2533 */ 2534 void ata_eh_report(struct ata_port *ap) 2535 { 2536 struct ata_link *link; 2537 2538 ata_for_each_link(link, ap, HOST_FIRST) 2539 ata_eh_link_report(link); 2540 } 2541 2542 static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset, 2543 unsigned int *classes, unsigned long deadline, 2544 bool clear_classes) 2545 { 2546 struct ata_device *dev; 2547 2548 if (clear_classes) 2549 ata_for_each_dev(dev, link, ALL) 2550 classes[dev->devno] = ATA_DEV_UNKNOWN; 2551 2552 return reset(link, classes, deadline); 2553 } 2554 2555 static int ata_eh_followup_srst_needed(struct ata_link *link, int rc) 2556 { 2557 if ((link->flags & ATA_LFLAG_NO_SRST) || ata_link_offline(link)) 2558 return 0; 2559 if (rc == -EAGAIN) 2560 return 1; 2561 if (sata_pmp_supported(link->ap) && ata_is_host_link(link)) 2562 return 1; 2563 return 0; 2564 } 2565 2566 int ata_eh_reset(struct ata_link *link, int classify, 2567 ata_prereset_fn_t prereset, ata_reset_fn_t softreset, 2568 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) 2569 { 2570 struct ata_port *ap = link->ap; 2571 struct ata_link *slave = ap->slave_link; 2572 struct ata_eh_context *ehc = &link->eh_context; 2573 struct ata_eh_context *sehc = slave ? &slave->eh_context : NULL; 2574 unsigned int *classes = ehc->classes; 2575 unsigned int lflags = link->flags; 2576 int verbose = !(ehc->i.flags & ATA_EHI_QUIET); 2577 int max_tries = 0, try = 0; 2578 struct ata_link *failed_link; 2579 struct ata_device *dev; 2580 unsigned long deadline, now; 2581 ata_reset_fn_t reset; 2582 unsigned long flags; 2583 u32 sstatus; 2584 int nr_unknown, rc; 2585 2586 /* 2587 * Prepare to reset 2588 */ 2589 while (ata_eh_reset_timeouts[max_tries] != ULONG_MAX) 2590 max_tries++; 2591 if (link->flags & ATA_LFLAG_NO_HRST) 2592 hardreset = NULL; 2593 if (link->flags & ATA_LFLAG_NO_SRST) 2594 softreset = NULL; 2595 2596 /* make sure each reset attempt is at least COOL_DOWN apart */ 2597 if (ehc->i.flags & ATA_EHI_DID_RESET) { 2598 now = jiffies; 2599 WARN_ON(time_after(ehc->last_reset, now)); 2600 deadline = ata_deadline(ehc->last_reset, 2601 ATA_EH_RESET_COOL_DOWN); 2602 if (time_before(now, deadline)) 2603 schedule_timeout_uninterruptible(deadline - now); 2604 } 2605 2606 spin_lock_irqsave(ap->lock, flags); 2607 ap->pflags |= ATA_PFLAG_RESETTING; 2608 spin_unlock_irqrestore(ap->lock, flags); 2609 2610 ata_eh_about_to_do(link, NULL, ATA_EH_RESET); 2611 2612 ata_for_each_dev(dev, link, ALL) { 2613 /* If we issue an SRST then an ATA drive (not ATAPI) 2614 * may change configuration and be in PIO0 timing. If 2615 * we do a hard reset (or are coming from power on) 2616 * this is true for ATA or ATAPI. Until we've set a 2617 * suitable controller mode we should not touch the 2618 * bus as we may be talking too fast. 2619 */ 2620 dev->pio_mode = XFER_PIO_0; 2621 2622 /* If the controller has a pio mode setup function 2623 * then use it to set the chipset to rights. Don't 2624 * touch the DMA setup as that will be dealt with when 2625 * configuring devices. 
2626 */ 2627 if (ap->ops->set_piomode) 2628 ap->ops->set_piomode(ap, dev); 2629 } 2630 2631 /* prefer hardreset */ 2632 reset = NULL; 2633 ehc->i.action &= ~ATA_EH_RESET; 2634 if (hardreset) { 2635 reset = hardreset; 2636 ehc->i.action |= ATA_EH_HARDRESET; 2637 } else if (softreset) { 2638 reset = softreset; 2639 ehc->i.action |= ATA_EH_SOFTRESET; 2640 } 2641 2642 if (prereset) { 2643 unsigned long deadline = ata_deadline(jiffies, 2644 ATA_EH_PRERESET_TIMEOUT); 2645 2646 if (slave) { 2647 sehc->i.action &= ~ATA_EH_RESET; 2648 sehc->i.action |= ehc->i.action; 2649 } 2650 2651 rc = prereset(link, deadline); 2652 2653 /* If present, do prereset on slave link too. Reset 2654 * is skipped iff both master and slave links report 2655 * -ENOENT or clear ATA_EH_RESET. 2656 */ 2657 if (slave && (rc == 0 || rc == -ENOENT)) { 2658 int tmp; 2659 2660 tmp = prereset(slave, deadline); 2661 if (tmp != -ENOENT) 2662 rc = tmp; 2663 2664 ehc->i.action |= sehc->i.action; 2665 } 2666 2667 if (rc) { 2668 if (rc == -ENOENT) { 2669 ata_link_dbg(link, "port disabled--ignoring\n"); 2670 ehc->i.action &= ~ATA_EH_RESET; 2671 2672 ata_for_each_dev(dev, link, ALL) 2673 classes[dev->devno] = ATA_DEV_NONE; 2674 2675 rc = 0; 2676 } else 2677 ata_link_err(link, 2678 "prereset failed (errno=%d)\n", 2679 rc); 2680 goto out; 2681 } 2682 2683 /* prereset() might have cleared ATA_EH_RESET. If so, 2684 * bang classes, thaw and return. 2685 */ 2686 if (reset && !(ehc->i.action & ATA_EH_RESET)) { 2687 ata_for_each_dev(dev, link, ALL) 2688 classes[dev->devno] = ATA_DEV_NONE; 2689 if ((ap->pflags & ATA_PFLAG_FROZEN) && 2690 ata_is_host_link(link)) 2691 ata_eh_thaw_port(ap); 2692 rc = 0; 2693 goto out; 2694 } 2695 } 2696 2697 retry: 2698 /* 2699 * Perform reset 2700 */ 2701 if (ata_is_host_link(link)) 2702 ata_eh_freeze_port(ap); 2703 2704 deadline = ata_deadline(jiffies, ata_eh_reset_timeouts[try++]); 2705 2706 if (reset) { 2707 if (verbose) 2708 ata_link_info(link, "%s resetting link\n", 2709 reset == softreset ? 
"soft" : "hard"); 2710 2711 /* mark that this EH session started with reset */ 2712 ehc->last_reset = jiffies; 2713 if (reset == hardreset) 2714 ehc->i.flags |= ATA_EHI_DID_HARDRESET; 2715 else 2716 ehc->i.flags |= ATA_EHI_DID_SOFTRESET; 2717 2718 rc = ata_do_reset(link, reset, classes, deadline, true); 2719 if (rc && rc != -EAGAIN) { 2720 failed_link = link; 2721 goto fail; 2722 } 2723 2724 /* hardreset slave link if existent */ 2725 if (slave && reset == hardreset) { 2726 int tmp; 2727 2728 if (verbose) 2729 ata_link_info(slave, "hard resetting link\n"); 2730 2731 ata_eh_about_to_do(slave, NULL, ATA_EH_RESET); 2732 tmp = ata_do_reset(slave, reset, classes, deadline, 2733 false); 2734 switch (tmp) { 2735 case -EAGAIN: 2736 rc = -EAGAIN; 2737 case 0: 2738 break; 2739 default: 2740 failed_link = slave; 2741 rc = tmp; 2742 goto fail; 2743 } 2744 } 2745 2746 /* perform follow-up SRST if necessary */ 2747 if (reset == hardreset && 2748 ata_eh_followup_srst_needed(link, rc)) { 2749 reset = softreset; 2750 2751 if (!reset) { 2752 ata_link_err(link, 2753 "follow-up softreset required but no softreset available\n"); 2754 failed_link = link; 2755 rc = -EINVAL; 2756 goto fail; 2757 } 2758 2759 ata_eh_about_to_do(link, NULL, ATA_EH_RESET); 2760 rc = ata_do_reset(link, reset, classes, deadline, true); 2761 if (rc) { 2762 failed_link = link; 2763 goto fail; 2764 } 2765 } 2766 } else { 2767 if (verbose) 2768 ata_link_info(link, 2769 "no reset method available, skipping reset\n"); 2770 if (!(lflags & ATA_LFLAG_ASSUME_CLASS)) 2771 lflags |= ATA_LFLAG_ASSUME_ATA; 2772 } 2773 2774 /* 2775 * Post-reset processing 2776 */ 2777 ata_for_each_dev(dev, link, ALL) { 2778 /* After the reset, the device state is PIO 0 and the 2779 * controller state is undefined. Reset also wakes up 2780 * drives from sleeping mode. 2781 */ 2782 dev->pio_mode = XFER_PIO_0; 2783 dev->flags &= ~ATA_DFLAG_SLEEPING; 2784 2785 if (ata_phys_link_offline(ata_dev_phys_link(dev))) 2786 continue; 2787 2788 /* apply class override */ 2789 if (lflags & ATA_LFLAG_ASSUME_ATA) 2790 classes[dev->devno] = ATA_DEV_ATA; 2791 else if (lflags & ATA_LFLAG_ASSUME_SEMB) 2792 classes[dev->devno] = ATA_DEV_SEMB_UNSUP; 2793 } 2794 2795 /* record current link speed */ 2796 if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0) 2797 link->sata_spd = (sstatus >> 4) & 0xf; 2798 if (slave && sata_scr_read(slave, SCR_STATUS, &sstatus) == 0) 2799 slave->sata_spd = (sstatus >> 4) & 0xf; 2800 2801 /* thaw the port */ 2802 if (ata_is_host_link(link)) 2803 ata_eh_thaw_port(ap); 2804 2805 /* postreset() should clear hardware SError. Although SError 2806 * is cleared during link resume, clearing SError here is 2807 * necessary as some PHYs raise hotplug events after SRST. 2808 * This introduces race condition where hotplug occurs between 2809 * reset and here. This race is mediated by cross checking 2810 * link onlineness and classification result later. 2811 */ 2812 if (postreset) { 2813 postreset(link, classes); 2814 if (slave) 2815 postreset(slave, classes); 2816 } 2817 2818 /* 2819 * Some controllers can't be frozen very well and may set spurious 2820 * error conditions during reset. Clear accumulated error 2821 * information and re-thaw the port if frozen. As reset is the 2822 * final recovery action and we cross check link onlineness against 2823 * device classification later, no hotplug event is lost by this. 
2824 */ 2825 spin_lock_irqsave(link->ap->lock, flags); 2826 memset(&link->eh_info, 0, sizeof(link->eh_info)); 2827 if (slave) 2828 memset(&slave->eh_info, 0, sizeof(link->eh_info)); 2829 ap->pflags &= ~ATA_PFLAG_EH_PENDING; 2830 spin_unlock_irqrestore(link->ap->lock, flags); 2831 2832 if (ap->pflags & ATA_PFLAG_FROZEN) 2833 ata_eh_thaw_port(ap); 2834 2835 /* 2836 * Make sure onlineness and classification result correspond. 2837 * Hotplug could have happened during reset and some 2838 * controllers fail to wait while a drive is spinning up after 2839 * being hotplugged causing misdetection. By cross checking 2840 * link on/offlineness and classification result, those 2841 * conditions can be reliably detected and retried. 2842 */ 2843 nr_unknown = 0; 2844 ata_for_each_dev(dev, link, ALL) { 2845 if (ata_phys_link_online(ata_dev_phys_link(dev))) { 2846 if (classes[dev->devno] == ATA_DEV_UNKNOWN) { 2847 ata_dev_dbg(dev, "link online but device misclassified\n"); 2848 classes[dev->devno] = ATA_DEV_NONE; 2849 nr_unknown++; 2850 } 2851 } else if (ata_phys_link_offline(ata_dev_phys_link(dev))) { 2852 if (ata_class_enabled(classes[dev->devno])) 2853 ata_dev_dbg(dev, 2854 "link offline, clearing class %d to NONE\n", 2855 classes[dev->devno]); 2856 classes[dev->devno] = ATA_DEV_NONE; 2857 } else if (classes[dev->devno] == ATA_DEV_UNKNOWN) { 2858 ata_dev_dbg(dev, 2859 "link status unknown, clearing UNKNOWN to NONE\n"); 2860 classes[dev->devno] = ATA_DEV_NONE; 2861 } 2862 } 2863 2864 if (classify && nr_unknown) { 2865 if (try < max_tries) { 2866 ata_link_warn(link, 2867 "link online but %d devices misclassified, retrying\n", 2868 nr_unknown); 2869 failed_link = link; 2870 rc = -EAGAIN; 2871 goto fail; 2872 } 2873 ata_link_warn(link, 2874 "link online but %d devices misclassified, " 2875 "device detection might fail\n", nr_unknown); 2876 } 2877 2878 /* reset successful, schedule revalidation */ 2879 ata_eh_done(link, NULL, ATA_EH_RESET); 2880 if (slave) 2881 ata_eh_done(slave, NULL, ATA_EH_RESET); 2882 ehc->last_reset = jiffies; /* update to completion time */ 2883 ehc->i.action |= ATA_EH_REVALIDATE; 2884 link->lpm_policy = ATA_LPM_UNKNOWN; /* reset LPM state */ 2885 2886 rc = 0; 2887 out: 2888 /* clear hotplug flag */ 2889 ehc->i.flags &= ~ATA_EHI_HOTPLUGGED; 2890 if (slave) 2891 sehc->i.flags &= ~ATA_EHI_HOTPLUGGED; 2892 2893 spin_lock_irqsave(ap->lock, flags); 2894 ap->pflags &= ~ATA_PFLAG_RESETTING; 2895 spin_unlock_irqrestore(ap->lock, flags); 2896 2897 return rc; 2898 2899 fail: 2900 /* if SCR isn't accessible on a fan-out port, PMP needs to be reset */ 2901 if (!ata_is_host_link(link) && 2902 sata_scr_read(link, SCR_STATUS, &sstatus)) 2903 rc = -ERESTART; 2904 2905 if (try >= max_tries) { 2906 /* 2907 * Thaw host port even if reset failed, so that the port 2908 * can be retried on the next phy event. This risks 2909 * repeated EH runs but seems to be a better tradeoff than 2910 * shutting down a port after a botched hotplug attempt. 2911 */ 2912 if (ata_is_host_link(link)) 2913 ata_eh_thaw_port(ap); 2914 goto out; 2915 } 2916 2917 now = jiffies; 2918 if (time_before(now, deadline)) { 2919 unsigned long delta = deadline - now; 2920 2921 ata_link_warn(failed_link, 2922 "reset failed (errno=%d), retrying in %u secs\n", 2923 rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000)); 2924 2925 ata_eh_release(ap); 2926 while (delta) 2927 delta = schedule_timeout_uninterruptible(delta); 2928 ata_eh_acquire(ap); 2929 } 2930 2931 /* 2932 * While disks spinup behind PMP, some controllers fail sending SRST. 
2933 * They need to be reset - as well as the PMP - before retrying. 2934 */ 2935 if (rc == -ERESTART) { 2936 if (ata_is_host_link(link)) 2937 ata_eh_thaw_port(ap); 2938 goto out; 2939 } 2940 2941 if (try == max_tries - 1) { 2942 sata_down_spd_limit(link, 0); 2943 if (slave) 2944 sata_down_spd_limit(slave, 0); 2945 } else if (rc == -EPIPE) 2946 sata_down_spd_limit(failed_link, 0); 2947 2948 if (hardreset) 2949 reset = hardreset; 2950 goto retry; 2951 } 2952 2953 static inline void ata_eh_pull_park_action(struct ata_port *ap) 2954 { 2955 struct ata_link *link; 2956 struct ata_device *dev; 2957 unsigned long flags; 2958 2959 /* 2960 * This function can be thought of as an extended version of 2961 * ata_eh_about_to_do() specially crafted to accommodate the 2962 * requirements of ATA_EH_PARK handling. Since the EH thread 2963 * does not leave the do {} while () loop in ata_eh_recover as 2964 * long as the timeout for a park request to *one* device on 2965 * the port has not expired, and since we still want to pick 2966 * up park requests to other devices on the same port or 2967 * timeout updates for the same device, we have to pull 2968 * ATA_EH_PARK actions from eh_info into eh_context.i 2969 * ourselves at the beginning of each pass over the loop. 2970 * 2971 * Additionally, all write accesses to &ap->park_req_pending 2972 * through INIT_COMPLETION() (see below) or complete_all() 2973 * (see ata_scsi_park_store()) are protected by the host lock. 2974 * As a result we have that park_req_pending.done is zero on 2975 * exit from this function, i.e. when ATA_EH_PARK actions for 2976 * *all* devices on port ap have been pulled into the 2977 * respective eh_context structs. If, and only if, 2978 * park_req_pending.done is non-zero by the time we reach 2979 * wait_for_completion_timeout(), another ATA_EH_PARK action 2980 * has been scheduled for at least one of the devices on port 2981 * ap and we have to cycle over the do {} while () loop in 2982 * ata_eh_recover() again. 
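	 *
	 * A condensed sketch (simplified, not literal code) of the
	 * resulting pattern in ata_eh_recover():
	 *
	 *	do {
	 *		ata_eh_pull_park_action(ap);
	 *		// issue head-unload to devices with pending deadlines
	 *		wait_for_completion_timeout(&ap->park_req_pending,
	 *					    deadline - now);
	 *	} while (deadline not reached);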
2983 */ 2984 2985 spin_lock_irqsave(ap->lock, flags); 2986 INIT_COMPLETION(ap->park_req_pending); 2987 ata_for_each_link(link, ap, EDGE) { 2988 ata_for_each_dev(dev, link, ALL) { 2989 struct ata_eh_info *ehi = &link->eh_info; 2990 2991 link->eh_context.i.dev_action[dev->devno] |= 2992 ehi->dev_action[dev->devno] & ATA_EH_PARK; 2993 ata_eh_clear_action(link, dev, ehi, ATA_EH_PARK); 2994 } 2995 } 2996 spin_unlock_irqrestore(ap->lock, flags); 2997 } 2998 2999 static void ata_eh_park_issue_cmd(struct ata_device *dev, int park) 3000 { 3001 struct ata_eh_context *ehc = &dev->link->eh_context; 3002 struct ata_taskfile tf; 3003 unsigned int err_mask; 3004 3005 ata_tf_init(dev, &tf); 3006 if (park) { 3007 ehc->unloaded_mask |= 1 << dev->devno; 3008 tf.command = ATA_CMD_IDLEIMMEDIATE; 3009 tf.feature = 0x44; 3010 tf.lbal = 0x4c; 3011 tf.lbam = 0x4e; 3012 tf.lbah = 0x55; 3013 } else { 3014 ehc->unloaded_mask &= ~(1 << dev->devno); 3015 tf.command = ATA_CMD_CHK_POWER; 3016 } 3017 3018 tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR; 3019 tf.protocol |= ATA_PROT_NODATA; 3020 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); 3021 if (park && (err_mask || tf.lbal != 0xc4)) { 3022 ata_dev_err(dev, "head unload failed!\n"); 3023 ehc->unloaded_mask &= ~(1 << dev->devno); 3024 } 3025 } 3026 3027 static int ata_eh_revalidate_and_attach(struct ata_link *link, 3028 struct ata_device **r_failed_dev) 3029 { 3030 struct ata_port *ap = link->ap; 3031 struct ata_eh_context *ehc = &link->eh_context; 3032 struct ata_device *dev; 3033 unsigned int new_mask = 0; 3034 unsigned long flags; 3035 int rc = 0; 3036 3037 DPRINTK("ENTER\n"); 3038 3039 /* For PATA drive side cable detection to work, IDENTIFY must 3040 * be done backwards such that PDIAG- is released by the slave 3041 * device before the master device is identified. 3042 */ 3043 ata_for_each_dev(dev, link, ALL_REVERSE) { 3044 unsigned int action = ata_eh_dev_action(dev); 3045 unsigned int readid_flags = 0; 3046 3047 if (ehc->i.flags & ATA_EHI_DID_RESET) 3048 readid_flags |= ATA_READID_POSTRESET; 3049 3050 if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) { 3051 WARN_ON(dev->class == ATA_DEV_PMP); 3052 3053 if (ata_phys_link_offline(ata_dev_phys_link(dev))) { 3054 rc = -EIO; 3055 goto err; 3056 } 3057 3058 ata_eh_about_to_do(link, dev, ATA_EH_REVALIDATE); 3059 rc = ata_dev_revalidate(dev, ehc->classes[dev->devno], 3060 readid_flags); 3061 if (rc) 3062 goto err; 3063 3064 ata_eh_done(link, dev, ATA_EH_REVALIDATE); 3065 3066 /* Configuration may have changed, reconfigure 3067 * transfer mode. 3068 */ 3069 ehc->i.flags |= ATA_EHI_SETMODE; 3070 3071 /* schedule the scsi_rescan_device() here */ 3072 schedule_work(&(ap->scsi_rescan_task)); 3073 } else if (dev->class == ATA_DEV_UNKNOWN && 3074 ehc->tries[dev->devno] && 3075 ata_class_enabled(ehc->classes[dev->devno])) { 3076 /* Temporarily set dev->class, it will be 3077 * permanently set once all configurations are 3078 * complete. This is necessary because new 3079 * device configuration is done in two 3080 * separate loops. 
3081 */ 3082 dev->class = ehc->classes[dev->devno]; 3083 3084 if (dev->class == ATA_DEV_PMP) 3085 rc = sata_pmp_attach(dev); 3086 else 3087 rc = ata_dev_read_id(dev, &dev->class, 3088 readid_flags, dev->id); 3089 3090 /* read_id might have changed class, store and reset */ 3091 ehc->classes[dev->devno] = dev->class; 3092 dev->class = ATA_DEV_UNKNOWN; 3093 3094 switch (rc) { 3095 case 0: 3096 /* clear error info accumulated during probe */ 3097 ata_ering_clear(&dev->ering); 3098 new_mask |= 1 << dev->devno; 3099 break; 3100 case -ENOENT: 3101 /* IDENTIFY was issued to non-existent 3102 * device. No need to reset. Just 3103 * thaw and ignore the device. 3104 */ 3105 ata_eh_thaw_port(ap); 3106 break; 3107 default: 3108 goto err; 3109 } 3110 } 3111 } 3112 3113 /* PDIAG- should have been released, ask cable type if post-reset */ 3114 if ((ehc->i.flags & ATA_EHI_DID_RESET) && ata_is_host_link(link)) { 3115 if (ap->ops->cable_detect) 3116 ap->cbl = ap->ops->cable_detect(ap); 3117 ata_force_cbl(ap); 3118 } 3119 3120 /* Configure new devices forward such that user doesn't see 3121 * device detection messages backwards. 3122 */ 3123 ata_for_each_dev(dev, link, ALL) { 3124 if (!(new_mask & (1 << dev->devno))) 3125 continue; 3126 3127 dev->class = ehc->classes[dev->devno]; 3128 3129 if (dev->class == ATA_DEV_PMP) 3130 continue; 3131 3132 ehc->i.flags |= ATA_EHI_PRINTINFO; 3133 rc = ata_dev_configure(dev); 3134 ehc->i.flags &= ~ATA_EHI_PRINTINFO; 3135 if (rc) { 3136 dev->class = ATA_DEV_UNKNOWN; 3137 goto err; 3138 } 3139 3140 spin_lock_irqsave(ap->lock, flags); 3141 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; 3142 spin_unlock_irqrestore(ap->lock, flags); 3143 3144 /* new device discovered, configure xfermode */ 3145 ehc->i.flags |= ATA_EHI_SETMODE; 3146 } 3147 3148 return 0; 3149 3150 err: 3151 *r_failed_dev = dev; 3152 DPRINTK("EXIT rc=%d\n", rc); 3153 return rc; 3154 } 3155 3156 /** 3157 * ata_set_mode - Program timings and issue SET FEATURES - XFER 3158 * @link: link on which timings will be programmed 3159 * @r_failed_dev: out parameter for failed device 3160 * 3161 * Set ATA device disk transfer mode (PIO3, UDMA6, etc.). If 3162 * ata_set_mode() fails, pointer to the failing device is 3163 * returned in @r_failed_dev. 3164 * 3165 * LOCKING: 3166 * PCI/etc. bus probe sem. 3167 * 3168 * RETURNS: 3169 * 0 on success, negative errno otherwise 3170 */ 3171 int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev) 3172 { 3173 struct ata_port *ap = link->ap; 3174 struct ata_device *dev; 3175 int rc; 3176 3177 /* if data transfer is verified, clear DUBIOUS_XFER on ering top */ 3178 ata_for_each_dev(dev, link, ENABLED) { 3179 if (!(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) { 3180 struct ata_ering_entry *ent; 3181 3182 ent = ata_ering_top(&dev->ering); 3183 if (ent) 3184 ent->eflags &= ~ATA_EFLAG_DUBIOUS_XFER; 3185 } 3186 } 3187 3188 /* has private set_mode? 
 */
	if (ap->ops->set_mode)
		rc = ap->ops->set_mode(link, r_failed_dev);
	else
		rc = ata_do_set_mode(link, r_failed_dev);

	/* if transfer mode has changed, set DUBIOUS_XFER on device */
	ata_for_each_dev(dev, link, ENABLED) {
		struct ata_eh_context *ehc = &link->eh_context;
		u8 saved_xfer_mode = ehc->saved_xfer_mode[dev->devno];
		u8 saved_ncq = !!(ehc->saved_ncq_enabled & (1 << dev->devno));

		if (dev->xfer_mode != saved_xfer_mode ||
		    ata_ncq_enabled(dev) != saved_ncq)
			dev->flags |= ATA_DFLAG_DUBIOUS_XFER;
	}

	return rc;
}

/**
 *	atapi_eh_clear_ua - Clear ATAPI UNIT ATTENTION after reset
 *	@dev: ATAPI device to clear UA for
 *
 *	Resets and other operations can make an ATAPI device raise
 *	UNIT ATTENTION which causes the next operation to fail.  This
 *	function clears UA.
 *
 *	LOCKING:
 *	EH context (may sleep).
 *
 *	RETURNS:
 *	0 on success, -errno on failure.
 */
static int atapi_eh_clear_ua(struct ata_device *dev)
{
	int i;

	for (i = 0; i < ATA_EH_UA_TRIES; i++) {
		u8 *sense_buffer = dev->link->ap->sector_buf;
		u8 sense_key = 0;
		unsigned int err_mask;

		err_mask = atapi_eh_tur(dev, &sense_key);
		if (err_mask != 0 && err_mask != AC_ERR_DEV) {
			ata_dev_warn(dev,
				"TEST_UNIT_READY failed (err_mask=0x%x)\n",
				err_mask);
			return -EIO;
		}

		if (!err_mask || sense_key != UNIT_ATTENTION)
			return 0;

		err_mask = atapi_eh_request_sense(dev, sense_buffer, sense_key);
		if (err_mask) {
			ata_dev_warn(dev, "failed to clear "
				"UNIT ATTENTION (err_mask=0x%x)\n", err_mask);
			return -EIO;
		}
	}

	ata_dev_warn(dev, "UNIT ATTENTION persists after %d tries\n",
		     ATA_EH_UA_TRIES);

	return 0;
}

/**
 *	ata_eh_maybe_retry_flush - Retry FLUSH if necessary
 *	@dev: ATA device which may need FLUSH retry
 *
 *	If @dev failed FLUSH, it needs to be reported to the upper layer
 *	immediately as it means that @dev failed to remap and already
 *	lost at least a sector and further FLUSH retries won't make
 *	any difference to the lost sector.  However, if FLUSH failed
 *	for other reasons, for example a transmission error, FLUSH
 *	needs to be retried.
 *
 *	This function determines whether FLUSH failure retry is
 *	necessary and performs it if so.
 *
 *	RETURNS:
 *	0 if EH can continue, -errno if EH needs to be repeated.
 */
static int ata_eh_maybe_retry_flush(struct ata_device *dev)
{
	struct ata_link *link = dev->link;
	struct ata_port *ap = link->ap;
	struct ata_queued_cmd *qc;
	struct ata_taskfile tf;
	unsigned int err_mask;
	int rc = 0;

	/* did flush fail for this device?
 */
	if (!ata_tag_valid(link->active_tag))
		return 0;

	qc = __ata_qc_from_tag(ap, link->active_tag);
	if (qc->dev != dev || (qc->tf.command != ATA_CMD_FLUSH_EXT &&
			       qc->tf.command != ATA_CMD_FLUSH))
		return 0;

	/* if the device failed it, it should be reported to upper layers */
	if (qc->err_mask & AC_ERR_DEV)
		return 0;

	/* flush failed for some other reason, give it another shot */
	ata_tf_init(dev, &tf);

	tf.command = qc->tf.command;
	tf.flags |= ATA_TFLAG_DEVICE;
	tf.protocol = ATA_PROT_NODATA;

	ata_dev_warn(dev, "retrying FLUSH 0x%x Emask 0x%x\n",
		     tf.command, qc->err_mask);

	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
	if (!err_mask) {
		/*
		 * FLUSH is complete but there's no way to
		 * successfully complete a failed command from EH.
		 * Making sure retry is allowed at least once and
		 * retrying it should do the trick - whatever was in
		 * the cache is already on the platter and this won't
		 * cause infinite loop.
		 */
		qc->scsicmd->allowed = max(qc->scsicmd->allowed, 1);
	} else {
		ata_dev_warn(dev, "FLUSH failed Emask 0x%x\n",
			     err_mask);
		rc = -EIO;

		/* if device failed it, report it to upper layers */
		if (err_mask & AC_ERR_DEV) {
			qc->err_mask |= AC_ERR_DEV;
			qc->result_tf = tf;
			if (!(ap->pflags & ATA_PFLAG_FROZEN))
				rc = 0;
		}
	}
	return rc;
}

/**
 *	ata_eh_set_lpm - configure SATA interface power management
 *	@link: link to configure power management
 *	@policy: the link power management policy
 *	@r_failed_dev: out parameter for failed device
 *
 *	Enable SATA interface power management.  This will enable
 *	Device Initiated Power Management (DIPM) for the min_power
 *	policy, and then call driver-specific callbacks for enabling
 *	Host Initiated Power Management (HIPM).
 *
 *	LOCKING:
 *	EH context.
 *
 *	RETURNS:
 *	0 on success, -errno on failure.
 */
static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
			  struct ata_device **r_failed_dev)
{
	struct ata_port *ap = ata_is_host_link(link) ? link->ap : NULL;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_device *dev, *link_dev = NULL, *lpm_dev = NULL;
	enum ata_lpm_policy old_policy = link->lpm_policy;
	bool no_dipm = link->ap->flags & ATA_FLAG_NO_DIPM;
	unsigned int hints = ATA_LPM_EMPTY | ATA_LPM_HIPM;
	unsigned int err_mask;
	int rc;

	/* if the link or host doesn't do LPM, noop */
	if ((link->flags & ATA_LFLAG_NO_LPM) || (ap && !ap->ops->set_lpm))
		return 0;

	/*
	 * DIPM is enabled only for MIN_POWER as some devices
	 * misbehave when the host NACKs transition to SLUMBER.  Order
	 * device and link configurations such that the host always
	 * allows DIPM requests.
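	 *
	 * For reference, DIPM is toggled on the device with SET
	 * FEATURES, exactly as done below in this function:
	 *
	 *	ata_dev_set_feature(dev, SETFEATURES_SATA_DISABLE,
	 *			    SATA_DIPM);	// before link config
	 *	ata_dev_set_feature(dev, SETFEATURES_SATA_ENABLE,
	 *			    SATA_DIPM);	// after, for MIN_POWER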
3370 */ 3371 ata_for_each_dev(dev, link, ENABLED) { 3372 bool hipm = ata_id_has_hipm(dev->id); 3373 bool dipm = ata_id_has_dipm(dev->id) && !no_dipm; 3374 3375 /* find the first enabled and LPM enabled devices */ 3376 if (!link_dev) 3377 link_dev = dev; 3378 3379 if (!lpm_dev && (hipm || dipm)) 3380 lpm_dev = dev; 3381 3382 hints &= ~ATA_LPM_EMPTY; 3383 if (!hipm) 3384 hints &= ~ATA_LPM_HIPM; 3385 3386 /* disable DIPM before changing link config */ 3387 if (policy != ATA_LPM_MIN_POWER && dipm) { 3388 err_mask = ata_dev_set_feature(dev, 3389 SETFEATURES_SATA_DISABLE, SATA_DIPM); 3390 if (err_mask && err_mask != AC_ERR_DEV) { 3391 ata_dev_warn(dev, 3392 "failed to disable DIPM, Emask 0x%x\n", 3393 err_mask); 3394 rc = -EIO; 3395 goto fail; 3396 } 3397 } 3398 } 3399 3400 if (ap) { 3401 rc = ap->ops->set_lpm(link, policy, hints); 3402 if (!rc && ap->slave_link) 3403 rc = ap->ops->set_lpm(ap->slave_link, policy, hints); 3404 } else 3405 rc = sata_pmp_set_lpm(link, policy, hints); 3406 3407 /* 3408 * Attribute link config failure to the first (LPM) enabled 3409 * device on the link. 3410 */ 3411 if (rc) { 3412 if (rc == -EOPNOTSUPP) { 3413 link->flags |= ATA_LFLAG_NO_LPM; 3414 return 0; 3415 } 3416 dev = lpm_dev ? lpm_dev : link_dev; 3417 goto fail; 3418 } 3419 3420 /* 3421 * Low level driver acked the transition. Issue DIPM command 3422 * with the new policy set. 3423 */ 3424 link->lpm_policy = policy; 3425 if (ap && ap->slave_link) 3426 ap->slave_link->lpm_policy = policy; 3427 3428 /* host config updated, enable DIPM if transitioning to MIN_POWER */ 3429 ata_for_each_dev(dev, link, ENABLED) { 3430 if (policy == ATA_LPM_MIN_POWER && !no_dipm && 3431 ata_id_has_dipm(dev->id)) { 3432 err_mask = ata_dev_set_feature(dev, 3433 SETFEATURES_SATA_ENABLE, SATA_DIPM); 3434 if (err_mask && err_mask != AC_ERR_DEV) { 3435 ata_dev_warn(dev, 3436 "failed to enable DIPM, Emask 0x%x\n", 3437 err_mask); 3438 rc = -EIO; 3439 goto fail; 3440 } 3441 } 3442 } 3443 3444 return 0; 3445 3446 fail: 3447 /* restore the old policy */ 3448 link->lpm_policy = old_policy; 3449 if (ap && ap->slave_link) 3450 ap->slave_link->lpm_policy = old_policy; 3451 3452 /* if no device or only one more chance is left, disable LPM */ 3453 if (!dev || ehc->tries[dev->devno] <= 2) { 3454 ata_link_warn(link, "disabling LPM on the link\n"); 3455 link->flags |= ATA_LFLAG_NO_LPM; 3456 } 3457 if (r_failed_dev) 3458 *r_failed_dev = dev; 3459 return rc; 3460 } 3461 3462 int ata_link_nr_enabled(struct ata_link *link) 3463 { 3464 struct ata_device *dev; 3465 int cnt = 0; 3466 3467 ata_for_each_dev(dev, link, ENABLED) 3468 cnt++; 3469 return cnt; 3470 } 3471 3472 static int ata_link_nr_vacant(struct ata_link *link) 3473 { 3474 struct ata_device *dev; 3475 int cnt = 0; 3476 3477 ata_for_each_dev(dev, link, ALL) 3478 if (dev->class == ATA_DEV_UNKNOWN) 3479 cnt++; 3480 return cnt; 3481 } 3482 3483 static int ata_eh_skip_recovery(struct ata_link *link) 3484 { 3485 struct ata_port *ap = link->ap; 3486 struct ata_eh_context *ehc = &link->eh_context; 3487 struct ata_device *dev; 3488 3489 /* skip disabled links */ 3490 if (link->flags & ATA_LFLAG_DISABLED) 3491 return 1; 3492 3493 /* skip if explicitly requested */ 3494 if (ehc->i.flags & ATA_EHI_NO_RECOVERY) 3495 return 1; 3496 3497 /* thaw frozen port and recover failed devices */ 3498 if ((ap->pflags & ATA_PFLAG_FROZEN) || ata_link_nr_enabled(link)) 3499 return 0; 3500 3501 /* reset at least once if reset is requested */ 3502 if ((ehc->i.action & ATA_EH_RESET) && 3503 !(ehc->i.flags & 
ATA_EHI_DID_RESET))
		return 0;

	/* skip if class codes for all vacant slots are ATA_DEV_NONE */
	ata_for_each_dev(dev, link, ALL) {
		if (dev->class == ATA_DEV_UNKNOWN &&
		    ehc->classes[dev->devno] != ATA_DEV_NONE)
			return 0;
	}

	return 1;
}

static int ata_count_probe_trials_cb(struct ata_ering_entry *ent, void *void_arg)
{
	u64 interval = msecs_to_jiffies(ATA_EH_PROBE_TRIAL_INTERVAL);
	u64 now = get_jiffies_64();
	int *trials = void_arg;

	if ((ent->eflags & ATA_EFLAG_OLD_ER) ||
	    (ent->timestamp < now - min(now, interval)))
		return -1;

	(*trials)++;
	return 0;
}

static int ata_eh_schedule_probe(struct ata_device *dev)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;
	struct ata_link *link = ata_dev_phys_link(dev);
	int trials = 0;

	if (!(ehc->i.probe_mask & (1 << dev->devno)) ||
	    (ehc->did_probe_mask & (1 << dev->devno)))
		return 0;

	ata_eh_detach_dev(dev);
	ata_dev_init(dev);
	ehc->did_probe_mask |= (1 << dev->devno);
	ehc->i.action |= ATA_EH_RESET;
	ehc->saved_xfer_mode[dev->devno] = 0;
	ehc->saved_ncq_enabled &= ~(1 << dev->devno);

	/* the link may be in a deep sleep, wake it up */
	if (link->lpm_policy > ATA_LPM_MAX_POWER) {
		if (ata_is_host_link(link))
			link->ap->ops->set_lpm(link, ATA_LPM_MAX_POWER,
					       ATA_LPM_EMPTY);
		else
			sata_pmp_set_lpm(link, ATA_LPM_MAX_POWER,
					 ATA_LPM_EMPTY);
	}

	/* Record and count probe trials on the ering.  The specific
	 * error mask used is irrelevant.  Because a successful device
	 * detection clears the ering, this count accumulates only if
	 * there are consecutive failed probes.
	 *
	 * If the count exceeds ATA_EH_PROBE_TRIALS within the last
	 * ATA_EH_PROBE_TRIAL_INTERVAL, the link speed is forced down
	 * to 1.5Gbps.
	 *
	 * This is to work around cases where failed link speed
	 * negotiation results in device misdetection leading to
	 * infinite DEVXCHG or PHRDY CHG events.
	 */
	ata_ering_record(&dev->ering, 0, AC_ERR_OTHER);
	ata_ering_map(&dev->ering, ata_count_probe_trials_cb, &trials);

	if (trials > ATA_EH_PROBE_TRIALS)
		sata_down_spd_limit(link, 1);

	return 1;
}

static int ata_eh_handle_dev_fail(struct ata_device *dev, int err)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;

	/* -EAGAIN from EH routine indicates retry without prejudice.
	 * The requester is responsible for ensuring forward progress.
	 */
	if (err != -EAGAIN)
		ehc->tries[dev->devno]--;

	switch (err) {
	case -ENODEV:
		/* device missing or wrong IDENTIFY data, schedule probing */
		ehc->i.probe_mask |= (1 << dev->devno);
		/* fall through */
	case -EINVAL:
		/* give it just one more chance */
		ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1);
		/* fall through */
	case -EIO:
		if (ehc->tries[dev->devno] == 1) {
			/* This is the last chance, better to slow
			 * down than lose it.
3600 */ 3601 sata_down_spd_limit(ata_dev_phys_link(dev), 0); 3602 if (dev->pio_mode > XFER_PIO_0) 3603 ata_down_xfermask_limit(dev, ATA_DNXFER_PIO); 3604 } 3605 } 3606 3607 if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) { 3608 /* disable device if it has used up all its chances */ 3609 ata_dev_disable(dev); 3610 3611 /* detach if offline */ 3612 if (ata_phys_link_offline(ata_dev_phys_link(dev))) 3613 ata_eh_detach_dev(dev); 3614 3615 /* schedule probe if necessary */ 3616 if (ata_eh_schedule_probe(dev)) { 3617 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 3618 memset(ehc->cmd_timeout_idx[dev->devno], 0, 3619 sizeof(ehc->cmd_timeout_idx[dev->devno])); 3620 } 3621 3622 return 1; 3623 } else { 3624 ehc->i.action |= ATA_EH_RESET; 3625 return 0; 3626 } 3627 } 3628 3629 /** 3630 * ata_eh_recover - recover host port after error 3631 * @ap: host port to recover 3632 * @prereset: prereset method (can be NULL) 3633 * @softreset: softreset method (can be NULL) 3634 * @hardreset: hardreset method (can be NULL) 3635 * @postreset: postreset method (can be NULL) 3636 * @r_failed_link: out parameter for failed link 3637 * 3638 * This is the alpha and omega, eum and yang, heart and soul of 3639 * libata exception handling. On entry, actions required to 3640 * recover each link and hotplug requests are recorded in the 3641 * link's eh_context. This function executes all the operations 3642 * with appropriate retrials and fallbacks to resurrect failed 3643 * devices, detach goners and greet newcomers. 3644 * 3645 * LOCKING: 3646 * Kernel thread context (may sleep). 3647 * 3648 * RETURNS: 3649 * 0 on success, -errno on failure. 3650 */ 3651 int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, 3652 ata_reset_fn_t softreset, ata_reset_fn_t hardreset, 3653 ata_postreset_fn_t postreset, 3654 struct ata_link **r_failed_link) 3655 { 3656 struct ata_link *link; 3657 struct ata_device *dev; 3658 int rc, nr_fails; 3659 unsigned long flags, deadline; 3660 3661 DPRINTK("ENTER\n"); 3662 3663 /* prep for recovery */ 3664 ata_for_each_link(link, ap, EDGE) { 3665 struct ata_eh_context *ehc = &link->eh_context; 3666 3667 /* re-enable link? */ 3668 if (ehc->i.action & ATA_EH_ENABLE_LINK) { 3669 ata_eh_about_to_do(link, NULL, ATA_EH_ENABLE_LINK); 3670 spin_lock_irqsave(ap->lock, flags); 3671 link->flags &= ~ATA_LFLAG_DISABLED; 3672 spin_unlock_irqrestore(ap->lock, flags); 3673 ata_eh_done(link, NULL, ATA_EH_ENABLE_LINK); 3674 } 3675 3676 ata_for_each_dev(dev, link, ALL) { 3677 if (link->flags & ATA_LFLAG_NO_RETRY) 3678 ehc->tries[dev->devno] = 1; 3679 else 3680 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 3681 3682 /* collect port action mask recorded in dev actions */ 3683 ehc->i.action |= ehc->i.dev_action[dev->devno] & 3684 ~ATA_EH_PERDEV_MASK; 3685 ehc->i.dev_action[dev->devno] &= ATA_EH_PERDEV_MASK; 3686 3687 /* process hotplug request */ 3688 if (dev->flags & ATA_DFLAG_DETACH) 3689 ata_eh_detach_dev(dev); 3690 3691 /* schedule probe if necessary */ 3692 if (!ata_dev_enabled(dev)) 3693 ata_eh_schedule_probe(dev); 3694 } 3695 } 3696 3697 retry: 3698 rc = 0; 3699 3700 /* if UNLOADING, finish immediately */ 3701 if (ap->pflags & ATA_PFLAG_UNLOADING) 3702 goto out; 3703 3704 /* prep for EH */ 3705 ata_for_each_link(link, ap, EDGE) { 3706 struct ata_eh_context *ehc = &link->eh_context; 3707 3708 /* skip EH if possible. 
*/ 3709 if (ata_eh_skip_recovery(link)) 3710 ehc->i.action = 0; 3711 3712 ata_for_each_dev(dev, link, ALL) 3713 ehc->classes[dev->devno] = ATA_DEV_UNKNOWN; 3714 } 3715 3716 /* reset */ 3717 ata_for_each_link(link, ap, EDGE) { 3718 struct ata_eh_context *ehc = &link->eh_context; 3719 3720 if (!(ehc->i.action & ATA_EH_RESET)) 3721 continue; 3722 3723 rc = ata_eh_reset(link, ata_link_nr_vacant(link), 3724 prereset, softreset, hardreset, postreset); 3725 if (rc) { 3726 ata_link_err(link, "reset failed, giving up\n"); 3727 goto out; 3728 } 3729 } 3730 3731 do { 3732 unsigned long now; 3733 3734 /* 3735 * clears ATA_EH_PARK in eh_info and resets 3736 * ap->park_req_pending 3737 */ 3738 ata_eh_pull_park_action(ap); 3739 3740 deadline = jiffies; 3741 ata_for_each_link(link, ap, EDGE) { 3742 ata_for_each_dev(dev, link, ALL) { 3743 struct ata_eh_context *ehc = &link->eh_context; 3744 unsigned long tmp; 3745 3746 if (dev->class != ATA_DEV_ATA) 3747 continue; 3748 if (!(ehc->i.dev_action[dev->devno] & 3749 ATA_EH_PARK)) 3750 continue; 3751 tmp = dev->unpark_deadline; 3752 if (time_before(deadline, tmp)) 3753 deadline = tmp; 3754 else if (time_before_eq(tmp, jiffies)) 3755 continue; 3756 if (ehc->unloaded_mask & (1 << dev->devno)) 3757 continue; 3758 3759 ata_eh_park_issue_cmd(dev, 1); 3760 } 3761 } 3762 3763 now = jiffies; 3764 if (time_before_eq(deadline, now)) 3765 break; 3766 3767 ata_eh_release(ap); 3768 deadline = wait_for_completion_timeout(&ap->park_req_pending, 3769 deadline - now); 3770 ata_eh_acquire(ap); 3771 } while (deadline); 3772 ata_for_each_link(link, ap, EDGE) { 3773 ata_for_each_dev(dev, link, ALL) { 3774 if (!(link->eh_context.unloaded_mask & 3775 (1 << dev->devno))) 3776 continue; 3777 3778 ata_eh_park_issue_cmd(dev, 0); 3779 ata_eh_done(link, dev, ATA_EH_PARK); 3780 } 3781 } 3782 3783 /* the rest */ 3784 nr_fails = 0; 3785 ata_for_each_link(link, ap, PMP_FIRST) { 3786 struct ata_eh_context *ehc = &link->eh_context; 3787 3788 if (sata_pmp_attached(ap) && ata_is_host_link(link)) 3789 goto config_lpm; 3790 3791 /* revalidate existing devices and attach new ones */ 3792 rc = ata_eh_revalidate_and_attach(link, &dev); 3793 if (rc) 3794 goto rest_fail; 3795 3796 /* if PMP got attached, return, pmp EH will take care of it */ 3797 if (link->device->class == ATA_DEV_PMP) { 3798 ehc->i.action = 0; 3799 return 0; 3800 } 3801 3802 /* configure transfer mode if necessary */ 3803 if (ehc->i.flags & ATA_EHI_SETMODE) { 3804 rc = ata_set_mode(link, &dev); 3805 if (rc) 3806 goto rest_fail; 3807 ehc->i.flags &= ~ATA_EHI_SETMODE; 3808 } 3809 3810 /* If reset has been issued, clear UA to avoid 3811 * disrupting the current users of the device. 
3812 */ 3813 if (ehc->i.flags & ATA_EHI_DID_RESET) { 3814 ata_for_each_dev(dev, link, ALL) { 3815 if (dev->class != ATA_DEV_ATAPI) 3816 continue; 3817 rc = atapi_eh_clear_ua(dev); 3818 if (rc) 3819 goto rest_fail; 3820 } 3821 } 3822 3823 /* retry flush if necessary */ 3824 ata_for_each_dev(dev, link, ALL) { 3825 if (dev->class != ATA_DEV_ATA) 3826 continue; 3827 rc = ata_eh_maybe_retry_flush(dev); 3828 if (rc) 3829 goto rest_fail; 3830 } 3831 3832 config_lpm: 3833 /* configure link power saving */ 3834 if (link->lpm_policy != ap->target_lpm_policy) { 3835 rc = ata_eh_set_lpm(link, ap->target_lpm_policy, &dev); 3836 if (rc) 3837 goto rest_fail; 3838 } 3839 3840 /* this link is okay now */ 3841 ehc->i.flags = 0; 3842 continue; 3843 3844 rest_fail: 3845 nr_fails++; 3846 if (dev) 3847 ata_eh_handle_dev_fail(dev, rc); 3848 3849 if (ap->pflags & ATA_PFLAG_FROZEN) { 3850 /* PMP reset requires working host port. 3851 * Can't retry if it's frozen. 3852 */ 3853 if (sata_pmp_attached(ap)) 3854 goto out; 3855 break; 3856 } 3857 } 3858 3859 if (nr_fails) 3860 goto retry; 3861 3862 out: 3863 if (rc && r_failed_link) 3864 *r_failed_link = link; 3865 3866 DPRINTK("EXIT, rc=%d\n", rc); 3867 return rc; 3868 } 3869 3870 /** 3871 * ata_eh_finish - finish up EH 3872 * @ap: host port to finish EH for 3873 * 3874 * Recovery is complete. Clean up EH states and retry or finish 3875 * failed qcs. 3876 * 3877 * LOCKING: 3878 * None. 3879 */ 3880 void ata_eh_finish(struct ata_port *ap) 3881 { 3882 int tag; 3883 3884 /* retry or finish qcs */ 3885 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 3886 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 3887 3888 if (!(qc->flags & ATA_QCFLAG_FAILED)) 3889 continue; 3890 3891 if (qc->err_mask) { 3892 /* FIXME: Once EH migration is complete, 3893 * generate sense data in this function, 3894 * considering both err_mask and tf. 3895 */ 3896 if (qc->flags & ATA_QCFLAG_RETRY) 3897 ata_eh_qc_retry(qc); 3898 else 3899 ata_eh_qc_complete(qc); 3900 } else { 3901 if (qc->flags & ATA_QCFLAG_SENSE_VALID) { 3902 ata_eh_qc_complete(qc); 3903 } else { 3904 /* feed zero TF to sense generation */ 3905 memset(&qc->result_tf, 0, sizeof(qc->result_tf)); 3906 ata_eh_qc_retry(qc); 3907 } 3908 } 3909 } 3910 3911 /* make sure nr_active_links is zero after EH */ 3912 WARN_ON(ap->nr_active_links); 3913 ap->nr_active_links = 0; 3914 } 3915 3916 /** 3917 * ata_do_eh - do standard error handling 3918 * @ap: host port to handle error for 3919 * 3920 * @prereset: prereset method (can be NULL) 3921 * @softreset: softreset method (can be NULL) 3922 * @hardreset: hardreset method (can be NULL) 3923 * @postreset: postreset method (can be NULL) 3924 * 3925 * Perform standard error handling sequence. 3926 * 3927 * LOCKING: 3928 * Kernel thread context (may sleep). 3929 */ 3930 void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, 3931 ata_reset_fn_t softreset, ata_reset_fn_t hardreset, 3932 ata_postreset_fn_t postreset) 3933 { 3934 struct ata_device *dev; 3935 int rc; 3936 3937 ata_eh_autopsy(ap); 3938 ata_eh_report(ap); 3939 3940 rc = ata_eh_recover(ap, prereset, softreset, hardreset, postreset, 3941 NULL); 3942 if (rc) { 3943 ata_for_each_dev(dev, &ap->link, ALL) 3944 ata_dev_disable(dev); 3945 } 3946 3947 ata_eh_finish(ap); 3948 } 3949 3950 /** 3951 * ata_std_error_handler - standard error handler 3952 * @ap: host port to handle error for 3953 * 3954 * Standard error handler 3955 * 3956 * LOCKING: 3957 * Kernel thread context (may sleep). 
3958 */ 3959 void ata_std_error_handler(struct ata_port *ap) 3960 { 3961 struct ata_port_operations *ops = ap->ops; 3962 ata_reset_fn_t hardreset = ops->hardreset; 3963 3964 /* ignore built-in hardreset if SCR access is not available */ 3965 if (hardreset == sata_std_hardreset && !sata_scr_valid(&ap->link)) 3966 hardreset = NULL; 3967 3968 ata_do_eh(ap, ops->prereset, ops->softreset, hardreset, ops->postreset); 3969 } 3970 3971 #ifdef CONFIG_PM 3972 /** 3973 * ata_eh_handle_port_suspend - perform port suspend operation 3974 * @ap: port to suspend 3975 * 3976 * Suspend @ap. 3977 * 3978 * LOCKING: 3979 * Kernel thread context (may sleep). 3980 */ 3981 static void ata_eh_handle_port_suspend(struct ata_port *ap) 3982 { 3983 unsigned long flags; 3984 int rc = 0; 3985 3986 /* are we suspending? */ 3987 spin_lock_irqsave(ap->lock, flags); 3988 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || 3989 ap->pm_mesg.event == PM_EVENT_ON) { 3990 spin_unlock_irqrestore(ap->lock, flags); 3991 return; 3992 } 3993 spin_unlock_irqrestore(ap->lock, flags); 3994 3995 WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED); 3996 3997 /* tell ACPI we're suspending */ 3998 rc = ata_acpi_on_suspend(ap); 3999 if (rc) 4000 goto out; 4001 4002 /* suspend */ 4003 ata_eh_freeze_port(ap); 4004 4005 if (ap->ops->port_suspend) 4006 rc = ap->ops->port_suspend(ap, ap->pm_mesg); 4007 4008 ata_acpi_set_state(ap, PMSG_SUSPEND); 4009 out: 4010 /* report result */ 4011 spin_lock_irqsave(ap->lock, flags); 4012 4013 ap->pflags &= ~ATA_PFLAG_PM_PENDING; 4014 if (rc == 0) 4015 ap->pflags |= ATA_PFLAG_SUSPENDED; 4016 else if (ap->pflags & ATA_PFLAG_FROZEN) 4017 ata_port_schedule_eh(ap); 4018 4019 if (ap->pm_result) { 4020 *ap->pm_result = rc; 4021 ap->pm_result = NULL; 4022 } 4023 4024 spin_unlock_irqrestore(ap->lock, flags); 4025 4026 return; 4027 } 4028 4029 /** 4030 * ata_eh_handle_port_resume - perform port resume operation 4031 * @ap: port to resume 4032 * 4033 * Resume @ap. 4034 * 4035 * LOCKING: 4036 * Kernel thread context (may sleep). 4037 */ 4038 static void ata_eh_handle_port_resume(struct ata_port *ap) 4039 { 4040 struct ata_link *link; 4041 struct ata_device *dev; 4042 unsigned long flags; 4043 int rc = 0; 4044 4045 /* are we resuming? */ 4046 spin_lock_irqsave(ap->lock, flags); 4047 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || 4048 ap->pm_mesg.event != PM_EVENT_ON) { 4049 spin_unlock_irqrestore(ap->lock, flags); 4050 return; 4051 } 4052 spin_unlock_irqrestore(ap->lock, flags); 4053 4054 WARN_ON(!(ap->pflags & ATA_PFLAG_SUSPENDED)); 4055 4056 /* 4057 * Error timestamps are in jiffies which doesn't run while 4058 * suspended and PHY events during resume isn't too uncommon. 4059 * When the two are combined, it can lead to unnecessary speed 4060 * downs if the machine is suspended and resumed repeatedly. 4061 * Clear error history. 4062 */ 4063 ata_for_each_link(link, ap, HOST_FIRST) 4064 ata_for_each_dev(dev, link, ALL) 4065 ata_ering_clear(&dev->ering); 4066 4067 ata_acpi_set_state(ap, PMSG_ON); 4068 4069 if (ap->ops->port_resume) 4070 rc = ap->ops->port_resume(ap); 4071 4072 /* tell ACPI that we're resuming */ 4073 ata_acpi_on_resume(ap); 4074 4075 /* report result */ 4076 spin_lock_irqsave(ap->lock, flags); 4077 ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED); 4078 if (ap->pm_result) { 4079 *ap->pm_result = rc; 4080 ap->pm_result = NULL; 4081 } 4082 spin_unlock_irqrestore(ap->lock, flags); 4083 } 4084 #endif /* CONFIG_PM */ 4085