/*
 *  libata-eh.c - libata error handling
 *
 *  Maintained by:  Tejun Heo <tj@kernel.org>
 *		    Please ALWAYS copy linux-ide@vger.kernel.org
 *		    on emails.
 *
 *  Copyright 2006 Tejun Heo <htejun@gmail.com>
 *
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License as
 *  published by the Free Software Foundation; either version 2, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
 *  USA.
 *
 *
 *  libata documentation is available via 'make {ps|pdf}docs',
 *  as Documentation/driver-api/libata.rst
 *
 *  Hardware documentation available from http://www.t13.org/ and
 *  http://www.sata-io.org/
 *
 */

#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/export.h>
#include <linux/pci.h>
#include <scsi/scsi.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_dbg.h>
#include "../scsi/scsi_transport_api.h"

#include <linux/libata.h>

#include <trace/events/libata.h>
#include "libata.h"

enum {
	/* speed down verdicts */
	ATA_EH_SPDN_NCQ_OFF		= (1 << 0),
	ATA_EH_SPDN_SPEED_DOWN		= (1 << 1),
	ATA_EH_SPDN_FALLBACK_TO_PIO	= (1 << 2),
	ATA_EH_SPDN_KEEP_ERRORS		= (1 << 3),

	/* error flags */
	ATA_EFLAG_IS_IO			= (1 << 0),
	ATA_EFLAG_DUBIOUS_XFER		= (1 << 1),
	ATA_EFLAG_OLD_ER		= (1 << 31),

	/* error categories */
	ATA_ECAT_NONE			= 0,
	ATA_ECAT_ATA_BUS		= 1,
	ATA_ECAT_TOUT_HSM		= 2,
	ATA_ECAT_UNK_DEV		= 3,
	ATA_ECAT_DUBIOUS_NONE		= 4,
	ATA_ECAT_DUBIOUS_ATA_BUS	= 5,
	ATA_ECAT_DUBIOUS_TOUT_HSM	= 6,
	ATA_ECAT_DUBIOUS_UNK_DEV	= 7,
	ATA_ECAT_NR			= 8,

	ATA_EH_CMD_DFL_TIMEOUT		= 5000,

	/* always put at least this amount of time between resets */
	ATA_EH_RESET_COOL_DOWN		= 5000,

	/* Waiting in ->prereset can never be reliable.  It's
	 * sometimes nice to wait there but it can't be depended upon;
	 * otherwise, we wouldn't be resetting.  Just give it enough
	 * time for most drives to spin up.
	 */
	ATA_EH_PRERESET_TIMEOUT		= 10000,
	ATA_EH_FASTDRAIN_INTERVAL	= 3000,

	ATA_EH_UA_TRIES			= 5,

	/* probe speed down parameters, see ata_eh_schedule_probe() */
	ATA_EH_PROBE_TRIAL_INTERVAL	= 60000,	/* 1 min */
	ATA_EH_PROBE_TRIALS		= 2,
};

/* The following table determines how we sequence resets.  Each entry
 * represents timeout for that try.  The first try can be soft or
 * hardreset.  All others are hardreset if available.  In most cases
 * the first reset w/ 10sec timeout should succeed.  Following entries
 * are mostly for error handling, hotplug and those outlier devices that
 * take an exceptionally long time to recover from reset.
 */
static const unsigned long ata_eh_reset_timeouts[] = {
	10000,	/* most drives spin up by 10sec */
	10000,	/* > 99% working drives spin up before 20sec */
	35000,	/* give > 30 secs of idleness for outlier devices */
	 5000,	/* and sweet one last chance */
	ULONG_MAX, /* > 1 min has elapsed, give up */
};
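
/*
 * Illustrative sketch (not part of the driver): one way the table above
 * can drive a reset retry loop.  The variable names here are
 * hypothetical; the in-tree consumer is the reset logic later in this
 * file.
 *
 *	int try = 0;
 *	unsigned long deadline;
 *
 *	do {
 *		deadline = ata_deadline(jiffies, ata_eh_reset_timeouts[try]);
 *		(issue soft/hardreset and wait until deadline)
 *	} while (ata_eh_reset_timeouts[++try] != ULONG_MAX);
 */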

static const unsigned long ata_eh_identify_timeouts[] = {
	 5000,	/* covers > 99% of successes and not too boring on failures */
	10000,	/* combined time till here is enough even for media access */
	30000,	/* for true idiots */
	ULONG_MAX,
};

static const unsigned long ata_eh_flush_timeouts[] = {
	15000,	/* be generous with flush */
	15000,	/* ditto */
	30000,	/* and even more generous */
	ULONG_MAX,
};

static const unsigned long ata_eh_other_timeouts[] = {
	 5000,	/* same rationale as identify timeout */
	10000,	/* ditto */
	/* but no merciful 30sec for other commands, it just isn't worth it */
	ULONG_MAX,
};

struct ata_eh_cmd_timeout_ent {
	const u8		*commands;
	const unsigned long	*timeouts;
};

/* The following table determines timeouts to use for EH internal
 * commands.  Each table entry is a command class and matches the
 * commands the entry applies to and the timeout table to use.
 *
 * On the retry after a command timed out, the next timeout value from
 * the table is used.  If the table doesn't contain further entries,
 * the last value is used.
 *
 * ehc->cmd_timeout_idx keeps track of which timeout to use per
 * command class, so if SET_FEATURES times out on the first try, the
 * next try will use the second timeout value only for that class.
 */
#define CMDS(cmds...)	(const u8 []){ cmds, 0 }
static const struct ata_eh_cmd_timeout_ent
ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = {
	{ .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI),
	  .timeouts = ata_eh_identify_timeouts, },
	{ .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_SET_FEATURES),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_FLUSH, ATA_CMD_FLUSH_EXT),
	  .timeouts = ata_eh_flush_timeouts },
};
#undef CMDS
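
/*
 * Worked example (illustrative): SET_FEATURES belongs to the
 * ata_eh_other_timeouts class, so under the escalation scheme above:
 *
 *	timeout = ata_internal_cmd_timeout(dev, ATA_CMD_SET_FEATURES);
 *		--> 5000ms on the first try
 *	ata_internal_cmd_timed_out(dev, ATA_CMD_SET_FEATURES);
 *		--> advances cmd_timeout_idx for this class only
 *	timeout = ata_internal_cmd_timeout(dev, ATA_CMD_SET_FEATURES);
 *		--> 10000ms on the retry; ULONG_MAX is next, so the
 *		    index stops advancing and 10000ms is used from then on
 */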

static void __ata_port_freeze(struct ata_port *ap);
#ifdef CONFIG_PM
static void ata_eh_handle_port_suspend(struct ata_port *ap);
static void ata_eh_handle_port_resume(struct ata_port *ap);
#else /* CONFIG_PM */
static void ata_eh_handle_port_suspend(struct ata_port *ap)
{ }

static void ata_eh_handle_port_resume(struct ata_port *ap)
{ }
#endif /* CONFIG_PM */

static __printf(2, 0) void __ata_ehi_pushv_desc(struct ata_eh_info *ehi,
				 const char *fmt, va_list args)
{
	ehi->desc_len += vscnprintf(ehi->desc + ehi->desc_len,
				    ATA_EH_DESC_LEN - ehi->desc_len,
				    fmt, args);
}

/**
 *	__ata_ehi_push_desc - push error description without adding separator
 *	@ehi: target EHI
 *	@fmt: printf format string
 *
 *	Format string according to @fmt and append it to @ehi->desc.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	__ata_ehi_pushv_desc(ehi, fmt, args);
	va_end(args);
}

/**
 *	ata_ehi_push_desc - push error description with separator
 *	@ehi: target EHI
 *	@fmt: printf format string
 *
 *	Format string according to @fmt and append it to @ehi->desc.
 *	If @ehi->desc is not empty, ", " is added in-between.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
{
	va_list args;

	if (ehi->desc_len)
		__ata_ehi_push_desc(ehi, ", ");

	va_start(args, fmt);
	__ata_ehi_pushv_desc(ehi, fmt, args);
	va_end(args);
}

/**
 *	ata_ehi_clear_desc - clean error description
 *	@ehi: target EHI
 *
 *	Clear @ehi->desc.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_ehi_clear_desc(struct ata_eh_info *ehi)
{
	ehi->desc[0] = '\0';
	ehi->desc_len = 0;
}

/**
 *	ata_port_desc - append port description
 *	@ap: target ATA port
 *	@fmt: printf format string
 *
 *	Format string according to @fmt and append it to port
 *	description.  If port description is not empty, " " is added
 *	in-between.  This function is to be used while initializing
 *	ata_host.  The description is printed on host registration.
 *
 *	LOCKING:
 *	None.
 */
void ata_port_desc(struct ata_port *ap, const char *fmt, ...)
{
	va_list args;

	WARN_ON(!(ap->pflags & ATA_PFLAG_INITIALIZING));

	if (ap->link.eh_info.desc_len)
		__ata_ehi_push_desc(&ap->link.eh_info, " ");

	va_start(args, fmt);
	__ata_ehi_pushv_desc(&ap->link.eh_info, fmt, args);
	va_end(args);
}

#ifdef CONFIG_PCI

/**
 *	ata_port_pbar_desc - append PCI BAR description
 *	@ap: target ATA port
 *	@bar: target PCI BAR
 *	@offset: offset into PCI BAR
 *	@name: name of the area
 *
 *	If @offset is negative, this function formats a string which
 *	contains the name, address, size and type of the BAR and
 *	appends it to the port description.  If @offset is zero or
 *	positive, only name and offsetted address is appended.
 *
 *	LOCKING:
 *	None.
 */
void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset,
			const char *name)
{
	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
	char *type = "";
	unsigned long long start, len;

	if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM)
		type = "m";
	else if (pci_resource_flags(pdev, bar) & IORESOURCE_IO)
		type = "i";

	start = (unsigned long long)pci_resource_start(pdev, bar);
	len = (unsigned long long)pci_resource_len(pdev, bar);

	if (offset < 0)
		ata_port_desc(ap, "%s %s%llu@0x%llx", name, type, len, start);
	else
		ata_port_desc(ap, "%s 0x%llx", name,
				start + (unsigned long long)offset);
}

#endif /* CONFIG_PCI */

static int ata_lookup_timeout_table(u8 cmd)
{
	int i;

	for (i = 0; i < ATA_EH_CMD_TIMEOUT_TABLE_SIZE; i++) {
		const u8 *cur;

		for (cur = ata_eh_cmd_timeout_table[i].commands; *cur; cur++)
			if (*cur == cmd)
				return i;
	}

	return -1;
}
335 * 336 * RETURNS: 337 * Determined timeout. 338 */ 339 unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd) 340 { 341 struct ata_eh_context *ehc = &dev->link->eh_context; 342 int ent = ata_lookup_timeout_table(cmd); 343 int idx; 344 345 if (ent < 0) 346 return ATA_EH_CMD_DFL_TIMEOUT; 347 348 idx = ehc->cmd_timeout_idx[dev->devno][ent]; 349 return ata_eh_cmd_timeout_table[ent].timeouts[idx]; 350 } 351 352 /** 353 * ata_internal_cmd_timed_out - notification for internal command timeout 354 * @dev: target device 355 * @cmd: internal command which timed out 356 * 357 * Notify EH that internal command @cmd for @dev timed out. This 358 * function should be called only for commands whose timeouts are 359 * determined using ata_internal_cmd_timeout(). 360 * 361 * LOCKING: 362 * EH context. 363 */ 364 void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd) 365 { 366 struct ata_eh_context *ehc = &dev->link->eh_context; 367 int ent = ata_lookup_timeout_table(cmd); 368 int idx; 369 370 if (ent < 0) 371 return; 372 373 idx = ehc->cmd_timeout_idx[dev->devno][ent]; 374 if (ata_eh_cmd_timeout_table[ent].timeouts[idx + 1] != ULONG_MAX) 375 ehc->cmd_timeout_idx[dev->devno][ent]++; 376 } 377 378 static void ata_ering_record(struct ata_ering *ering, unsigned int eflags, 379 unsigned int err_mask) 380 { 381 struct ata_ering_entry *ent; 382 383 WARN_ON(!err_mask); 384 385 ering->cursor++; 386 ering->cursor %= ATA_ERING_SIZE; 387 388 ent = &ering->ring[ering->cursor]; 389 ent->eflags = eflags; 390 ent->err_mask = err_mask; 391 ent->timestamp = get_jiffies_64(); 392 } 393 394 static struct ata_ering_entry *ata_ering_top(struct ata_ering *ering) 395 { 396 struct ata_ering_entry *ent = &ering->ring[ering->cursor]; 397 398 if (ent->err_mask) 399 return ent; 400 return NULL; 401 } 402 403 int ata_ering_map(struct ata_ering *ering, 404 int (*map_fn)(struct ata_ering_entry *, void *), 405 void *arg) 406 { 407 int idx, rc = 0; 408 struct ata_ering_entry *ent; 409 410 idx = ering->cursor; 411 do { 412 ent = &ering->ring[idx]; 413 if (!ent->err_mask) 414 break; 415 rc = map_fn(ent, arg); 416 if (rc) 417 break; 418 idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE; 419 } while (idx != ering->cursor); 420 421 return rc; 422 } 423 424 static int ata_ering_clear_cb(struct ata_ering_entry *ent, void *void_arg) 425 { 426 ent->eflags |= ATA_EFLAG_OLD_ER; 427 return 0; 428 } 429 430 static void ata_ering_clear(struct ata_ering *ering) 431 { 432 ata_ering_map(ering, ata_ering_clear_cb, NULL); 433 } 434 435 static unsigned int ata_eh_dev_action(struct ata_device *dev) 436 { 437 struct ata_eh_context *ehc = &dev->link->eh_context; 438 439 return ehc->i.action | ehc->i.dev_action[dev->devno]; 440 } 441 442 static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev, 443 struct ata_eh_info *ehi, unsigned int action) 444 { 445 struct ata_device *tdev; 446 447 if (!dev) { 448 ehi->action &= ~action; 449 ata_for_each_dev(tdev, link, ALL) 450 ehi->dev_action[tdev->devno] &= ~action; 451 } else { 452 /* doesn't make sense for port-wide EH actions */ 453 WARN_ON(!(action & ATA_EH_PERDEV_MASK)); 454 455 /* break ehi->action into ehi->dev_action */ 456 if (ehi->action & action) { 457 ata_for_each_dev(tdev, link, ALL) 458 ehi->dev_action[tdev->devno] |= 459 ehi->action & action; 460 ehi->action &= ~action; 461 } 462 463 /* turn off the specified per-dev action */ 464 ehi->dev_action[dev->devno] &= ~action; 465 } 466 } 467 468 /** 469 * ata_eh_acquire - acquire EH ownership 470 * @ap: ATA port to 

/**
 *	ata_eh_acquire - acquire EH ownership
 *	@ap: ATA port to acquire EH ownership for
 *
 *	Acquire EH ownership for @ap.  This is the basic exclusion
 *	mechanism for ports sharing a host.  Only one port hanging off
 *	the same host can claim the ownership of EH.
 *
 *	LOCKING:
 *	EH context.
 */
void ata_eh_acquire(struct ata_port *ap)
{
	mutex_lock(&ap->host->eh_mutex);
	WARN_ON_ONCE(ap->host->eh_owner);
	ap->host->eh_owner = current;
}

/**
 *	ata_eh_release - release EH ownership
 *	@ap: ATA port to release EH ownership for
 *
 *	Release EH ownership for @ap.  The caller must have acquired
 *	EH ownership using ata_eh_acquire() previously.
 *
 *	LOCKING:
 *	EH context.
 */
void ata_eh_release(struct ata_port *ap)
{
	WARN_ON_ONCE(ap->host->eh_owner != current);
	ap->host->eh_owner = NULL;
	mutex_unlock(&ap->host->eh_mutex);
}
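
/*
 * Usage sketch (illustrative): EH code that sleeps for long stretches,
 * e.g. while waiting for a link to come back, is expected to drop and
 * re-take ownership around the sleep so that sibling ports on the same
 * host can make progress; ata_msleep() follows this pattern:
 *
 *	ata_eh_release(ap);
 *	msleep(interval);
 *	ata_eh_acquire(ap);
 */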

static void ata_eh_unload(struct ata_port *ap)
{
	struct ata_link *link;
	struct ata_device *dev;
	unsigned long flags;

	/* Restore SControl IPM and SPD for the next driver and
	 * disable attached devices.
	 */
	ata_for_each_link(link, ap, PMP_FIRST) {
		sata_scr_write(link, SCR_CONTROL, link->saved_scontrol & 0xff0);
		ata_for_each_dev(dev, link, ALL)
			ata_dev_disable(dev);
	}

	/* freeze and set UNLOADED */
	spin_lock_irqsave(ap->lock, flags);

	ata_port_freeze(ap);			/* won't be thawed */
	ap->pflags &= ~ATA_PFLAG_EH_PENDING;	/* clear pending from freeze */
	ap->pflags |= ATA_PFLAG_UNLOADED;

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_scsi_error - SCSI layer error handler callback
 *	@host: SCSI host on which error occurred
 *
 *	Handles SCSI-layer-thrown error events.
 *
 *	LOCKING:
 *	Inherited from SCSI layer (none, can sleep)
 *
 *	RETURNS:
 *	Zero.
 */
void ata_scsi_error(struct Scsi_Host *host)
{
	struct ata_port *ap = ata_shost_to_port(host);
	unsigned long flags;
	LIST_HEAD(eh_work_q);

	DPRINTK("ENTER\n");

	spin_lock_irqsave(host->host_lock, flags);
	list_splice_init(&host->eh_cmd_q, &eh_work_q);
	spin_unlock_irqrestore(host->host_lock, flags);

	ata_scsi_cmd_error_handler(host, ap, &eh_work_q);

	/* If we raced normal completion and there is nothing left to
	 * recover (nr_timedout == 0), the port error handler below
	 * still runs to finish EH bookkeeping.
	 */
	ata_scsi_port_error_handler(host, ap);

	/* finish or retry handled scmd's and clean up */
	WARN_ON(!list_empty(&eh_work_q));

	DPRINTK("EXIT\n");
}

/**
 * ata_scsi_cmd_error_handler - error callback for a list of commands
 * @host:	scsi host containing the port
 * @ap:		ATA port within the host
 * @eh_work_q:	list of commands to process
 *
 * process the given list of commands and return those finished to the
 * ap->eh_done_q.  This function is the first part of the libata error
 * handler which processes a given list of failed commands.
 */
void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap,
				struct list_head *eh_work_q)
{
	int i;
	unsigned long flags;

	/* make sure sff pio task is not running */
	ata_sff_flush_pio_task(ap);

	/* synchronize with host lock and sort out timeouts */

	/* For new EH, all qcs are finished in one of three ways -
	 * normal completion, error completion, and SCSI timeout.
	 * Both completions can race against SCSI timeout.  When normal
	 * completion wins, the qc never reaches EH.  When error
	 * completion wins, the qc has ATA_QCFLAG_FAILED set.
	 *
	 * When SCSI timeout wins, things are a bit more complex.
	 * Normal or error completion can occur after the timeout but
	 * before this point.  In such cases, both types of
	 * completions are honored.  A scmd is determined to have
	 * timed out iff its associated qc is active and not failed.
	 */
	spin_lock_irqsave(ap->lock, flags);
	if (ap->ops->error_handler) {
		struct scsi_cmnd *scmd, *tmp;
		int nr_timedout = 0;

		/* This must occur under the ap->lock as we don't want
		 * a polled recovery to race the real interrupt handler.
		 *
		 * The lost_interrupt handler checks for any completed but
		 * non-notified command and completes much like an IRQ handler.
		 *
		 * We then fall into the error recovery code which will treat
		 * this as if normal completion won the race.
		 */
		if (ap->ops->lost_interrupt)
			ap->ops->lost_interrupt(ap);

		list_for_each_entry_safe(scmd, tmp, eh_work_q, eh_entry) {
			struct ata_queued_cmd *qc;

			for (i = 0; i < ATA_MAX_QUEUE; i++) {
				qc = __ata_qc_from_tag(ap, i);
				if (qc->flags & ATA_QCFLAG_ACTIVE &&
				    qc->scsicmd == scmd)
					break;
			}

			if (i < ATA_MAX_QUEUE) {
				/* the scmd has an associated qc */
				if (!(qc->flags & ATA_QCFLAG_FAILED)) {
					/* which hasn't failed yet, timeout */
					qc->err_mask |= AC_ERR_TIMEOUT;
					qc->flags |= ATA_QCFLAG_FAILED;
					nr_timedout++;
				}
			} else {
				/* Normal completion occurred after
				 * SCSI timeout but before this point.
				 * Successfully complete it.
				 */
				scmd->retries = scmd->allowed;
				scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
			}
		}

		/* If we have timed out qcs, they belong to EH from
		 * this point but the state of the controller is
		 * unknown.  Freeze the port to make sure the IRQ
		 * handler doesn't diddle with those qcs.  This must
		 * be done atomically w.r.t. setting QCFLAG_FAILED.
		 */
		if (nr_timedout)
			__ata_port_freeze(ap);

		/* initialize eh_tries */
		ap->eh_tries = ATA_EH_MAX_TRIES;
	}
	spin_unlock_irqrestore(ap->lock, flags);

}
EXPORT_SYMBOL(ata_scsi_cmd_error_handler);

/**
 * ata_scsi_port_error_handler - recover the port after the commands
 * @host:	SCSI host containing the port
 * @ap:		the ATA port
 *
 * Handle the recovery of the port @ap after all the commands
 * have been recovered.
 */
void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
{
	unsigned long flags;

	/* invoke error handler */
	if (ap->ops->error_handler) {
		struct ata_link *link;

		/* acquire EH ownership */
		ata_eh_acquire(ap);
 repeat:
		/* kill fast drain timer */
		del_timer_sync(&ap->fastdrain_timer);

		/* process port resume request */
		ata_eh_handle_port_resume(ap);

		/* fetch & clear EH info */
		spin_lock_irqsave(ap->lock, flags);

		ata_for_each_link(link, ap, HOST_FIRST) {
			struct ata_eh_context *ehc = &link->eh_context;
			struct ata_device *dev;

			memset(&link->eh_context, 0, sizeof(link->eh_context));
			link->eh_context.i = link->eh_info;
			memset(&link->eh_info, 0, sizeof(link->eh_info));

			ata_for_each_dev(dev, link, ENABLED) {
				int devno = dev->devno;

				ehc->saved_xfer_mode[devno] = dev->xfer_mode;
				if (ata_ncq_enabled(dev))
					ehc->saved_ncq_enabled |= 1 << devno;
			}
		}

		ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
		ap->pflags &= ~ATA_PFLAG_EH_PENDING;
		ap->excl_link = NULL;	/* don't maintain exclusion over EH */

		spin_unlock_irqrestore(ap->lock, flags);

		/* invoke EH, skip if unloading or suspended */
		if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
			ap->ops->error_handler(ap);
		else {
			/* if unloading, commence suicide */
			if ((ap->pflags & ATA_PFLAG_UNLOADING) &&
			    !(ap->pflags & ATA_PFLAG_UNLOADED))
				ata_eh_unload(ap);
			ata_eh_finish(ap);
		}

		/* process port suspend request */
		ata_eh_handle_port_suspend(ap);

		/* Exception might have happened after ->error_handler
		 * recovered the port but before this point.  Repeat
		 * EH in such case.
		 */
		spin_lock_irqsave(ap->lock, flags);

		if (ap->pflags & ATA_PFLAG_EH_PENDING) {
			if (--ap->eh_tries) {
				spin_unlock_irqrestore(ap->lock, flags);
				goto repeat;
			}
			ata_port_err(ap,
				     "EH pending after %d tries, giving up\n",
				     ATA_EH_MAX_TRIES);
			ap->pflags &= ~ATA_PFLAG_EH_PENDING;
		}

		/* this run is complete, make sure EH info is clear */
		ata_for_each_link(link, ap, HOST_FIRST)
			memset(&link->eh_info, 0, sizeof(link->eh_info));

		/* end eh (clear host_eh_scheduled) while holding
		 * ap->lock such that if exception occurs after this
		 * point but before EH completion, SCSI midlayer will
		 * re-initiate EH.
		 */
		ap->ops->end_eh(ap);

		spin_unlock_irqrestore(ap->lock, flags);
		ata_eh_release(ap);
	} else {
		WARN_ON(ata_qc_from_tag(ap, ap->link.active_tag) == NULL);
		ap->ops->eng_timeout(ap);
	}

	scsi_eh_flush_done_q(&ap->eh_done_q);

	/* clean up */
	spin_lock_irqsave(ap->lock, flags);

	if (ap->pflags & ATA_PFLAG_LOADING)
		ap->pflags &= ~ATA_PFLAG_LOADING;
	else if ((ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) &&
		 !(ap->flags & ATA_FLAG_SAS_HOST))
		schedule_delayed_work(&ap->hotplug_task, 0);

	if (ap->pflags & ATA_PFLAG_RECOVERED)
		ata_port_info(ap, "EH complete\n");

	ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED);

	/* tell wait_eh that we're done */
	ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS;
	wake_up_all(&ap->eh_wait_q);

	spin_unlock_irqrestore(ap->lock, flags);
}
EXPORT_SYMBOL_GPL(ata_scsi_port_error_handler);

/**
 *	ata_port_wait_eh - Wait for the currently pending EH to complete
 *	@ap: Port to wait EH for
 *
 *	Wait until the currently pending EH is complete.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
void ata_port_wait_eh(struct ata_port *ap)
{
	unsigned long flags;
	DEFINE_WAIT(wait);

 retry:
	spin_lock_irqsave(ap->lock, flags);

	while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) {
		prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irqrestore(ap->lock, flags);
		schedule();
		spin_lock_irqsave(ap->lock, flags);
	}
	finish_wait(&ap->eh_wait_q, &wait);

	spin_unlock_irqrestore(ap->lock, flags);

	/* make sure SCSI EH is complete */
	if (scsi_host_in_recovery(ap->scsi_host)) {
		ata_msleep(ap, 10);
		goto retry;
	}
}
EXPORT_SYMBOL_GPL(ata_port_wait_eh);

static int ata_eh_nr_in_flight(struct ata_port *ap)
{
	unsigned int tag;
	int nr = 0;

	/* count only non-internal commands */
	for (tag = 0; tag < ATA_MAX_QUEUE - 1; tag++)
		if (ata_qc_from_tag(ap, tag))
			nr++;

	return nr;
}

void ata_eh_fastdrain_timerfn(struct timer_list *t)
{
	struct ata_port *ap = from_timer(ap, t, fastdrain_timer);
	unsigned long flags;
	int cnt;

	spin_lock_irqsave(ap->lock, flags);

	cnt = ata_eh_nr_in_flight(ap);

	/* are we done? */
	if (!cnt)
		goto out_unlock;

	if (cnt == ap->fastdrain_cnt) {
		unsigned int tag;

		/* No progress during the last interval, tag all
		 * in-flight qcs as timed out and freeze the port.
		 */
		for (tag = 0; tag < ATA_MAX_QUEUE - 1; tag++) {
			struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);
			if (qc)
				qc->err_mask |= AC_ERR_TIMEOUT;
		}

		ata_port_freeze(ap);
	} else {
		/* some qcs have finished, give it another chance */
		ap->fastdrain_cnt = cnt;
		ap->fastdrain_timer.expires =
			ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
		add_timer(&ap->fastdrain_timer);
	}

 out_unlock:
	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_set_pending - set ATA_PFLAG_EH_PENDING and activate fast drain
 *	@ap: target ATA port
 *	@fastdrain: activate fast drain
 *
 *	Set ATA_PFLAG_EH_PENDING and activate fast drain if @fastdrain
 *	is non-zero and EH wasn't pending before.  Fast drain ensures
 *	that EH kicks in in a timely manner.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
static void ata_eh_set_pending(struct ata_port *ap, int fastdrain)
{
	int cnt;

	/* already scheduled? */
	if (ap->pflags & ATA_PFLAG_EH_PENDING)
		return;

	ap->pflags |= ATA_PFLAG_EH_PENDING;

	if (!fastdrain)
		return;

	/* do we have in-flight qcs? */
	cnt = ata_eh_nr_in_flight(ap);
	if (!cnt)
		return;

	/* activate fast drain */
	ap->fastdrain_cnt = cnt;
	ap->fastdrain_timer.expires =
		ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
	add_timer(&ap->fastdrain_timer);
}

/**
 *	ata_qc_schedule_eh - schedule qc for error handling
 *	@qc: command to schedule error handling for
 *
 *	Schedule error handling for @qc.  EH will kick in as soon as
 *	other commands are drained.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	struct request_queue *q = qc->scsicmd->device->request_queue;
	unsigned long flags;

	WARN_ON(!ap->ops->error_handler);

	qc->flags |= ATA_QCFLAG_FAILED;
	ata_eh_set_pending(ap, 1);

	/* The following will fail if timeout has already expired.
	 * ata_scsi_error() takes care of such scmds on EH entry.
	 * Note that ATA_QCFLAG_FAILED is unconditionally set after
	 * this function completes.
	 */
	spin_lock_irqsave(q->queue_lock, flags);
	blk_abort_request(qc->scsicmd->request);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

/**
 * ata_std_sched_eh - non-libsas ata_ports issue eh with this common routine
 * @ap: ATA port to schedule EH for
 *
 *	LOCKING: inherited from ata_port_schedule_eh
 *	spin_lock_irqsave(host lock)
 */
void ata_std_sched_eh(struct ata_port *ap)
{
	WARN_ON(!ap->ops->error_handler);

	if (ap->pflags & ATA_PFLAG_INITIALIZING)
		return;

	ata_eh_set_pending(ap, 1);
	scsi_schedule_eh(ap->scsi_host);

	DPRINTK("port EH scheduled\n");
}
EXPORT_SYMBOL_GPL(ata_std_sched_eh);

/**
 * ata_std_end_eh - non-libsas ata_ports complete eh with this common routine
 * @ap: ATA port to end EH for
 *
 * In the libata object model there is a 1:1 mapping of ata_port to
 * shost, so host fields can be directly manipulated under ap->lock, in
 * the libsas case we need to hold a lock at the ha->level to coordinate
 * these events.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_std_end_eh(struct ata_port *ap)
{
	struct Scsi_Host *host = ap->scsi_host;

	host->host_eh_scheduled = 0;
}
EXPORT_SYMBOL(ata_std_end_eh);

/**
 *	ata_port_schedule_eh - schedule error handling without a qc
 *	@ap: ATA port to schedule EH for
 *
 *	Schedule error handling for @ap.  EH will kick in as soon as
 *	all commands are drained.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_port_schedule_eh(struct ata_port *ap)
{
	/* see: ata_std_sched_eh, unless you know better */
	ap->ops->sched_eh(ap);
}

static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link)
{
	int tag, nr_aborted = 0;

	WARN_ON(!ap->ops->error_handler);

	/* we're gonna abort all commands, no need for fast drain */
	ata_eh_set_pending(ap, 0);

	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);

		if (qc && (!link || qc->dev->link == link)) {
			qc->flags |= ATA_QCFLAG_FAILED;
			ata_qc_complete(qc);
			nr_aborted++;
		}
	}

	if (!nr_aborted)
		ata_port_schedule_eh(ap);

	return nr_aborted;
}

/**
 *	ata_link_abort - abort all qc's on the link
 *	@link: ATA link to abort qc's for
 *
 *	Abort all active qc's on @link and schedule EH.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	Number of aborted qc's.
 */
int ata_link_abort(struct ata_link *link)
{
	return ata_do_link_abort(link->ap, link);
}

/**
 *	ata_port_abort - abort all qc's on the port
 *	@ap: ATA port to abort qc's for
 *
 *	Abort all active qc's of @ap and schedule EH.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	Number of aborted qc's.
 */
int ata_port_abort(struct ata_port *ap)
{
	return ata_do_link_abort(ap, NULL);
}

/**
 *	__ata_port_freeze - freeze port
 *	@ap: ATA port to freeze
 *
 *	This function is called when HSM violation or some other
 *	condition disrupts normal operation of the port.  Frozen port
 *	is not allowed to perform any operation until the port is
 *	thawed, which usually follows a successful reset.
 *
 *	ap->ops->freeze() callback can be used for freezing the port
 *	hardware-wise (e.g. mask interrupt and stop DMA engine).  If a
 *	port cannot be frozen hardware-wise, the interrupt handler
 *	must ack and clear interrupts unconditionally while the port
 *	is frozen.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
static void __ata_port_freeze(struct ata_port *ap)
{
	WARN_ON(!ap->ops->error_handler);

	if (ap->ops->freeze)
		ap->ops->freeze(ap);

	ap->pflags |= ATA_PFLAG_FROZEN;

	DPRINTK("ata%u port frozen\n", ap->print_id);
}

/**
 *	ata_port_freeze - abort & freeze port
 *	@ap: ATA port to freeze
 *
 *	Abort and freeze @ap.  The freeze operation must be called
 *	first, because some hardware requires special operations
 *	before the taskfile registers are accessible.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	Number of aborted commands.
 */
int ata_port_freeze(struct ata_port *ap)
{
	int nr_aborted;

	WARN_ON(!ap->ops->error_handler);

	__ata_port_freeze(ap);
	nr_aborted = ata_port_abort(ap);

	return nr_aborted;
}
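
/*
 * Typical LLDD usage (illustrative): an interrupt handler that hits a
 * fatal condition records a description, sets an error mask and
 * freezes the port, all under ap->lock.  The error string below is
 * made up for the example.
 *
 *	ata_ehi_push_desc(&ap->link.eh_info, "host bus error");
 *	ap->link.eh_info.err_mask |= AC_ERR_HOST_BUS;
 *	ata_port_freeze(ap);	(aborts in-flight qcs and schedules EH)
 */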

/**
 *	sata_async_notification - SATA async notification handler
 *	@ap: ATA port where async notification is received
 *
 *	Handler to be called when async notification via SDB FIS is
 *	received.  This function schedules EH if necessary.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	1 if EH is scheduled, 0 otherwise.
 */
int sata_async_notification(struct ata_port *ap)
{
	u32 sntf;
	int rc;

	if (!(ap->flags & ATA_FLAG_AN))
		return 0;

	rc = sata_scr_read(&ap->link, SCR_NOTIFICATION, &sntf);
	if (rc == 0)
		sata_scr_write(&ap->link, SCR_NOTIFICATION, sntf);

	if (!sata_pmp_attached(ap) || rc) {
		/* PMP is not attached or SNTF is not available */
		if (!sata_pmp_attached(ap)) {
			/* PMP is not attached.  Check whether ATAPI
			 * AN is configured.  If so, notify media
			 * change.
			 */
			struct ata_device *dev = ap->link.device;

			if ((dev->class == ATA_DEV_ATAPI) &&
			    (dev->flags & ATA_DFLAG_AN))
				ata_scsi_media_change_notify(dev);
			return 0;
		} else {
			/* PMP is attached but SNTF is not available.
			 * ATAPI async media change notification is
			 * not used.  The PMP must be reporting PHY
			 * status change, schedule EH.
			 */
			ata_port_schedule_eh(ap);
			return 1;
		}
	} else {
		/* PMP is attached and SNTF is available */
		struct ata_link *link;

		/* check and notify ATAPI AN */
		ata_for_each_link(link, ap, EDGE) {
			if (!(sntf & (1 << link->pmp)))
				continue;

			if ((link->device->class == ATA_DEV_ATAPI) &&
			    (link->device->flags & ATA_DFLAG_AN))
				ata_scsi_media_change_notify(link->device);
		}

		/* If PMP is reporting that PHY status of some
		 * downstream ports has changed, schedule EH.
		 */
		if (sntf & (1 << SATA_PMP_CTRL_PORT)) {
			ata_port_schedule_eh(ap);
			return 1;
		}

		return 0;
	}
}

/**
 *	ata_eh_freeze_port - EH helper to freeze port
 *	@ap: ATA port to freeze
 *
 *	Freeze @ap.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_freeze_port(struct ata_port *ap)
{
	unsigned long flags;

	if (!ap->ops->error_handler)
		return;

	spin_lock_irqsave(ap->lock, flags);
	__ata_port_freeze(ap);
	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_thaw_port - EH helper to thaw port
 *	@ap: ATA port to thaw
 *
 *	Thaw frozen port @ap.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_thaw_port(struct ata_port *ap)
{
	unsigned long flags;

	if (!ap->ops->error_handler)
		return;

	spin_lock_irqsave(ap->lock, flags);

	ap->pflags &= ~ATA_PFLAG_FROZEN;

	if (ap->ops->thaw)
		ap->ops->thaw(ap);

	spin_unlock_irqrestore(ap->lock, flags);

	DPRINTK("ata%u port thawed\n", ap->print_id);
}
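
/*
 * Pairing note (illustrative): recovery code keeps a port frozen
 * across destructive operations and thaws it once the port is known
 * good again, typically right after a successful reset:
 *
 *	ata_eh_freeze_port(ap);
 *	(reset the link, revalidate devices)
 *	ata_eh_thaw_port(ap);
 */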

static void ata_eh_scsidone(struct scsi_cmnd *scmd)
{
	/* nada */
}

static void __ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	struct scsi_cmnd *scmd = qc->scsicmd;
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);
	qc->scsidone = ata_eh_scsidone;
	__ata_qc_complete(qc);
	WARN_ON(ata_tag_valid(qc->tag));
	spin_unlock_irqrestore(ap->lock, flags);

	scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
}

/**
 *	ata_eh_qc_complete - Complete an active ATA command from EH
 *	@qc: Command to complete
 *
 *	Indicate to the mid and upper layers that an ATA command has
 *	completed.  To be used from EH.
 */
void ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *scmd = qc->scsicmd;
	scmd->retries = scmd->allowed;
	__ata_eh_qc_complete(qc);
}

/**
 *	ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH
 *	@qc: Command to retry
 *
 *	Indicate to the mid and upper layers that an ATA command
 *	should be retried.  To be used from EH.
 *
 *	SCSI midlayer limits the number of retries to scmd->allowed.
 *	scmd->allowed is incremented for commands which get retried
 *	due to unrelated failures (qc->err_mask is zero).
 */
void ata_eh_qc_retry(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *scmd = qc->scsicmd;
	if (!qc->err_mask)
		scmd->allowed++;
	__ata_eh_qc_complete(qc);
}

/**
 *	ata_dev_disable - disable ATA device
 *	@dev: ATA device to disable
 *
 *	Disable @dev.
 *
 *	Locking:
 *	EH context.
 */
void ata_dev_disable(struct ata_device *dev)
{
	if (!ata_dev_enabled(dev))
		return;

	if (ata_msg_drv(dev->link->ap))
		ata_dev_warn(dev, "disabled\n");
	ata_acpi_on_disable(dev);
	ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO0 | ATA_DNXFER_QUIET);
	dev->class++;

	/* From now till the next successful probe, ering is used to
	 * track probe failures.  Clear accumulated device error info.
	 */
	ata_ering_clear(&dev->ering);
}

/**
 *	ata_eh_detach_dev - detach ATA device
 *	@dev: ATA device to detach
 *
 *	Detach @dev.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_detach_dev(struct ata_device *dev)
{
	struct ata_link *link = dev->link;
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	unsigned long flags;

	ata_dev_disable(dev);

	spin_lock_irqsave(ap->lock, flags);

	dev->flags &= ~ATA_DFLAG_DETACH;

	if (ata_scsi_offline_dev(dev)) {
		dev->flags |= ATA_DFLAG_DETACHED;
		ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
	}

	/* clear per-dev EH info */
	ata_eh_clear_action(link, dev, &link->eh_info, ATA_EH_PERDEV_MASK);
	ata_eh_clear_action(link, dev, &link->eh_context.i, ATA_EH_PERDEV_MASK);
	ehc->saved_xfer_mode[dev->devno] = 0;
	ehc->saved_ncq_enabled &= ~(1 << dev->devno);

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_about_to_do - about to perform eh_action
 *	@link: target ATA link
 *	@dev: target ATA dev for per-dev action (can be NULL)
 *	@action: action about to be performed
 *
 *	Called just before performing EH actions to clear related bits
 *	in @link->eh_info such that eh actions are not unnecessarily
 *	repeated.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev,
			unsigned int action)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_info *ehi = &link->eh_info;
	struct ata_eh_context *ehc = &link->eh_context;
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);

	ata_eh_clear_action(link, dev, ehi, action);

	/* About to take EH action, set RECOVERED.  Ignore actions on
	 * slave links as master will do them again.
	 */
	if (!(ehc->i.flags & ATA_EHI_QUIET) && link != ap->slave_link)
		ap->pflags |= ATA_PFLAG_RECOVERED;

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_done - EH action complete
 *	@link: ATA link for which EH actions are complete
 *	@dev: target ATA dev for per-dev action (can be NULL)
 *	@action: action just completed
 *
 *	Called right after performing EH actions to clear related bits
 *	in @link->eh_context.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_done(struct ata_link *link, struct ata_device *dev,
		 unsigned int action)
{
	struct ata_eh_context *ehc = &link->eh_context;

	ata_eh_clear_action(link, dev, &ehc->i, action);
}

/**
 *	ata_err_string - convert err_mask to descriptive string
 *	@err_mask: error mask to convert to string
 *
 *	Convert @err_mask to descriptive string.  Errors are
 *	prioritized according to severity and only the most severe
 *	error is reported.
 *
 *	LOCKING:
 *	None.
 *
 *	RETURNS:
 *	Descriptive string for @err_mask
 */
static const char *ata_err_string(unsigned int err_mask)
{
	if (err_mask & AC_ERR_HOST_BUS)
		return "host bus error";
	if (err_mask & AC_ERR_ATA_BUS)
		return "ATA bus error";
	if (err_mask & AC_ERR_TIMEOUT)
		return "timeout";
	if (err_mask & AC_ERR_HSM)
		return "HSM violation";
	if (err_mask & AC_ERR_SYSTEM)
		return "internal error";
	if (err_mask & AC_ERR_MEDIA)
		return "media error";
	if (err_mask & AC_ERR_INVALID)
		return "invalid argument";
	if (err_mask & AC_ERR_DEV)
		return "device error";
	return "unknown error";
}

/**
 *	ata_eh_read_log_10h - Read log page 10h for NCQ error details
 *	@dev: Device to read log page 10h from
 *	@tag: Resulting tag of the failed command
 *	@tf: Resulting taskfile registers of the failed command
 *
 *	Read log page 10h to obtain NCQ error details and clear error
 *	condition.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, -errno otherwise.
 */
static int ata_eh_read_log_10h(struct ata_device *dev,
			       int *tag, struct ata_taskfile *tf)
{
	u8 *buf = dev->link->ap->sector_buf;
	unsigned int err_mask;
	u8 csum;
	int i;

	err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, 0, buf, 1);
	if (err_mask)
		return -EIO;

	csum = 0;
	for (i = 0; i < ATA_SECT_SIZE; i++)
		csum += buf[i];
	if (csum)
		ata_dev_warn(dev, "invalid checksum 0x%x on log page 10h\n",
			     csum);

	if (buf[0] & 0x80)
		return -ENOENT;

	*tag = buf[0] & 0x1f;

	tf->command = buf[2];
	tf->feature = buf[3];
	tf->lbal = buf[4];
	tf->lbam = buf[5];
	tf->lbah = buf[6];
	tf->device = buf[7];
	tf->hob_lbal = buf[8];
	tf->hob_lbam = buf[9];
	tf->hob_lbah = buf[10];
	tf->nsect = buf[12];
	tf->hob_nsect = buf[13];
	if (ata_id_has_ncq_autosense(dev->id))
		tf->auxiliary = buf[14] << 16 | buf[15] << 8 | buf[16];

	return 0;
}

/**
 *	atapi_eh_tur - perform ATAPI TEST_UNIT_READY
 *	@dev: target ATAPI device
 *	@r_sense_key: out parameter for sense_key
 *
 *	Perform ATAPI TEST_UNIT_READY.
 *
 *	LOCKING:
 *	EH context (may sleep).
 *
 *	RETURNS:
 *	0 on success, AC_ERR_* mask on failure.
 */
unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key)
{
	u8 cdb[ATAPI_CDB_LEN] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 };
	struct ata_taskfile tf;
	unsigned int err_mask;

	ata_tf_init(dev, &tf);

	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.command = ATA_CMD_PACKET;
	tf.protocol = ATAPI_PROT_NODATA;

	err_mask = ata_exec_internal(dev, &tf, cdb, DMA_NONE, NULL, 0, 0);
	if (err_mask == AC_ERR_DEV)
		*r_sense_key = tf.feature >> 4;
	return err_mask;
}
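
/*
 * Illustrative sketch: a UNIT ATTENTION draining loop built on
 * atapi_eh_tur(), bounded by ATA_EH_UA_TRIES from the enum at the top
 * of this file.  This mirrors the usual EH pattern; the loop below is
 * a sketch, not the in-tree implementation.
 *
 *	int tries = ATA_EH_UA_TRIES;
 *	u8 sense_key = 0;
 *
 *	while (tries--) {
 *		unsigned int err = atapi_eh_tur(dev, &sense_key);
 *
 *		if (!err || sense_key != UNIT_ATTENTION)
 *			break;
 *	}
 */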

/**
 *	ata_eh_request_sense - perform REQUEST_SENSE_DATA_EXT
 *	@qc: qc to perform REQUEST_SENSE_DATA_EXT to
 *	@cmd: scsi command for which the sense code should be set
 *
 *	Perform REQUEST_SENSE_DATA_EXT after the device reported CHECK
 *	SENSE.  This function is an EH helper.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
static void ata_eh_request_sense(struct ata_queued_cmd *qc,
				 struct scsi_cmnd *cmd)
{
	struct ata_device *dev = qc->dev;
	struct ata_taskfile tf;
	unsigned int err_mask;

	if (qc->ap->pflags & ATA_PFLAG_FROZEN) {
		ata_dev_warn(dev, "sense data available but port frozen\n");
		return;
	}

	if (!cmd || qc->flags & ATA_QCFLAG_SENSE_VALID)
		return;

	if (!ata_id_sense_reporting_enabled(dev->id)) {
		ata_dev_warn(qc->dev, "sense data reporting disabled\n");
		return;
	}

	DPRINTK("ATA request sense\n");

	ata_tf_init(dev, &tf);
	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.flags |= ATA_TFLAG_LBA | ATA_TFLAG_LBA48;
	tf.command = ATA_CMD_REQ_SENSE_DATA;
	tf.protocol = ATA_PROT_NODATA;

	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
	/* Ignore err_mask; ATA_ERR might be set */
	if (tf.command & ATA_SENSE) {
		ata_scsi_set_sense(dev, cmd, tf.lbah, tf.lbam, tf.lbal);
		qc->flags |= ATA_QCFLAG_SENSE_VALID;
	} else {
		ata_dev_warn(dev, "request sense failed stat %02x emask %x\n",
			     tf.command, err_mask);
	}
}

/**
 *	atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
 *	@dev: device to perform REQUEST_SENSE to
 *	@sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
 *	@dfl_sense_key: default sense key to use
 *
 *	Perform ATAPI REQUEST_SENSE after the device reported CHECK
 *	SENSE.  This function is an EH helper.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, AC_ERR_* mask on failure
 */
unsigned int atapi_eh_request_sense(struct ata_device *dev,
				    u8 *sense_buf, u8 dfl_sense_key)
{
	u8 cdb[ATAPI_CDB_LEN] =
		{ REQUEST_SENSE, 0, 0, 0, SCSI_SENSE_BUFFERSIZE, 0 };
	struct ata_port *ap = dev->link->ap;
	struct ata_taskfile tf;

	DPRINTK("ATAPI request sense\n");

	memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);

	/* initialize sense_buf with the error register,
	 * for the case where they are -not- overwritten
	 */
	sense_buf[0] = 0x70;
	sense_buf[2] = dfl_sense_key;

	/* some devices time out if garbage left in tf */
	ata_tf_init(dev, &tf);

	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.command = ATA_CMD_PACKET;

	/* is it pointless to prefer PIO for "safety reasons"? */
	if (ap->flags & ATA_FLAG_PIO_DMA) {
		tf.protocol = ATAPI_PROT_DMA;
		tf.feature |= ATAPI_PKT_DMA;
	} else {
		tf.protocol = ATAPI_PROT_PIO;
		tf.lbam = SCSI_SENSE_BUFFERSIZE;
		tf.lbah = 0;
	}

	return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
				 sense_buf, SCSI_SENSE_BUFFERSIZE, 0);
}

/**
 *	ata_eh_analyze_serror - analyze SError for a failed port
 *	@link: ATA link to analyze SError for
 *
 *	Analyze SError if available and further determine cause of
 *	failure.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_analyze_serror(struct ata_link *link)
{
	struct ata_eh_context *ehc = &link->eh_context;
	u32 serror = ehc->i.serror;
	unsigned int err_mask = 0, action = 0;
	u32 hotplug_mask;

	if (serror & (SERR_PERSISTENT | SERR_DATA)) {
		err_mask |= AC_ERR_ATA_BUS;
		action |= ATA_EH_RESET;
	}
	if (serror & SERR_PROTOCOL) {
		err_mask |= AC_ERR_HSM;
		action |= ATA_EH_RESET;
	}
	if (serror & SERR_INTERNAL) {
		err_mask |= AC_ERR_SYSTEM;
		action |= ATA_EH_RESET;
	}

	/* Determine whether a hotplug event has occurred.  Both
	 * SError.N/X are considered hotplug events for enabled or
	 * host links.  For disabled PMP links, only N bit is
	 * considered as X bit is left at 1 for link plugging.
	 */
	if (link->lpm_policy > ATA_LPM_MAX_POWER)
		hotplug_mask = 0;	/* hotplug doesn't work w/ LPM */
	else if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link))
		hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG;
	else
		hotplug_mask = SERR_PHYRDY_CHG;

	if (serror & hotplug_mask)
		ata_ehi_hotplugged(&ehc->i);

	ehc->i.err_mask |= err_mask;
	ehc->i.action |= action;
}

/**
 *	ata_eh_analyze_ncq_error - analyze NCQ error
 *	@link: ATA link to analyze NCQ error for
 *
 *	Read log page 10h, determine the offending qc and acquire
 *	error status TF.  For NCQ device errors, all an LLDD has to do
 *	is set AC_ERR_DEV in ehi->err_mask.  This function takes
 *	care of the rest.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
void ata_eh_analyze_ncq_error(struct ata_link *link)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_device *dev = link->device;
	struct ata_queued_cmd *qc;
	struct ata_taskfile tf;
	int tag, rc;

	/* if frozen, we can't do much */
	if (ap->pflags & ATA_PFLAG_FROZEN)
		return;

	/* is it NCQ device error? */
	if (!link->sactive || !(ehc->i.err_mask & AC_ERR_DEV))
		return;

	/* has LLDD analyzed already? */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		if (qc->err_mask)
			return;
	}

	/* okay, this error is ours */
	memset(&tf, 0, sizeof(tf));
	rc = ata_eh_read_log_10h(dev, &tag, &tf);
	if (rc) {
		ata_link_err(link, "failed to read log page 10h (errno=%d)\n",
			     rc);
		return;
	}

	if (!(link->sactive & (1 << tag))) {
		ata_link_err(link, "log page 10h reported inactive tag %d\n",
			     tag);
		return;
	}

	/* we've got the perpetrator, condemn it */
	qc = __ata_qc_from_tag(ap, tag);
	memcpy(&qc->result_tf, &tf, sizeof(tf));
	qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48;
	qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ;
	if ((qc->result_tf.command & ATA_SENSE) || qc->result_tf.auxiliary) {
		char sense_key, asc, ascq;

		sense_key = (qc->result_tf.auxiliary >> 16) & 0xff;
		asc = (qc->result_tf.auxiliary >> 8) & 0xff;
		ascq = qc->result_tf.auxiliary & 0xff;
		ata_scsi_set_sense(dev, qc->scsicmd, sense_key, asc, ascq);
		ata_scsi_set_sense_information(dev, qc->scsicmd,
					       &qc->result_tf);
		qc->flags |= ATA_QCFLAG_SENSE_VALID;
	}

	ehc->i.err_mask &= ~AC_ERR_DEV;
}

/**
 *	ata_eh_analyze_tf - analyze taskfile of a failed qc
 *	@qc: qc to analyze
 *	@tf: Taskfile registers to analyze
 *
 *	Analyze taskfile of @qc and further determine cause of
 *	failure.  This function also requests ATAPI sense data if
 *	available.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	Determined recovery action
 */
static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
				      const struct ata_taskfile *tf)
{
	unsigned int tmp, action = 0;
	u8 stat = tf->command, err = tf->feature;

	if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) {
		qc->err_mask |= AC_ERR_HSM;
		return ATA_EH_RESET;
	}

	if (stat & (ATA_ERR | ATA_DF)) {
		qc->err_mask |= AC_ERR_DEV;
		/*
		 * Sense data reporting does not work if the
		 * device fault bit is set.
		 */
		if (stat & ATA_DF)
			stat &= ~ATA_SENSE;
	} else {
		return 0;
	}

	switch (qc->dev->class) {
	case ATA_DEV_ATA:
	case ATA_DEV_ZAC:
		if (stat & ATA_SENSE)
			ata_eh_request_sense(qc, qc->scsicmd);
		if (err & ATA_ICRC)
			qc->err_mask |= AC_ERR_ATA_BUS;
		if (err & (ATA_UNC | ATA_AMNF))
			qc->err_mask |= AC_ERR_MEDIA;
		if (err & ATA_IDNF)
			qc->err_mask |= AC_ERR_INVALID;
		break;

	case ATA_DEV_ATAPI:
		if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) {
			tmp = atapi_eh_request_sense(qc->dev,
						qc->scsicmd->sense_buffer,
						qc->result_tf.feature >> 4);
			if (!tmp)
				qc->flags |= ATA_QCFLAG_SENSE_VALID;
			else
				qc->err_mask |= tmp;
		}
	}

	if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
		int ret = scsi_check_sense(qc->scsicmd);
		/*
		 * SUCCESS here means that the sense code could be
		 * evaluated and should be passed to the upper layers
		 * for correct evaluation.
		 * FAILED means the sense code could not be interpreted
		 * and the device would need to be reset.
		 * NEEDS_RETRY and ADD_TO_MLQUEUE means that the
		 * command would need to be retried.
		 */
		if (ret == NEEDS_RETRY || ret == ADD_TO_MLQUEUE) {
			qc->flags |= ATA_QCFLAG_RETRY;
			qc->err_mask |= AC_ERR_OTHER;
		} else if (ret != SUCCESS) {
			qc->err_mask |= AC_ERR_HSM;
		}
	}
	if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS))
		action |= ATA_EH_RESET;

	return action;
}

static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask,
				   int *xfer_ok)
{
	int base = 0;

	if (!(eflags & ATA_EFLAG_DUBIOUS_XFER))
		*xfer_ok = 1;

	if (!*xfer_ok)
		base = ATA_ECAT_DUBIOUS_NONE;

	if (err_mask & AC_ERR_ATA_BUS)
		return base + ATA_ECAT_ATA_BUS;

	if (err_mask & AC_ERR_TIMEOUT)
		return base + ATA_ECAT_TOUT_HSM;

	if (eflags & ATA_EFLAG_IS_IO) {
		if (err_mask & AC_ERR_HSM)
			return base + ATA_ECAT_TOUT_HSM;
		if ((err_mask &
		     (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
			return base + ATA_ECAT_UNK_DEV;
	}

	return 0;
}

struct speed_down_verdict_arg {
	u64 since;
	int xfer_ok;
	int nr_errors[ATA_ECAT_NR];
};

static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
{
	struct speed_down_verdict_arg *arg = void_arg;
	int cat;

	if ((ent->eflags & ATA_EFLAG_OLD_ER) || (ent->timestamp < arg->since))
		return -1;

	cat = ata_eh_categorize_error(ent->eflags, ent->err_mask,
				      &arg->xfer_ok);
	arg->nr_errors[cat]++;

	return 0;
}

/**
 *	ata_eh_speed_down_verdict - Determine speed down verdict
 *	@dev: Device of interest
 *
 *	This function examines error ring of @dev and determines
 *	whether NCQ needs to be turned off, transfer speed should be
 *	stepped down, or falling back to PIO is necessary.
 *
 *	ECAT_ATA_BUS	: ATA_BUS error for any command
 *
 *	ECAT_TOUT_HSM	: TIMEOUT for any command or HSM violation for
 *			  IO commands
 *
 *	ECAT_UNK_DEV	: Unknown DEV error for IO commands
 *
 *	ECAT_DUBIOUS_*	: Identical to above three but occurred while
 *			  data transfer hasn't been verified.
 *
 *	Verdicts are
 *
 *	NCQ_OFF		: Turn off NCQ.
 *
 *	SPEED_DOWN	: Speed down transfer speed but don't fall back
 *			  to PIO.
 *
 *	FALLBACK_TO_PIO	: Fall back to PIO.
 *
 *	Even if multiple verdicts are returned, only one action is
 *	taken per error.  An action triggered by non-DUBIOUS errors
 *	clears ering, while one triggered by DUBIOUS_* errors doesn't.
 *	This is to expedite speed down decisions right after device is
 *	initially configured.
 *
 *	The following are speed down rules.  #1 and #2 deal with
 *	DUBIOUS errors.
 *
 *	1. If more than one DUBIOUS_ATA_BUS or DUBIOUS_TOUT_HSM errors
 *	   occurred during last 5 mins, SPEED_DOWN and FALLBACK_TO_PIO.
 *
 *	2. If more than one DUBIOUS_TOUT_HSM or DUBIOUS_UNK_DEV errors
 *	   occurred during last 5 mins, NCQ_OFF.
 *
 *	3. If more than 8 ATA_BUS, TOUT_HSM or UNK_DEV errors
 *	   occurred during last 5 mins, FALLBACK_TO_PIO
 *
 *	4. If more than 3 TOUT_HSM or UNK_DEV errors occurred
 *	   during last 10 mins, NCQ_OFF.
 *
 *	5. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 6
 *	   UNK_DEV errors occurred during last 10 mins, SPEED_DOWN.
 *
 *	LOCKING:
 *	Inherited from caller.
 *
 *	RETURNS:
 *	OR of ATA_EH_SPDN_* flags.
 */
static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev)
{
	const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ;
	u64 j64 = get_jiffies_64();
	struct speed_down_verdict_arg arg;
	unsigned int verdict = 0;

	/* scan past 5 mins of error history */
	memset(&arg, 0, sizeof(arg));
	arg.since = j64 - min(j64, j5mins);
	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);

	if (arg.nr_errors[ATA_ECAT_DUBIOUS_ATA_BUS] +
	    arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] > 1)
		verdict |= ATA_EH_SPDN_SPEED_DOWN |
			ATA_EH_SPDN_FALLBACK_TO_PIO | ATA_EH_SPDN_KEEP_ERRORS;

	if (arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] +
	    arg.nr_errors[ATA_ECAT_DUBIOUS_UNK_DEV] > 1)
		verdict |= ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_KEEP_ERRORS;

	if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
	    arg.nr_errors[ATA_ECAT_TOUT_HSM] +
	    arg.nr_errors[ATA_ECAT_UNK_DEV] > 6)
		verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO;

	/* scan past 10 mins of error history */
	memset(&arg, 0, sizeof(arg));
	arg.since = j64 - min(j64, j10mins);
	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);

	if (arg.nr_errors[ATA_ECAT_TOUT_HSM] +
	    arg.nr_errors[ATA_ECAT_UNK_DEV] > 3)
		verdict |= ATA_EH_SPDN_NCQ_OFF;

	if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
	    arg.nr_errors[ATA_ECAT_TOUT_HSM] > 3 ||
	    arg.nr_errors[ATA_ECAT_UNK_DEV] > 6)
		verdict |= ATA_EH_SPDN_SPEED_DOWN;

	return verdict;
}
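
/*
 * Worked example (illustrative): two DUBIOUS_ATA_BUS errors within the
 * last 5 minutes trip rule #1, so the verdict is
 * ATA_EH_SPDN_SPEED_DOWN | ATA_EH_SPDN_FALLBACK_TO_PIO together with
 * ATA_EH_SPDN_KEEP_ERRORS, i.e. the ering is preserved because the
 * errors are DUBIOUS.  Seven UNK_DEV errors spread over the last 10
 * minutes (but not within 5) trip rules #4 and #5 instead, yielding
 * ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_SPEED_DOWN, which clears the ering
 * once acted upon.
 */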
*/ 2031 if (verdict & ATA_EH_SPDN_SPEED_DOWN) { 2032 /* speed down SATA link speed if possible */ 2033 if (sata_down_spd_limit(link, 0) == 0) { 2034 action |= ATA_EH_RESET; 2035 goto done; 2036 } 2037 2038 /* lower transfer mode */ 2039 if (dev->spdn_cnt < 2) { 2040 static const int dma_dnxfer_sel[] = 2041 { ATA_DNXFER_DMA, ATA_DNXFER_40C }; 2042 static const int pio_dnxfer_sel[] = 2043 { ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 }; 2044 int sel; 2045 2046 if (dev->xfer_shift != ATA_SHIFT_PIO) 2047 sel = dma_dnxfer_sel[dev->spdn_cnt]; 2048 else 2049 sel = pio_dnxfer_sel[dev->spdn_cnt]; 2050 2051 dev->spdn_cnt++; 2052 2053 if (ata_down_xfermask_limit(dev, sel) == 0) { 2054 action |= ATA_EH_RESET; 2055 goto done; 2056 } 2057 } 2058 } 2059 2060 /* Fall back to PIO? Slowing down to PIO is meaningless for 2061 * SATA ATA devices. Consider it only for PATA and SATAPI. 2062 */ 2063 if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) && 2064 (link->ap->cbl != ATA_CBL_SATA || dev->class == ATA_DEV_ATAPI) && 2065 (dev->xfer_shift != ATA_SHIFT_PIO)) { 2066 if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) { 2067 dev->spdn_cnt = 0; 2068 action |= ATA_EH_RESET; 2069 goto done; 2070 } 2071 } 2072 2073 return 0; 2074 done: 2075 /* device has been slowed down, blow error history */ 2076 if (!(verdict & ATA_EH_SPDN_KEEP_ERRORS)) 2077 ata_ering_clear(&dev->ering); 2078 return action; 2079 } 2080 2081 /** 2082 * ata_eh_worth_retry - analyze error and decide whether to retry 2083 * @qc: qc to possibly retry 2084 * 2085 * Look at the cause of the error and decide if a retry 2086 * might be useful or not. We don't want to retry media errors 2087 * because the drive itself has probably already taken 10-30 seconds 2088 * doing its own internal retries before reporting the failure. 2089 */ 2090 static inline int ata_eh_worth_retry(struct ata_queued_cmd *qc) 2091 { 2092 if (qc->err_mask & AC_ERR_MEDIA) 2093 return 0; /* don't retry media errors */ 2094 if (qc->flags & ATA_QCFLAG_IO) 2095 return 1; /* otherwise retry anything from fs stack */ 2096 if (qc->err_mask & AC_ERR_INVALID) 2097 return 0; /* don't retry these */ 2098 return qc->err_mask != AC_ERR_DEV; /* retry if not dev error */ 2099 } 2100 2101 /** 2102 * ata_eh_link_autopsy - analyze error and determine recovery action 2103 * @link: host link to perform autopsy on 2104 * 2105 * Analyze why @link failed and determine which recovery actions 2106 * are needed. This function also sets more detailed AC_ERR_* 2107 * values and fills sense data for ATAPI CHECK SENSE. 2108 * 2109 * LOCKING: 2110 * Kernel thread context (may sleep). 
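* * For instance (illustrative): a timed-out command leaves AC_ERR_TIMEOUT in all_err_mask, which by itself is enough to force ATA_EH_RESET among the default EH actions enforced below.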
2111 */ 2112 static void ata_eh_link_autopsy(struct ata_link *link) 2113 { 2114 struct ata_port *ap = link->ap; 2115 struct ata_eh_context *ehc = &link->eh_context; 2116 struct ata_device *dev; 2117 unsigned int all_err_mask = 0, eflags = 0; 2118 int tag; 2119 u32 serror; 2120 int rc; 2121 2122 DPRINTK("ENTER\n"); 2123 2124 if (ehc->i.flags & ATA_EHI_NO_AUTOPSY) 2125 return; 2126 2127 /* obtain and analyze SError */ 2128 rc = sata_scr_read(link, SCR_ERROR, &serror); 2129 if (rc == 0) { 2130 ehc->i.serror |= serror; 2131 ata_eh_analyze_serror(link); 2132 } else if (rc != -EOPNOTSUPP) { 2133 /* SError read failed, force reset and probing */ 2134 ehc->i.probe_mask |= ATA_ALL_DEVICES; 2135 ehc->i.action |= ATA_EH_RESET; 2136 ehc->i.err_mask |= AC_ERR_OTHER; 2137 } 2138 2139 /* analyze NCQ failure */ 2140 ata_eh_analyze_ncq_error(link); 2141 2142 /* any real error trumps AC_ERR_OTHER */ 2143 if (ehc->i.err_mask & ~AC_ERR_OTHER) 2144 ehc->i.err_mask &= ~AC_ERR_OTHER; 2145 2146 all_err_mask |= ehc->i.err_mask; 2147 2148 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 2149 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 2150 2151 if (!(qc->flags & ATA_QCFLAG_FAILED) || 2152 ata_dev_phys_link(qc->dev) != link) 2153 continue; 2154 2155 /* inherit upper level err_mask */ 2156 qc->err_mask |= ehc->i.err_mask; 2157 2158 /* analyze TF */ 2159 ehc->i.action |= ata_eh_analyze_tf(qc, &qc->result_tf); 2160 2161 /* DEV errors are probably spurious in case of ATA_BUS error */ 2162 if (qc->err_mask & AC_ERR_ATA_BUS) 2163 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA | 2164 AC_ERR_INVALID); 2165 2166 /* any real error trumps unknown error */ 2167 if (qc->err_mask & ~AC_ERR_OTHER) 2168 qc->err_mask &= ~AC_ERR_OTHER; 2169 2170 /* SENSE_VALID trumps dev/unknown error and revalidation */ 2171 if (qc->flags & ATA_QCFLAG_SENSE_VALID) 2172 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER); 2173 2174 /* determine whether the command is worth retrying */ 2175 if (ata_eh_worth_retry(qc)) 2176 qc->flags |= ATA_QCFLAG_RETRY; 2177 2178 /* accumulate error info */ 2179 ehc->i.dev = qc->dev; 2180 all_err_mask |= qc->err_mask; 2181 if (qc->flags & ATA_QCFLAG_IO) 2182 eflags |= ATA_EFLAG_IS_IO; 2183 trace_ata_eh_link_autopsy_qc(qc); 2184 } 2185 2186 /* enforce default EH actions */ 2187 if (ap->pflags & ATA_PFLAG_FROZEN || 2188 all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT)) 2189 ehc->i.action |= ATA_EH_RESET; 2190 else if (((eflags & ATA_EFLAG_IS_IO) && all_err_mask) || 2191 (!(eflags & ATA_EFLAG_IS_IO) && (all_err_mask & ~AC_ERR_DEV))) 2192 ehc->i.action |= ATA_EH_REVALIDATE; 2193 2194 /* If we have offending qcs and the associated failed device, 2195 * perform per-dev EH action only on the offending device. 
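* Only the ATA_EH_PERDEV_MASK bits (revalidate and park, assuming the current libata.h definition) move into dev_action[]; port-wide actions such as reset stay in ehc->i.action.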
2196 */ 2197 if (ehc->i.dev) { 2198 ehc->i.dev_action[ehc->i.dev->devno] |= 2199 ehc->i.action & ATA_EH_PERDEV_MASK; 2200 ehc->i.action &= ~ATA_EH_PERDEV_MASK; 2201 } 2202 2203 /* propagate timeout to host link */ 2204 if ((all_err_mask & AC_ERR_TIMEOUT) && !ata_is_host_link(link)) 2205 ap->link.eh_context.i.err_mask |= AC_ERR_TIMEOUT; 2206 2207 /* record error and consider speeding down */ 2208 dev = ehc->i.dev; 2209 if (!dev && ((ata_link_max_devices(link) == 1 && 2210 ata_dev_enabled(link->device)))) 2211 dev = link->device; 2212 2213 if (dev) { 2214 if (dev->flags & ATA_DFLAG_DUBIOUS_XFER) 2215 eflags |= ATA_EFLAG_DUBIOUS_XFER; 2216 ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask); 2217 trace_ata_eh_link_autopsy(dev, ehc->i.action, all_err_mask); 2218 } 2219 DPRINTK("EXIT\n"); 2220 } 2221 2222 /** 2223 * ata_eh_autopsy - analyze error and determine recovery action 2224 * @ap: host port to perform autopsy on 2225 * 2226 * Analyze all links of @ap and determine why they failed and 2227 * which recovery actions are needed. 2228 * 2229 * LOCKING: 2230 * Kernel thread context (may sleep). 2231 */ 2232 void ata_eh_autopsy(struct ata_port *ap) 2233 { 2234 struct ata_link *link; 2235 2236 ata_for_each_link(link, ap, EDGE) 2237 ata_eh_link_autopsy(link); 2238 2239 /* Handle the frigging slave link. Autopsy is done similarly 2240 * but actions and flags are transferred over to the master 2241 * link and handled from there. 2242 */ 2243 if (ap->slave_link) { 2244 struct ata_eh_context *mehc = &ap->link.eh_context; 2245 struct ata_eh_context *sehc = &ap->slave_link->eh_context; 2246 2247 /* transfer control flags from master to slave */ 2248 sehc->i.flags |= mehc->i.flags & ATA_EHI_TO_SLAVE_MASK; 2249 2250 /* perform autopsy on the slave link */ 2251 ata_eh_link_autopsy(ap->slave_link); 2252 2253 /* transfer actions from slave to master and clear slave */ 2254 ata_eh_about_to_do(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS); 2255 mehc->i.action |= sehc->i.action; 2256 mehc->i.dev_action[1] |= sehc->i.dev_action[1]; 2257 mehc->i.flags |= sehc->i.flags; 2258 ata_eh_done(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS); 2259 } 2260 2261 /* Autopsy of fanout ports can affect host link autopsy. 2262 * Perform host link autopsy last. 2263 */ 2264 if (sata_pmp_attached(ap)) 2265 ata_eh_link_autopsy(&ap->link); 2266 } 2267 2268 /** 2269 * ata_get_cmd_descript - get description for ATA command 2270 * @command: ATA command code to get description for 2271 * 2272 * Return a textual description of the given command, or NULL if the 2273 * command is not known. 
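* * For example (illustrative): ata_get_cmd_descript(ATA_CMD_FLUSH) returns "FLUSH CACHE" on kernels built with CONFIG_ATA_VERBOSE_ERROR, and NULL otherwise.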
2274 * 2275 * LOCKING: 2276 * None 2277 */ 2278 const char *ata_get_cmd_descript(u8 command) 2279 { 2280 #ifdef CONFIG_ATA_VERBOSE_ERROR 2281 static const struct 2282 { 2283 u8 command; 2284 const char *text; 2285 } cmd_descr[] = { 2286 { ATA_CMD_DEV_RESET, "DEVICE RESET" }, 2287 { ATA_CMD_CHK_POWER, "CHECK POWER MODE" }, 2288 { ATA_CMD_STANDBY, "STANDBY" }, 2289 { ATA_CMD_IDLE, "IDLE" }, 2290 { ATA_CMD_EDD, "EXECUTE DEVICE DIAGNOSTIC" }, 2291 { ATA_CMD_DOWNLOAD_MICRO, "DOWNLOAD MICROCODE" }, 2292 { ATA_CMD_DOWNLOAD_MICRO_DMA, "DOWNLOAD MICROCODE DMA" }, 2293 { ATA_CMD_NOP, "NOP" }, 2294 { ATA_CMD_FLUSH, "FLUSH CACHE" }, 2295 { ATA_CMD_FLUSH_EXT, "FLUSH CACHE EXT" }, 2296 { ATA_CMD_ID_ATA, "IDENTIFY DEVICE" }, 2297 { ATA_CMD_ID_ATAPI, "IDENTIFY PACKET DEVICE" }, 2298 { ATA_CMD_SERVICE, "SERVICE" }, 2299 { ATA_CMD_READ, "READ DMA" }, 2300 { ATA_CMD_READ_EXT, "READ DMA EXT" }, 2301 { ATA_CMD_READ_QUEUED, "READ DMA QUEUED" }, 2302 { ATA_CMD_READ_STREAM_EXT, "READ STREAM EXT" }, 2303 { ATA_CMD_READ_STREAM_DMA_EXT, "READ STREAM DMA EXT" }, 2304 { ATA_CMD_WRITE, "WRITE DMA" }, 2305 { ATA_CMD_WRITE_EXT, "WRITE DMA EXT" }, 2306 { ATA_CMD_WRITE_QUEUED, "WRITE DMA QUEUED EXT" }, 2307 { ATA_CMD_WRITE_STREAM_EXT, "WRITE STREAM EXT" }, 2308 { ATA_CMD_WRITE_STREAM_DMA_EXT, "WRITE STREAM DMA EXT" }, 2309 { ATA_CMD_WRITE_FUA_EXT, "WRITE DMA FUA EXT" }, 2310 { ATA_CMD_WRITE_QUEUED_FUA_EXT, "WRITE DMA QUEUED FUA EXT" }, 2311 { ATA_CMD_FPDMA_READ, "READ FPDMA QUEUED" }, 2312 { ATA_CMD_FPDMA_WRITE, "WRITE FPDMA QUEUED" }, 2313 { ATA_CMD_FPDMA_SEND, "SEND FPDMA QUEUED" }, 2314 { ATA_CMD_FPDMA_RECV, "RECEIVE FPDMA QUEUED" }, 2315 { ATA_CMD_PIO_READ, "READ SECTOR(S)" }, 2316 { ATA_CMD_PIO_READ_EXT, "READ SECTOR(S) EXT" }, 2317 { ATA_CMD_PIO_WRITE, "WRITE SECTOR(S)" }, 2318 { ATA_CMD_PIO_WRITE_EXT, "WRITE SECTOR(S) EXT" }, 2319 { ATA_CMD_READ_MULTI, "READ MULTIPLE" }, 2320 { ATA_CMD_READ_MULTI_EXT, "READ MULTIPLE EXT" }, 2321 { ATA_CMD_WRITE_MULTI, "WRITE MULTIPLE" }, 2322 { ATA_CMD_WRITE_MULTI_EXT, "WRITE MULTIPLE EXT" }, 2323 { ATA_CMD_WRITE_MULTI_FUA_EXT, "WRITE MULTIPLE FUA EXT" }, 2324 { ATA_CMD_SET_FEATURES, "SET FEATURES" }, 2325 { ATA_CMD_SET_MULTI, "SET MULTIPLE MODE" }, 2326 { ATA_CMD_VERIFY, "READ VERIFY SECTOR(S)" }, 2327 { ATA_CMD_VERIFY_EXT, "READ VERIFY SECTOR(S) EXT" }, 2328 { ATA_CMD_WRITE_UNCORR_EXT, "WRITE UNCORRECTABLE EXT" }, 2329 { ATA_CMD_STANDBYNOW1, "STANDBY IMMEDIATE" }, 2330 { ATA_CMD_IDLEIMMEDIATE, "IDLE IMMEDIATE" }, 2331 { ATA_CMD_SLEEP, "SLEEP" }, 2332 { ATA_CMD_INIT_DEV_PARAMS, "INITIALIZE DEVICE PARAMETERS" }, 2333 { ATA_CMD_READ_NATIVE_MAX, "READ NATIVE MAX ADDRESS" }, 2334 { ATA_CMD_READ_NATIVE_MAX_EXT, "READ NATIVE MAX ADDRESS EXT" }, 2335 { ATA_CMD_SET_MAX, "SET MAX ADDRESS" }, 2336 { ATA_CMD_SET_MAX_EXT, "SET MAX ADDRESS EXT" }, 2337 { ATA_CMD_READ_LOG_EXT, "READ LOG EXT" }, 2338 { ATA_CMD_WRITE_LOG_EXT, "WRITE LOG EXT" }, 2339 { ATA_CMD_READ_LOG_DMA_EXT, "READ LOG DMA EXT" }, 2340 { ATA_CMD_WRITE_LOG_DMA_EXT, "WRITE LOG DMA EXT" }, 2341 { ATA_CMD_TRUSTED_NONDATA, "TRUSTED NON-DATA" }, 2342 { ATA_CMD_TRUSTED_RCV, "TRUSTED RECEIVE" }, 2343 { ATA_CMD_TRUSTED_RCV_DMA, "TRUSTED RECEIVE DMA" }, 2344 { ATA_CMD_TRUSTED_SND, "TRUSTED SEND" }, 2345 { ATA_CMD_TRUSTED_SND_DMA, "TRUSTED SEND DMA" }, 2346 { ATA_CMD_PMP_READ, "READ BUFFER" }, 2347 { ATA_CMD_PMP_READ_DMA, "READ BUFFER DMA" }, 2348 { ATA_CMD_PMP_WRITE, "WRITE BUFFER" }, 2349 { ATA_CMD_PMP_WRITE_DMA, "WRITE BUFFER DMA" }, 2350 { ATA_CMD_CONF_OVERLAY, "DEVICE CONFIGURATION OVERLAY" }, 2351 { ATA_CMD_SEC_SET_PASS, "SECURITY SET 
PASSWORD" }, 2352 { ATA_CMD_SEC_UNLOCK, "SECURITY UNLOCK" }, 2353 { ATA_CMD_SEC_ERASE_PREP, "SECURITY ERASE PREPARE" }, 2354 { ATA_CMD_SEC_ERASE_UNIT, "SECURITY ERASE UNIT" }, 2355 { ATA_CMD_SEC_FREEZE_LOCK, "SECURITY FREEZE LOCK" }, 2356 { ATA_CMD_SEC_DISABLE_PASS, "SECURITY DISABLE PASSWORD" }, 2357 { ATA_CMD_CONFIG_STREAM, "CONFIGURE STREAM" }, 2358 { ATA_CMD_SMART, "SMART" }, 2359 { ATA_CMD_MEDIA_LOCK, "DOOR LOCK" }, 2360 { ATA_CMD_MEDIA_UNLOCK, "DOOR UNLOCK" }, 2361 { ATA_CMD_DSM, "DATA SET MANAGEMENT" }, 2362 { ATA_CMD_CHK_MED_CRD_TYP, "CHECK MEDIA CARD TYPE" }, 2363 { ATA_CMD_CFA_REQ_EXT_ERR, "CFA REQUEST EXTENDED ERROR" }, 2364 { ATA_CMD_CFA_WRITE_NE, "CFA WRITE SECTORS WITHOUT ERASE" }, 2365 { ATA_CMD_CFA_TRANS_SECT, "CFA TRANSLATE SECTOR" }, 2366 { ATA_CMD_CFA_ERASE, "CFA ERASE SECTORS" }, 2367 { ATA_CMD_CFA_WRITE_MULT_NE, "CFA WRITE MULTIPLE WITHOUT ERASE" }, 2368 { ATA_CMD_REQ_SENSE_DATA, "REQUEST SENSE DATA EXT" }, 2369 { ATA_CMD_SANITIZE_DEVICE, "SANITIZE DEVICE" }, 2370 { ATA_CMD_ZAC_MGMT_IN, "ZAC MANAGEMENT IN" }, 2371 { ATA_CMD_ZAC_MGMT_OUT, "ZAC MANAGEMENT OUT" }, 2372 { ATA_CMD_READ_LONG, "READ LONG (with retries)" }, 2373 { ATA_CMD_READ_LONG_ONCE, "READ LONG (without retries)" }, 2374 { ATA_CMD_WRITE_LONG, "WRITE LONG (with retries)" }, 2375 { ATA_CMD_WRITE_LONG_ONCE, "WRITE LONG (without retries)" }, 2376 { ATA_CMD_RESTORE, "RECALIBRATE" }, 2377 { 0, NULL } /* terminate list */ 2378 }; 2379 2380 unsigned int i; 2381 for (i = 0; cmd_descr[i].text; i++) 2382 if (cmd_descr[i].command == command) 2383 return cmd_descr[i].text; 2384 #endif 2385 2386 return NULL; 2387 } 2388 EXPORT_SYMBOL_GPL(ata_get_cmd_descript); 2389 2390 /** 2391 * ata_eh_link_report - report error handling to user 2392 * @link: ATA link EH is going on 2393 * 2394 * Report EH to user. 2395 * 2396 * LOCKING: 2397 * None. 
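* * An illustrative (not captured) example of the resulting dmesg block: * * ata3.00: exception Emask 0x10 SAct 0x0 SErr 0x4010000 action 0xe frozen * ata3.00: cmd ea/00:00:00:00:00/00:00:00:00:00/a0 tag 0 * res 40/00:00:00:4f:c2/00:00:00:00:00/00 Emask 0x10 (ATA bus error) * * The exact fields follow the format strings assembled below.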
2398 */ 2399 static void ata_eh_link_report(struct ata_link *link) 2400 { 2401 struct ata_port *ap = link->ap; 2402 struct ata_eh_context *ehc = &link->eh_context; 2403 const char *frozen, *desc; 2404 char tries_buf[6] = ""; 2405 int tag, nr_failed = 0; 2406 2407 if (ehc->i.flags & ATA_EHI_QUIET) 2408 return; 2409 2410 desc = NULL; 2411 if (ehc->i.desc[0] != '\0') 2412 desc = ehc->i.desc; 2413 2414 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 2415 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 2416 2417 if (!(qc->flags & ATA_QCFLAG_FAILED) || 2418 ata_dev_phys_link(qc->dev) != link || 2419 ((qc->flags & ATA_QCFLAG_QUIET) && 2420 qc->err_mask == AC_ERR_DEV)) 2421 continue; 2422 if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask) 2423 continue; 2424 2425 nr_failed++; 2426 } 2427 2428 if (!nr_failed && !ehc->i.err_mask) 2429 return; 2430 2431 frozen = ""; 2432 if (ap->pflags & ATA_PFLAG_FROZEN) 2433 frozen = " frozen"; 2434 2435 if (ap->eh_tries < ATA_EH_MAX_TRIES) 2436 snprintf(tries_buf, sizeof(tries_buf), " t%d", 2437 ap->eh_tries); 2438 2439 if (ehc->i.dev) { 2440 ata_dev_err(ehc->i.dev, "exception Emask 0x%x " 2441 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n", 2442 ehc->i.err_mask, link->sactive, ehc->i.serror, 2443 ehc->i.action, frozen, tries_buf); 2444 if (desc) 2445 ata_dev_err(ehc->i.dev, "%s\n", desc); 2446 } else { 2447 ata_link_err(link, "exception Emask 0x%x " 2448 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n", 2449 ehc->i.err_mask, link->sactive, ehc->i.serror, 2450 ehc->i.action, frozen, tries_buf); 2451 if (desc) 2452 ata_link_err(link, "%s\n", desc); 2453 } 2454 2455 #ifdef CONFIG_ATA_VERBOSE_ERROR 2456 if (ehc->i.serror) 2457 ata_link_err(link, 2458 "SError: { %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s}\n", 2459 ehc->i.serror & SERR_DATA_RECOVERED ? "RecovData " : "", 2460 ehc->i.serror & SERR_COMM_RECOVERED ? "RecovComm " : "", 2461 ehc->i.serror & SERR_DATA ? "UnrecovData " : "", 2462 ehc->i.serror & SERR_PERSISTENT ? "Persist " : "", 2463 ehc->i.serror & SERR_PROTOCOL ? "Proto " : "", 2464 ehc->i.serror & SERR_INTERNAL ? "HostInt " : "", 2465 ehc->i.serror & SERR_PHYRDY_CHG ? "PHYRdyChg " : "", 2466 ehc->i.serror & SERR_PHY_INT_ERR ? "PHYInt " : "", 2467 ehc->i.serror & SERR_COMM_WAKE ? "CommWake " : "", 2468 ehc->i.serror & SERR_10B_8B_ERR ? "10B8B " : "", 2469 ehc->i.serror & SERR_DISPARITY ? "Dispar " : "", 2470 ehc->i.serror & SERR_CRC ? "BadCRC " : "", 2471 ehc->i.serror & SERR_HANDSHAKE ? "Handshk " : "", 2472 ehc->i.serror & SERR_LINK_SEQ_ERR ? "LinkSeq " : "", 2473 ehc->i.serror & SERR_TRANS_ST_ERROR ? "TrStaTrns " : "", 2474 ehc->i.serror & SERR_UNRECOG_FIS ? "UnrecFIS " : "", 2475 ehc->i.serror & SERR_DEV_XCHG ? 
"DevExch " : ""); 2476 #endif 2477 2478 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 2479 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 2480 struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf; 2481 char data_buf[20] = ""; 2482 char cdb_buf[70] = ""; 2483 2484 if (!(qc->flags & ATA_QCFLAG_FAILED) || 2485 ata_dev_phys_link(qc->dev) != link || !qc->err_mask) 2486 continue; 2487 2488 if (qc->dma_dir != DMA_NONE) { 2489 static const char *dma_str[] = { 2490 [DMA_BIDIRECTIONAL] = "bidi", 2491 [DMA_TO_DEVICE] = "out", 2492 [DMA_FROM_DEVICE] = "in", 2493 }; 2494 const char *prot_str = NULL; 2495 2496 switch (qc->tf.protocol) { 2497 case ATA_PROT_UNKNOWN: 2498 prot_str = "unknown"; 2499 break; 2500 case ATA_PROT_NODATA: 2501 prot_str = "nodata"; 2502 break; 2503 case ATA_PROT_PIO: 2504 prot_str = "pio"; 2505 break; 2506 case ATA_PROT_DMA: 2507 prot_str = "dma"; 2508 break; 2509 case ATA_PROT_NCQ: 2510 prot_str = "ncq dma"; 2511 break; 2512 case ATA_PROT_NCQ_NODATA: 2513 prot_str = "ncq nodata"; 2514 break; 2515 case ATAPI_PROT_NODATA: 2516 prot_str = "nodata"; 2517 break; 2518 case ATAPI_PROT_PIO: 2519 prot_str = "pio"; 2520 break; 2521 case ATAPI_PROT_DMA: 2522 prot_str = "dma"; 2523 break; 2524 } 2525 snprintf(data_buf, sizeof(data_buf), " %s %u %s", 2526 prot_str, qc->nbytes, dma_str[qc->dma_dir]); 2527 } 2528 2529 if (ata_is_atapi(qc->tf.protocol)) { 2530 const u8 *cdb = qc->cdb; 2531 size_t cdb_len = qc->dev->cdb_len; 2532 2533 if (qc->scsicmd) { 2534 cdb = qc->scsicmd->cmnd; 2535 cdb_len = qc->scsicmd->cmd_len; 2536 } 2537 __scsi_format_command(cdb_buf, sizeof(cdb_buf), 2538 cdb, cdb_len); 2539 } else { 2540 const char *descr = ata_get_cmd_descript(cmd->command); 2541 if (descr) 2542 ata_dev_err(qc->dev, "failed command: %s\n", 2543 descr); 2544 } 2545 2546 ata_dev_err(qc->dev, 2547 "cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 2548 "tag %d%s\n %s" 2549 "res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 2550 "Emask 0x%x (%s)%s\n", 2551 cmd->command, cmd->feature, cmd->nsect, 2552 cmd->lbal, cmd->lbam, cmd->lbah, 2553 cmd->hob_feature, cmd->hob_nsect, 2554 cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah, 2555 cmd->device, qc->tag, data_buf, cdb_buf, 2556 res->command, res->feature, res->nsect, 2557 res->lbal, res->lbam, res->lbah, 2558 res->hob_feature, res->hob_nsect, 2559 res->hob_lbal, res->hob_lbam, res->hob_lbah, 2560 res->device, qc->err_mask, ata_err_string(qc->err_mask), 2561 qc->err_mask & AC_ERR_NCQ ? " <F>" : ""); 2562 2563 #ifdef CONFIG_ATA_VERBOSE_ERROR 2564 if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | 2565 ATA_SENSE | ATA_ERR)) { 2566 if (res->command & ATA_BUSY) 2567 ata_dev_err(qc->dev, "status: { Busy }\n"); 2568 else 2569 ata_dev_err(qc->dev, "status: { %s%s%s%s%s}\n", 2570 res->command & ATA_DRDY ? "DRDY " : "", 2571 res->command & ATA_DF ? "DF " : "", 2572 res->command & ATA_DRQ ? "DRQ " : "", 2573 res->command & ATA_SENSE ? "SENSE " : "", 2574 res->command & ATA_ERR ? "ERR " : ""); 2575 } 2576 2577 if (cmd->command != ATA_CMD_PACKET && 2578 (res->feature & (ATA_ICRC | ATA_UNC | ATA_AMNF | 2579 ATA_IDNF | ATA_ABORTED))) 2580 ata_dev_err(qc->dev, "error: { %s%s%s%s%s}\n", 2581 res->feature & ATA_ICRC ? "ICRC " : "", 2582 res->feature & ATA_UNC ? "UNC " : "", 2583 res->feature & ATA_AMNF ? "AMNF " : "", 2584 res->feature & ATA_IDNF ? "IDNF " : "", 2585 res->feature & ATA_ABORTED ? 
"ABRT " : ""); 2586 #endif 2587 } 2588 } 2589 2590 /** 2591 * ata_eh_report - report error handling to user 2592 * @ap: ATA port to report EH about 2593 * 2594 * Report EH to user. 2595 * 2596 * LOCKING: 2597 * None. 2598 */ 2599 void ata_eh_report(struct ata_port *ap) 2600 { 2601 struct ata_link *link; 2602 2603 ata_for_each_link(link, ap, HOST_FIRST) 2604 ata_eh_link_report(link); 2605 } 2606 2607 static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset, 2608 unsigned int *classes, unsigned long deadline, 2609 bool clear_classes) 2610 { 2611 struct ata_device *dev; 2612 2613 if (clear_classes) 2614 ata_for_each_dev(dev, link, ALL) 2615 classes[dev->devno] = ATA_DEV_UNKNOWN; 2616 2617 return reset(link, classes, deadline); 2618 } 2619 2620 static int ata_eh_followup_srst_needed(struct ata_link *link, int rc) 2621 { 2622 if ((link->flags & ATA_LFLAG_NO_SRST) || ata_link_offline(link)) 2623 return 0; 2624 if (rc == -EAGAIN) 2625 return 1; 2626 if (sata_pmp_supported(link->ap) && ata_is_host_link(link)) 2627 return 1; 2628 return 0; 2629 } 2630 2631 int ata_eh_reset(struct ata_link *link, int classify, 2632 ata_prereset_fn_t prereset, ata_reset_fn_t softreset, 2633 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) 2634 { 2635 struct ata_port *ap = link->ap; 2636 struct ata_link *slave = ap->slave_link; 2637 struct ata_eh_context *ehc = &link->eh_context; 2638 struct ata_eh_context *sehc = slave ? &slave->eh_context : NULL; 2639 unsigned int *classes = ehc->classes; 2640 unsigned int lflags = link->flags; 2641 int verbose = !(ehc->i.flags & ATA_EHI_QUIET); 2642 int max_tries = 0, try = 0; 2643 struct ata_link *failed_link; 2644 struct ata_device *dev; 2645 unsigned long deadline, now; 2646 ata_reset_fn_t reset; 2647 unsigned long flags; 2648 u32 sstatus; 2649 int nr_unknown, rc; 2650 2651 /* 2652 * Prepare to reset 2653 */ 2654 while (ata_eh_reset_timeouts[max_tries] != ULONG_MAX) 2655 max_tries++; 2656 if (link->flags & ATA_LFLAG_RST_ONCE) 2657 max_tries = 1; 2658 if (link->flags & ATA_LFLAG_NO_HRST) 2659 hardreset = NULL; 2660 if (link->flags & ATA_LFLAG_NO_SRST) 2661 softreset = NULL; 2662 2663 /* make sure each reset attempt is at least COOL_DOWN apart */ 2664 if (ehc->i.flags & ATA_EHI_DID_RESET) { 2665 now = jiffies; 2666 WARN_ON(time_after(ehc->last_reset, now)); 2667 deadline = ata_deadline(ehc->last_reset, 2668 ATA_EH_RESET_COOL_DOWN); 2669 if (time_before(now, deadline)) 2670 schedule_timeout_uninterruptible(deadline - now); 2671 } 2672 2673 spin_lock_irqsave(ap->lock, flags); 2674 ap->pflags |= ATA_PFLAG_RESETTING; 2675 spin_unlock_irqrestore(ap->lock, flags); 2676 2677 ata_eh_about_to_do(link, NULL, ATA_EH_RESET); 2678 2679 ata_for_each_dev(dev, link, ALL) { 2680 /* If we issue an SRST then an ATA drive (not ATAPI) 2681 * may change configuration and be in PIO0 timing. If 2682 * we do a hard reset (or are coming from power on) 2683 * this is true for ATA or ATAPI. Until we've set a 2684 * suitable controller mode we should not touch the 2685 * bus as we may be talking too fast. 2686 */ 2687 dev->pio_mode = XFER_PIO_0; 2688 dev->dma_mode = 0xff; 2689 2690 /* If the controller has a pio mode setup function 2691 * then use it to set the chipset to rights. Don't 2692 * touch the DMA setup as that will be dealt with when 2693 * configuring devices. 
2694 */ 2695 if (ap->ops->set_piomode) 2696 ap->ops->set_piomode(ap, dev); 2697 } 2698 2699 /* prefer hardreset */ 2700 reset = NULL; 2701 ehc->i.action &= ~ATA_EH_RESET; 2702 if (hardreset) { 2703 reset = hardreset; 2704 ehc->i.action |= ATA_EH_HARDRESET; 2705 } else if (softreset) { 2706 reset = softreset; 2707 ehc->i.action |= ATA_EH_SOFTRESET; 2708 } 2709 2710 if (prereset) { 2711 unsigned long deadline = ata_deadline(jiffies, 2712 ATA_EH_PRERESET_TIMEOUT); 2713 2714 if (slave) { 2715 sehc->i.action &= ~ATA_EH_RESET; 2716 sehc->i.action |= ehc->i.action; 2717 } 2718 2719 rc = prereset(link, deadline); 2720 2721 /* If present, do prereset on slave link too. Reset 2722 * is skipped iff both master and slave links report 2723 * -ENOENT or clear ATA_EH_RESET. 2724 */ 2725 if (slave && (rc == 0 || rc == -ENOENT)) { 2726 int tmp; 2727 2728 tmp = prereset(slave, deadline); 2729 if (tmp != -ENOENT) 2730 rc = tmp; 2731 2732 ehc->i.action |= sehc->i.action; 2733 } 2734 2735 if (rc) { 2736 if (rc == -ENOENT) { 2737 ata_link_dbg(link, "port disabled--ignoring\n"); 2738 ehc->i.action &= ~ATA_EH_RESET; 2739 2740 ata_for_each_dev(dev, link, ALL) 2741 classes[dev->devno] = ATA_DEV_NONE; 2742 2743 rc = 0; 2744 } else 2745 ata_link_err(link, 2746 "prereset failed (errno=%d)\n", 2747 rc); 2748 goto out; 2749 } 2750 2751 /* prereset() might have cleared ATA_EH_RESET. If so, 2752 * bang classes, thaw and return. 2753 */ 2754 if (reset && !(ehc->i.action & ATA_EH_RESET)) { 2755 ata_for_each_dev(dev, link, ALL) 2756 classes[dev->devno] = ATA_DEV_NONE; 2757 if ((ap->pflags & ATA_PFLAG_FROZEN) && 2758 ata_is_host_link(link)) 2759 ata_eh_thaw_port(ap); 2760 rc = 0; 2761 goto out; 2762 } 2763 } 2764 2765 retry: 2766 /* 2767 * Perform reset 2768 */ 2769 if (ata_is_host_link(link)) 2770 ata_eh_freeze_port(ap); 2771 2772 deadline = ata_deadline(jiffies, ata_eh_reset_timeouts[try++]); 2773 2774 if (reset) { 2775 if (verbose) 2776 ata_link_info(link, "%s resetting link\n", 2777 reset == softreset ? "soft" : "hard"); 2778 2779 /* mark that this EH session started with reset */ 2780 ehc->last_reset = jiffies; 2781 if (reset == hardreset) 2782 ehc->i.flags |= ATA_EHI_DID_HARDRESET; 2783 else 2784 ehc->i.flags |= ATA_EHI_DID_SOFTRESET; 2785 2786 rc = ata_do_reset(link, reset, classes, deadline, true); 2787 if (rc && rc != -EAGAIN) { 2788 failed_link = link; 2789 goto fail; 2790 } 2791 2792 /* hardreset slave link if one exists */ 2793 if (slave && reset == hardreset) { 2794 int tmp; 2795 2796 if (verbose) 2797 ata_link_info(slave, "hard resetting link\n"); 2798 2799 ata_eh_about_to_do(slave, NULL, ATA_EH_RESET); 2800 tmp = ata_do_reset(slave, reset, classes, deadline, 2801 false); 2802 switch (tmp) { 2803 case -EAGAIN: 2804 rc = -EAGAIN; /* fall through */ 2805 case 0: 2806 break; 2807 default: 2808 failed_link = slave; 2809 rc = tmp; 2810 goto fail; 2811 } 2812 } 2813 2814 /* perform follow-up SRST if necessary */ 2815 if (reset == hardreset && 2816 ata_eh_followup_srst_needed(link, rc)) { 2817 reset = softreset; 2818 2819 if (!reset) { 2820 ata_link_err(link, 2821 "follow-up softreset required but no softreset available\n"); 2822 failed_link = link; 2823 rc = -EINVAL; 2824 goto fail; 2825 } 2826 2827 ata_eh_about_to_do(link, NULL, ATA_EH_RESET); 2828 rc = ata_do_reset(link, reset, classes, deadline, true); 2829 if (rc) { 2830 failed_link = link; 2831 goto fail; 2832 } 2833 } 2834 } else { 2835 if (verbose) 2836 ata_link_info(link, 2837 "no reset method available, skipping reset\n"); 2838 if (!(lflags & ATA_LFLAG_ASSUME_CLASS)) 2839 lflags |= ATA_LFLAG_ASSUME_ATA; 2840 } 2841 2842 /* 2843 * Post-reset processing 2844 */ 2845 ata_for_each_dev(dev, link, ALL) { 2846 /* After the reset, the device state is PIO 0 and the 2847 * controller state is undefined. Reset also wakes up 2848 * drives from sleeping mode. 2849 */ 2850 dev->pio_mode = XFER_PIO_0; 2851 dev->flags &= ~ATA_DFLAG_SLEEPING; 2852 2853 if (ata_phys_link_offline(ata_dev_phys_link(dev))) 2854 continue; 2855 2856 /* apply class override */ 2857 if (lflags & ATA_LFLAG_ASSUME_ATA) 2858 classes[dev->devno] = ATA_DEV_ATA; 2859 else if (lflags & ATA_LFLAG_ASSUME_SEMB) 2860 classes[dev->devno] = ATA_DEV_SEMB_UNSUP; 2861 } 2862 2863 /* record current link speed */ 2864 if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0) 2865 link->sata_spd = (sstatus >> 4) & 0xf; 2866 if (slave && sata_scr_read(slave, SCR_STATUS, &sstatus) == 0) 2867 slave->sata_spd = (sstatus >> 4) & 0xf; 2868 2869 /* thaw the port */ 2870 if (ata_is_host_link(link)) 2871 ata_eh_thaw_port(ap); 2872 2873 /* postreset() should clear hardware SError. Although SError 2874 * is cleared during link resume, clearing SError here is 2875 * necessary as some PHYs raise hotplug events after SRST. 2876 * This introduces a race condition where hotplug can occur 2877 * between the reset and this point; the race is mitigated by 2878 * cross-checking link onlineness and classification result later. 2879 */ 2880 if (postreset) { 2881 postreset(link, classes); 2882 if (slave) 2883 postreset(slave, classes); 2884 } 2885 2886 /* 2887 * Some controllers can't be frozen very well and may set spurious 2888 * error conditions during reset. Clear accumulated error 2889 * information and re-thaw the port if frozen. As reset is the 2890 * final recovery action and we cross check link onlineness against 2891 * device classification later, no hotplug event is lost by this.
2892 */ 2893 spin_lock_irqsave(link->ap->lock, flags); 2894 memset(&link->eh_info, 0, sizeof(link->eh_info)); 2895 if (slave) 2896 memset(&slave->eh_info, 0, sizeof(link->eh_info)); 2897 ap->pflags &= ~ATA_PFLAG_EH_PENDING; 2898 spin_unlock_irqrestore(link->ap->lock, flags); 2899 2900 if (ap->pflags & ATA_PFLAG_FROZEN) 2901 ata_eh_thaw_port(ap); 2902 2903 /* 2904 * Make sure onlineness and classification result correspond. 2905 * Hotplug could have happened during reset and some 2906 * controllers fail to wait while a drive is spinning up after 2907 * being hotplugged causing misdetection. By cross checking 2908 * link on/offlineness and classification result, those 2909 * conditions can be reliably detected and retried. 2910 */ 2911 nr_unknown = 0; 2912 ata_for_each_dev(dev, link, ALL) { 2913 if (ata_phys_link_online(ata_dev_phys_link(dev))) { 2914 if (classes[dev->devno] == ATA_DEV_UNKNOWN) { 2915 ata_dev_dbg(dev, "link online but device misclassified\n"); 2916 classes[dev->devno] = ATA_DEV_NONE; 2917 nr_unknown++; 2918 } 2919 } else if (ata_phys_link_offline(ata_dev_phys_link(dev))) { 2920 if (ata_class_enabled(classes[dev->devno])) 2921 ata_dev_dbg(dev, 2922 "link offline, clearing class %d to NONE\n", 2923 classes[dev->devno]); 2924 classes[dev->devno] = ATA_DEV_NONE; 2925 } else if (classes[dev->devno] == ATA_DEV_UNKNOWN) { 2926 ata_dev_dbg(dev, 2927 "link status unknown, clearing UNKNOWN to NONE\n"); 2928 classes[dev->devno] = ATA_DEV_NONE; 2929 } 2930 } 2931 2932 if (classify && nr_unknown) { 2933 if (try < max_tries) { 2934 ata_link_warn(link, 2935 "link online but %d devices misclassified, retrying\n", 2936 nr_unknown); 2937 failed_link = link; 2938 rc = -EAGAIN; 2939 goto fail; 2940 } 2941 ata_link_warn(link, 2942 "link online but %d devices misclassified, " 2943 "device detection might fail\n", nr_unknown); 2944 } 2945 2946 /* reset successful, schedule revalidation */ 2947 ata_eh_done(link, NULL, ATA_EH_RESET); 2948 if (slave) 2949 ata_eh_done(slave, NULL, ATA_EH_RESET); 2950 ehc->last_reset = jiffies; /* update to completion time */ 2951 ehc->i.action |= ATA_EH_REVALIDATE; 2952 link->lpm_policy = ATA_LPM_UNKNOWN; /* reset LPM state */ 2953 2954 rc = 0; 2955 out: 2956 /* clear hotplug flag */ 2957 ehc->i.flags &= ~ATA_EHI_HOTPLUGGED; 2958 if (slave) 2959 sehc->i.flags &= ~ATA_EHI_HOTPLUGGED; 2960 2961 spin_lock_irqsave(ap->lock, flags); 2962 ap->pflags &= ~ATA_PFLAG_RESETTING; 2963 spin_unlock_irqrestore(ap->lock, flags); 2964 2965 return rc; 2966 2967 fail: 2968 /* if SCR isn't accessible on a fan-out port, PMP needs to be reset */ 2969 if (!ata_is_host_link(link) && 2970 sata_scr_read(link, SCR_STATUS, &sstatus)) 2971 rc = -ERESTART; 2972 2973 if (try >= max_tries) { 2974 /* 2975 * Thaw host port even if reset failed, so that the port 2976 * can be retried on the next phy event. This risks 2977 * repeated EH runs but seems to be a better tradeoff than 2978 * shutting down a port after a botched hotplug attempt. 2979 */ 2980 if (ata_is_host_link(link)) 2981 ata_eh_thaw_port(ap); 2982 goto out; 2983 } 2984 2985 now = jiffies; 2986 if (time_before(now, deadline)) { 2987 unsigned long delta = deadline - now; 2988 2989 ata_link_warn(failed_link, 2990 "reset failed (errno=%d), retrying in %u secs\n", 2991 rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000)); 2992 2993 ata_eh_release(ap); 2994 while (delta) 2995 delta = schedule_timeout_uninterruptible(delta); 2996 ata_eh_acquire(ap); 2997 } 2998 2999 /* 3000 * While disks spinup behind PMP, some controllers fail sending SRST. 
3001 * They need to be reset - as well as the PMP - before retrying. 3002 */ 3003 if (rc == -ERESTART) { 3004 if (ata_is_host_link(link)) 3005 ata_eh_thaw_port(ap); 3006 goto out; 3007 } 3008 3009 if (try == max_tries - 1) { 3010 sata_down_spd_limit(link, 0); 3011 if (slave) 3012 sata_down_spd_limit(slave, 0); 3013 } else if (rc == -EPIPE) 3014 sata_down_spd_limit(failed_link, 0); 3015 3016 if (hardreset) 3017 reset = hardreset; 3018 goto retry; 3019 } 3020 3021 static inline void ata_eh_pull_park_action(struct ata_port *ap) 3022 { 3023 struct ata_link *link; 3024 struct ata_device *dev; 3025 unsigned long flags; 3026 3027 /* 3028 * This function can be thought of as an extended version of 3029 * ata_eh_about_to_do() specially crafted to accommodate the 3030 * requirements of ATA_EH_PARK handling. Since the EH thread 3031 * does not leave the do {} while () loop in ata_eh_recover as 3032 * long as the timeout for a park request to *one* device on 3033 * the port has not expired, and since we still want to pick 3034 * up park requests to other devices on the same port or 3035 * timeout updates for the same device, we have to pull 3036 * ATA_EH_PARK actions from eh_info into eh_context.i 3037 * ourselves at the beginning of each pass over the loop. 3038 * 3039 * Additionally, all write accesses to &ap->park_req_pending 3040 * through reinit_completion() (see below) or complete_all() 3041 * (see ata_scsi_park_store()) are protected by the host lock. 3042 * As a result we have that park_req_pending.done is zero on 3043 * exit from this function, i.e. when ATA_EH_PARK actions for 3044 * *all* devices on port ap have been pulled into the 3045 * respective eh_context structs. If, and only if, 3046 * park_req_pending.done is non-zero by the time we reach 3047 * wait_for_completion_timeout(), another ATA_EH_PARK action 3048 * has been scheduled for at least one of the devices on port 3049 * ap and we have to cycle over the do {} while () loop in 3050 * ata_eh_recover() again. 
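* * In short: this pulls pending ATA_EH_PARK requests into eh_context; if park_req_pending.done is non-zero afterwards, a new park request arrived in the meantime and ata_eh_recover() must cycle its loop again.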
3051 */ 3052 3053 spin_lock_irqsave(ap->lock, flags); 3054 reinit_completion(&ap->park_req_pending); 3055 ata_for_each_link(link, ap, EDGE) { 3056 ata_for_each_dev(dev, link, ALL) { 3057 struct ata_eh_info *ehi = &link->eh_info; 3058 3059 link->eh_context.i.dev_action[dev->devno] |= 3060 ehi->dev_action[dev->devno] & ATA_EH_PARK; 3061 ata_eh_clear_action(link, dev, ehi, ATA_EH_PARK); 3062 } 3063 } 3064 spin_unlock_irqrestore(ap->lock, flags); 3065 } 3066 3067 static void ata_eh_park_issue_cmd(struct ata_device *dev, int park) 3068 { 3069 struct ata_eh_context *ehc = &dev->link->eh_context; 3070 struct ata_taskfile tf; 3071 unsigned int err_mask; 3072 3073 ata_tf_init(dev, &tf); 3074 if (park) { 3075 ehc->unloaded_mask |= 1 << dev->devno; 3076 tf.command = ATA_CMD_IDLEIMMEDIATE; /* IDLE IMMEDIATE with UNLOAD FEATURE */ 3077 tf.feature = 0x44; 3078 tf.lbal = 0x4c; /* magic LBA 0x55:4E:4C selects head unload (ATA-8 ACS) */ 3079 tf.lbam = 0x4e; 3080 tf.lbah = 0x55; 3081 } else { 3082 ehc->unloaded_mask &= ~(1 << dev->devno); 3083 tf.command = ATA_CMD_CHK_POWER; /* harmless nodata command; receiving a new command ends the unload state */ 3084 } 3085 3086 tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR; 3087 tf.protocol = ATA_PROT_NODATA; 3088 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); 3089 if (park && (err_mask || tf.lbal != 0xc4)) { /* on success the device echoes 0xC4 in LBA low */ 3090 ata_dev_err(dev, "head unload failed!\n"); 3091 ehc->unloaded_mask &= ~(1 << dev->devno); 3092 } 3093 } 3094 3095 static int ata_eh_revalidate_and_attach(struct ata_link *link, 3096 struct ata_device **r_failed_dev) 3097 { 3098 struct ata_port *ap = link->ap; 3099 struct ata_eh_context *ehc = &link->eh_context; 3100 struct ata_device *dev; 3101 unsigned int new_mask = 0; 3102 unsigned long flags; 3103 int rc = 0; 3104 3105 DPRINTK("ENTER\n"); 3106 3107 /* For PATA drive side cable detection to work, IDENTIFY must 3108 * be done backwards such that PDIAG- is released by the slave 3109 * device before the master device is identified. 3110 */ 3111 ata_for_each_dev(dev, link, ALL_REVERSE) { 3112 unsigned int action = ata_eh_dev_action(dev); 3113 unsigned int readid_flags = 0; 3114 3115 if (ehc->i.flags & ATA_EHI_DID_RESET) 3116 readid_flags |= ATA_READID_POSTRESET; 3117 3118 if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) { 3119 WARN_ON(dev->class == ATA_DEV_PMP); 3120 3121 if (ata_phys_link_offline(ata_dev_phys_link(dev))) { 3122 rc = -EIO; 3123 goto err; 3124 } 3125 3126 ata_eh_about_to_do(link, dev, ATA_EH_REVALIDATE); 3127 rc = ata_dev_revalidate(dev, ehc->classes[dev->devno], 3128 readid_flags); 3129 if (rc) 3130 goto err; 3131 3132 ata_eh_done(link, dev, ATA_EH_REVALIDATE); 3133 3134 /* Configuration may have changed, reconfigure 3135 * transfer mode. 3136 */ 3137 ehc->i.flags |= ATA_EHI_SETMODE; 3138 3139 /* schedule the scsi_rescan_device() here */ 3140 schedule_work(&(ap->scsi_rescan_task)); 3141 } else if (dev->class == ATA_DEV_UNKNOWN && 3142 ehc->tries[dev->devno] && 3143 ata_class_enabled(ehc->classes[dev->devno])) { 3144 /* Temporarily set dev->class, it will be 3145 * permanently set once all configurations are 3146 * complete. This is necessary because new 3147 * device configuration is done in two 3148 * separate loops.
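* The second, forward loop further below commits ehc->classes[] into dev->class and runs ata_dev_configure() once identification has succeeded.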
3149 */ 3150 dev->class = ehc->classes[dev->devno]; 3151 3152 if (dev->class == ATA_DEV_PMP) 3153 rc = sata_pmp_attach(dev); 3154 else 3155 rc = ata_dev_read_id(dev, &dev->class, 3156 readid_flags, dev->id); 3157 3158 /* read_id might have changed class, store and reset */ 3159 ehc->classes[dev->devno] = dev->class; 3160 dev->class = ATA_DEV_UNKNOWN; 3161 3162 switch (rc) { 3163 case 0: 3164 /* clear error info accumulated during probe */ 3165 ata_ering_clear(&dev->ering); 3166 new_mask |= 1 << dev->devno; 3167 break; 3168 case -ENOENT: 3169 /* IDENTIFY was issued to non-existent 3170 * device. No need to reset. Just 3171 * thaw and ignore the device. 3172 */ 3173 ata_eh_thaw_port(ap); 3174 break; 3175 default: 3176 goto err; 3177 } 3178 } 3179 } 3180 3181 /* PDIAG- should have been released, ask cable type if post-reset */ 3182 if ((ehc->i.flags & ATA_EHI_DID_RESET) && ata_is_host_link(link)) { 3183 if (ap->ops->cable_detect) 3184 ap->cbl = ap->ops->cable_detect(ap); 3185 ata_force_cbl(ap); 3186 } 3187 3188 /* Configure new devices forward such that user doesn't see 3189 * device detection messages backwards. 3190 */ 3191 ata_for_each_dev(dev, link, ALL) { 3192 if (!(new_mask & (1 << dev->devno))) 3193 continue; 3194 3195 dev->class = ehc->classes[dev->devno]; 3196 3197 if (dev->class == ATA_DEV_PMP) 3198 continue; 3199 3200 ehc->i.flags |= ATA_EHI_PRINTINFO; 3201 rc = ata_dev_configure(dev); 3202 ehc->i.flags &= ~ATA_EHI_PRINTINFO; 3203 if (rc) { 3204 dev->class = ATA_DEV_UNKNOWN; 3205 goto err; 3206 } 3207 3208 spin_lock_irqsave(ap->lock, flags); 3209 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; 3210 spin_unlock_irqrestore(ap->lock, flags); 3211 3212 /* new device discovered, configure xfermode */ 3213 ehc->i.flags |= ATA_EHI_SETMODE; 3214 } 3215 3216 return 0; 3217 3218 err: 3219 *r_failed_dev = dev; 3220 DPRINTK("EXIT rc=%d\n", rc); 3221 return rc; 3222 } 3223 3224 /** 3225 * ata_set_mode - Program timings and issue SET FEATURES - XFER 3226 * @link: link on which timings will be programmed 3227 * @r_failed_dev: out parameter for failed device 3228 * 3229 * Set ATA device disk transfer mode (PIO3, UDMA6, etc.). If 3230 * ata_set_mode() fails, pointer to the failing device is 3231 * returned in @r_failed_dev. 3232 * 3233 * LOCKING: 3234 * PCI/etc. bus probe sem. 3235 * 3236 * RETURNS: 3237 * 0 on success, negative errno otherwise 3238 */ 3239 int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev) 3240 { 3241 struct ata_port *ap = link->ap; 3242 struct ata_device *dev; 3243 int rc; 3244 3245 /* if data transfer is verified, clear DUBIOUS_XFER on ering top */ 3246 ata_for_each_dev(dev, link, ENABLED) { 3247 if (!(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) { 3248 struct ata_ering_entry *ent; 3249 3250 ent = ata_ering_top(&dev->ering); 3251 if (ent) 3252 ent->eflags &= ~ATA_EFLAG_DUBIOUS_XFER; 3253 } 3254 } 3255 3256 /* has private set_mode? 
*/ 3257 if (ap->ops->set_mode) 3258 rc = ap->ops->set_mode(link, r_failed_dev); 3259 else 3260 rc = ata_do_set_mode(link, r_failed_dev); 3261 3262 /* if transfer mode has changed, set DUBIOUS_XFER on device */ 3263 ata_for_each_dev(dev, link, ENABLED) { 3264 struct ata_eh_context *ehc = &link->eh_context; 3265 u8 saved_xfer_mode = ehc->saved_xfer_mode[dev->devno]; 3266 u8 saved_ncq = !!(ehc->saved_ncq_enabled & (1 << dev->devno)); 3267 3268 if (dev->xfer_mode != saved_xfer_mode || 3269 ata_ncq_enabled(dev) != saved_ncq) 3270 dev->flags |= ATA_DFLAG_DUBIOUS_XFER; 3271 } 3272 3273 return rc; 3274 } 3275 3276 /** 3277 * atapi_eh_clear_ua - Clear ATAPI UNIT ATTENTION after reset 3278 * @dev: ATAPI device to clear UA for 3279 * 3280 * Resets and other operations can make an ATAPI device raise 3281 * UNIT ATTENTION which causes the next operation to fail. This 3282 * function clears UA. 3283 * 3284 * LOCKING: 3285 * EH context (may sleep). 3286 * 3287 * RETURNS: 3288 * 0 on success, -errno on failure. 3289 */ 3290 static int atapi_eh_clear_ua(struct ata_device *dev) 3291 { 3292 int i; 3293 3294 for (i = 0; i < ATA_EH_UA_TRIES; i++) { 3295 u8 *sense_buffer = dev->link->ap->sector_buf; 3296 u8 sense_key = 0; 3297 unsigned int err_mask; 3298 3299 err_mask = atapi_eh_tur(dev, &sense_key); 3300 if (err_mask != 0 && err_mask != AC_ERR_DEV) { 3301 ata_dev_warn(dev, 3302 "TEST_UNIT_READY failed (err_mask=0x%x)\n", 3303 err_mask); 3304 return -EIO; 3305 } 3306 3307 if (!err_mask || sense_key != UNIT_ATTENTION) 3308 return 0; 3309 3310 err_mask = atapi_eh_request_sense(dev, sense_buffer, sense_key); 3311 if (err_mask) { 3312 ata_dev_warn(dev, "failed to clear " 3313 "UNIT ATTENTION (err_mask=0x%x)\n", err_mask); 3314 return -EIO; 3315 } 3316 } 3317 3318 ata_dev_warn(dev, "UNIT ATTENTION persists after %d tries\n", 3319 ATA_EH_UA_TRIES); 3320 3321 return 0; 3322 } 3323 3324 /** 3325 * ata_eh_maybe_retry_flush - Retry FLUSH if necessary 3326 * @dev: ATA device which may need FLUSH retry 3327 * 3328 * If @dev failed FLUSH, it must be reported to the upper layer 3329 * immediately, as it means that @dev failed to remap and has already 3330 * lost at least a sector; further FLUSH retries won't make 3331 * any difference to the lost sector. However, if FLUSH failed 3332 * for another reason, for example a transmission error, FLUSH needs 3333 * to be retried. 3334 * 3335 * This function determines whether a FLUSH failure retry is 3336 * necessary and performs it if so. 3337 * 3338 * RETURNS: 3339 * 0 if EH can continue, -errno if EH needs to be repeated. 3340 */ 3341 static int ata_eh_maybe_retry_flush(struct ata_device *dev) 3342 { 3343 struct ata_link *link = dev->link; 3344 struct ata_port *ap = link->ap; 3345 struct ata_queued_cmd *qc; 3346 struct ata_taskfile tf; 3347 unsigned int err_mask; 3348 int rc = 0; 3349 3350 /* did flush fail for this device?
*/ 3351 if (!ata_tag_valid(link->active_tag)) 3352 return 0; 3353 3354 qc = __ata_qc_from_tag(ap, link->active_tag); 3355 if (qc->dev != dev || (qc->tf.command != ATA_CMD_FLUSH_EXT && 3356 qc->tf.command != ATA_CMD_FLUSH)) 3357 return 0; 3358 3359 /* if the device failed it, it should be reported to upper layers */ 3360 if (qc->err_mask & AC_ERR_DEV) 3361 return 0; 3362 3363 /* flush failed for some other reason, give it another shot */ 3364 ata_tf_init(dev, &tf); 3365 3366 tf.command = qc->tf.command; 3367 tf.flags |= ATA_TFLAG_DEVICE; 3368 tf.protocol = ATA_PROT_NODATA; 3369 3370 ata_dev_warn(dev, "retrying FLUSH 0x%x Emask 0x%x\n", 3371 tf.command, qc->err_mask); 3372 3373 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); 3374 if (!err_mask) { 3375 /* 3376 * FLUSH is complete but there's no way to 3377 * successfully complete a failed command from EH. 3378 * Making sure a retry is allowed at least once and 3379 * retrying it should do the trick - whatever was in 3380 * the cache is already on the platter and this won't 3381 * cause an infinite loop. 3382 */ 3383 qc->scsicmd->allowed = max(qc->scsicmd->allowed, 1); 3384 } else { 3385 ata_dev_warn(dev, "FLUSH failed Emask 0x%x\n", 3386 err_mask); 3387 rc = -EIO; 3388 3389 /* if the device failed it, report it to upper layers */ 3390 if (err_mask & AC_ERR_DEV) { 3391 qc->err_mask |= AC_ERR_DEV; 3392 qc->result_tf = tf; 3393 if (!(ap->pflags & ATA_PFLAG_FROZEN)) 3394 rc = 0; 3395 } 3396 } 3397 return rc; 3398 } 3399 3400 /** 3401 * ata_eh_set_lpm - configure SATA interface power management 3402 * @link: link to configure power management 3403 * @policy: the link power management policy 3404 * @r_failed_dev: out parameter for failed device 3405 * 3406 * Enable SATA Interface power management. This will enable 3407 * Device Interface Power Management (DIPM) for min_power and 3408 * medium_power_with_dipm policies, and then call driver specific 3409 * callbacks for enabling Host Initiated Power management. 3410 * 3411 * LOCKING: 3412 * EH context. 3413 * 3414 * RETURNS: 3415 * 0 on success, -errno on failure. 3416 */ 3417 static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, 3418 struct ata_device **r_failed_dev) 3419 { 3420 struct ata_port *ap = ata_is_host_link(link) ? link->ap : NULL; 3421 struct ata_eh_context *ehc = &link->eh_context; 3422 struct ata_device *dev, *link_dev = NULL, *lpm_dev = NULL; 3423 enum ata_lpm_policy old_policy = link->lpm_policy; 3424 bool no_dipm = link->ap->flags & ATA_FLAG_NO_DIPM; 3425 unsigned int hints = ATA_LPM_EMPTY | ATA_LPM_HIPM; 3426 unsigned int err_mask; 3427 int rc; 3428 3429 /* if the link or host doesn't do LPM, noop */ 3430 if ((link->flags & ATA_LFLAG_NO_LPM) || (ap && !ap->ops->set_lpm)) 3431 return 0; 3432 3433 /* 3434 * DIPM is enabled only for the MED_POWER_WITH_DIPM and MIN_POWER 3435 * policies, as some devices misbehave when the host NACKs a 3436 * transition to SLUMBER. Order device and link configurations 3437 * such that the host always allows DIPM requests.
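* * The resulting order below (sketch): first disable DIPM on each device when leaving a DIPM-capable policy, then ask the host/PMP to change the link config, then re-enable DIPM once the new policy is MED_POWER_WITH_DIPM or a deeper-saving one.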
3438 */ 3439 ata_for_each_dev(dev, link, ENABLED) { 3440 bool hipm = ata_id_has_hipm(dev->id); 3441 bool dipm = ata_id_has_dipm(dev->id) && !no_dipm; 3442 3443 /* find the first enabled and LPM enabled devices */ 3444 if (!link_dev) 3445 link_dev = dev; 3446 3447 if (!lpm_dev && (hipm || dipm)) 3448 lpm_dev = dev; 3449 3450 hints &= ~ATA_LPM_EMPTY; 3451 if (!hipm) 3452 hints &= ~ATA_LPM_HIPM; 3453 3454 /* disable DIPM before changing link config */ 3455 if (policy < ATA_LPM_MED_POWER_WITH_DIPM && dipm) { 3456 err_mask = ata_dev_set_feature(dev, 3457 SETFEATURES_SATA_DISABLE, SATA_DIPM); 3458 if (err_mask && err_mask != AC_ERR_DEV) { 3459 ata_dev_warn(dev, 3460 "failed to disable DIPM, Emask 0x%x\n", 3461 err_mask); 3462 rc = -EIO; 3463 goto fail; 3464 } 3465 } 3466 } 3467 3468 if (ap) { 3469 rc = ap->ops->set_lpm(link, policy, hints); 3470 if (!rc && ap->slave_link) 3471 rc = ap->ops->set_lpm(ap->slave_link, policy, hints); 3472 } else 3473 rc = sata_pmp_set_lpm(link, policy, hints); 3474 3475 /* 3476 * Attribute link config failure to the first (LPM) enabled 3477 * device on the link. 3478 */ 3479 if (rc) { 3480 if (rc == -EOPNOTSUPP) { 3481 link->flags |= ATA_LFLAG_NO_LPM; 3482 return 0; 3483 } 3484 dev = lpm_dev ? lpm_dev : link_dev; 3485 goto fail; 3486 } 3487 3488 /* 3489 * Low level driver acked the transition. Issue DIPM command 3490 * with the new policy set. 3491 */ 3492 link->lpm_policy = policy; 3493 if (ap && ap->slave_link) 3494 ap->slave_link->lpm_policy = policy; 3495 3496 /* host config updated, enable DIPM if transitioning to MIN_POWER */ 3497 ata_for_each_dev(dev, link, ENABLED) { 3498 if (policy >= ATA_LPM_MED_POWER_WITH_DIPM && !no_dipm && 3499 ata_id_has_dipm(dev->id)) { 3500 err_mask = ata_dev_set_feature(dev, 3501 SETFEATURES_SATA_ENABLE, SATA_DIPM); 3502 if (err_mask && err_mask != AC_ERR_DEV) { 3503 ata_dev_warn(dev, 3504 "failed to enable DIPM, Emask 0x%x\n", 3505 err_mask); 3506 rc = -EIO; 3507 goto fail; 3508 } 3509 } 3510 } 3511 3512 link->last_lpm_change = jiffies; 3513 link->flags |= ATA_LFLAG_CHANGED; 3514 3515 return 0; 3516 3517 fail: 3518 /* restore the old policy */ 3519 link->lpm_policy = old_policy; 3520 if (ap && ap->slave_link) 3521 ap->slave_link->lpm_policy = old_policy; 3522 3523 /* if no device or only one more chance is left, disable LPM */ 3524 if (!dev || ehc->tries[dev->devno] <= 2) { 3525 ata_link_warn(link, "disabling LPM on the link\n"); 3526 link->flags |= ATA_LFLAG_NO_LPM; 3527 } 3528 if (r_failed_dev) 3529 *r_failed_dev = dev; 3530 return rc; 3531 } 3532 3533 int ata_link_nr_enabled(struct ata_link *link) 3534 { 3535 struct ata_device *dev; 3536 int cnt = 0; 3537 3538 ata_for_each_dev(dev, link, ENABLED) 3539 cnt++; 3540 return cnt; 3541 } 3542 3543 static int ata_link_nr_vacant(struct ata_link *link) 3544 { 3545 struct ata_device *dev; 3546 int cnt = 0; 3547 3548 ata_for_each_dev(dev, link, ALL) 3549 if (dev->class == ATA_DEV_UNKNOWN) 3550 cnt++; 3551 return cnt; 3552 } 3553 3554 static int ata_eh_skip_recovery(struct ata_link *link) 3555 { 3556 struct ata_port *ap = link->ap; 3557 struct ata_eh_context *ehc = &link->eh_context; 3558 struct ata_device *dev; 3559 3560 /* skip disabled links */ 3561 if (link->flags & ATA_LFLAG_DISABLED) 3562 return 1; 3563 3564 /* skip if explicitly requested */ 3565 if (ehc->i.flags & ATA_EHI_NO_RECOVERY) 3566 return 1; 3567 3568 /* thaw frozen port and recover failed devices */ 3569 if ((ap->pflags & ATA_PFLAG_FROZEN) || ata_link_nr_enabled(link)) 3570 return 0; 3571 3572 /* reset at least once if 
reset is requested */ 3573 if ((ehc->i.action & ATA_EH_RESET) && 3574 !(ehc->i.flags & ATA_EHI_DID_RESET)) 3575 return 0; 3576 3577 /* skip if class codes for all vacant slots are ATA_DEV_NONE */ 3578 ata_for_each_dev(dev, link, ALL) { 3579 if (dev->class == ATA_DEV_UNKNOWN && 3580 ehc->classes[dev->devno] != ATA_DEV_NONE) 3581 return 0; 3582 } 3583 3584 return 1; 3585 } 3586 3587 static int ata_count_probe_trials_cb(struct ata_ering_entry *ent, void *void_arg) 3588 { 3589 u64 interval = msecs_to_jiffies(ATA_EH_PROBE_TRIAL_INTERVAL); 3590 u64 now = get_jiffies_64(); 3591 int *trials = void_arg; 3592 3593 if ((ent->eflags & ATA_EFLAG_OLD_ER) || 3594 (ent->timestamp < now - min(now, interval))) 3595 return -1; 3596 3597 (*trials)++; 3598 return 0; 3599 } 3600 3601 static int ata_eh_schedule_probe(struct ata_device *dev) 3602 { 3603 struct ata_eh_context *ehc = &dev->link->eh_context; 3604 struct ata_link *link = ata_dev_phys_link(dev); 3605 int trials = 0; 3606 3607 if (!(ehc->i.probe_mask & (1 << dev->devno)) || 3608 (ehc->did_probe_mask & (1 << dev->devno))) 3609 return 0; 3610 3611 ata_eh_detach_dev(dev); 3612 ata_dev_init(dev); 3613 ehc->did_probe_mask |= (1 << dev->devno); 3614 ehc->i.action |= ATA_EH_RESET; 3615 ehc->saved_xfer_mode[dev->devno] = 0; 3616 ehc->saved_ncq_enabled &= ~(1 << dev->devno); 3617 3618 /* the link may be in a deep sleep, wake it up */ 3619 if (link->lpm_policy > ATA_LPM_MAX_POWER) { 3620 if (ata_is_host_link(link)) 3621 link->ap->ops->set_lpm(link, ATA_LPM_MAX_POWER, 3622 ATA_LPM_EMPTY); 3623 else 3624 sata_pmp_set_lpm(link, ATA_LPM_MAX_POWER, 3625 ATA_LPM_EMPTY); 3626 } 3627 3628 /* Record and count probe trials on the ering. The specific 3629 * error mask used is irrelevant. Because a successful device 3630 * detection clears the ering, this count accumulates only if 3631 * there are consecutive failed probes. 3632 * 3633 * Once more than ATA_EH_PROBE_TRIALS trials have accumulated 3634 * within the last ATA_EH_PROBE_TRIAL_INTERVAL, link speed is 3635 * forced down to 1.5Gbps. 3636 * 3637 * This is to work around cases where failed link speed 3638 * negotiation results in device misdetection leading to 3639 * infinite DEVXCHG or PHYRDY CHG events. 3640 */ 3641 ata_ering_record(&dev->ering, 0, AC_ERR_OTHER); 3642 ata_ering_map(&dev->ering, ata_count_probe_trials_cb, &trials); 3643 3644 if (trials > ATA_EH_PROBE_TRIALS) 3645 sata_down_spd_limit(link, 1); 3646 3647 return 1; 3648 } 3649 3650 static int ata_eh_handle_dev_fail(struct ata_device *dev, int err) 3651 { 3652 struct ata_eh_context *ehc = &dev->link->eh_context; 3653 3654 /* -EAGAIN from EH routine indicates retry without prejudice. 3655 * The requester is responsible for ensuring forward progress. 3656 */ 3657 if (err != -EAGAIN) 3658 ehc->tries[dev->devno]--; 3659 3660 switch (err) { 3661 case -ENODEV: 3662 /* device missing or wrong IDENTIFY data, schedule probing */ 3663 ehc->i.probe_mask |= (1 << dev->devno); 3664 /* fall through */ 3665 case -EINVAL: 3666 /* give it just one more chance */ 3667 ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1); 3668 /* fall through */ 3669 case -EIO: 3670 if (ehc->tries[dev->devno] == 1) { 3671 /* This is the last chance, better to slow 3672 * down than lose it.
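* Both knobs get pulled below: sata_down_spd_limit() clamps the PHY speed and ata_down_xfermask_limit(dev, ATA_DNXFER_PIO) caps the transfer mode, trading bandwidth for stability on the final attempt.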
3673 */ 3674 sata_down_spd_limit(ata_dev_phys_link(dev), 0); 3675 if (dev->pio_mode > XFER_PIO_0) 3676 ata_down_xfermask_limit(dev, ATA_DNXFER_PIO); 3677 } 3678 } 3679 3680 if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) { 3681 /* disable device if it has used up all its chances */ 3682 ata_dev_disable(dev); 3683 3684 /* detach if offline */ 3685 if (ata_phys_link_offline(ata_dev_phys_link(dev))) 3686 ata_eh_detach_dev(dev); 3687 3688 /* schedule probe if necessary */ 3689 if (ata_eh_schedule_probe(dev)) { 3690 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 3691 memset(ehc->cmd_timeout_idx[dev->devno], 0, 3692 sizeof(ehc->cmd_timeout_idx[dev->devno])); 3693 } 3694 3695 return 1; 3696 } else { 3697 ehc->i.action |= ATA_EH_RESET; 3698 return 0; 3699 } 3700 } 3701 3702 /** 3703 * ata_eh_recover - recover host port after error 3704 * @ap: host port to recover 3705 * @prereset: prereset method (can be NULL) 3706 * @softreset: softreset method (can be NULL) 3707 * @hardreset: hardreset method (can be NULL) 3708 * @postreset: postreset method (can be NULL) 3709 * @r_failed_link: out parameter for failed link 3710 * 3711 * This is the alpha and omega, eum and yang, heart and soul of 3712 * libata exception handling. On entry, actions required to 3713 * recover each link and hotplug requests are recorded in the 3714 * link's eh_context. This function executes all the operations 3715 * with appropriate retrials and fallbacks to resurrect failed 3716 * devices, detach goners and greet newcomers. 3717 * 3718 * LOCKING: 3719 * Kernel thread context (may sleep). 3720 * 3721 * RETURNS: 3722 * 0 on success, -errno on failure. 3723 */ 3724 int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, 3725 ata_reset_fn_t softreset, ata_reset_fn_t hardreset, 3726 ata_postreset_fn_t postreset, 3727 struct ata_link **r_failed_link) 3728 { 3729 struct ata_link *link; 3730 struct ata_device *dev; 3731 int rc, nr_fails; 3732 unsigned long flags, deadline; 3733 3734 DPRINTK("ENTER\n"); 3735 3736 /* prep for recovery */ 3737 ata_for_each_link(link, ap, EDGE) { 3738 struct ata_eh_context *ehc = &link->eh_context; 3739 3740 /* re-enable link? */ 3741 if (ehc->i.action & ATA_EH_ENABLE_LINK) { 3742 ata_eh_about_to_do(link, NULL, ATA_EH_ENABLE_LINK); 3743 spin_lock_irqsave(ap->lock, flags); 3744 link->flags &= ~ATA_LFLAG_DISABLED; 3745 spin_unlock_irqrestore(ap->lock, flags); 3746 ata_eh_done(link, NULL, ATA_EH_ENABLE_LINK); 3747 } 3748 3749 ata_for_each_dev(dev, link, ALL) { 3750 if (link->flags & ATA_LFLAG_NO_RETRY) 3751 ehc->tries[dev->devno] = 1; 3752 else 3753 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 3754 3755 /* collect port action mask recorded in dev actions */ 3756 ehc->i.action |= ehc->i.dev_action[dev->devno] & 3757 ~ATA_EH_PERDEV_MASK; 3758 ehc->i.dev_action[dev->devno] &= ATA_EH_PERDEV_MASK; 3759 3760 /* process hotplug request */ 3761 if (dev->flags & ATA_DFLAG_DETACH) 3762 ata_eh_detach_dev(dev); 3763 3764 /* schedule probe if necessary */ 3765 if (!ata_dev_enabled(dev)) 3766 ata_eh_schedule_probe(dev); 3767 } 3768 } 3769 3770 retry: 3771 rc = 0; 3772 3773 /* if UNLOADING, finish immediately */ 3774 if (ap->pflags & ATA_PFLAG_UNLOADING) 3775 goto out; 3776 3777 /* prep for EH */ 3778 ata_for_each_link(link, ap, EDGE) { 3779 struct ata_eh_context *ehc = &link->eh_context; 3780 3781 /* skip EH if possible. 
*/ 3782 if (ata_eh_skip_recovery(link)) 3783 ehc->i.action = 0; 3784 3785 ata_for_each_dev(dev, link, ALL) 3786 ehc->classes[dev->devno] = ATA_DEV_UNKNOWN; 3787 } 3788 3789 /* reset */ 3790 ata_for_each_link(link, ap, EDGE) { 3791 struct ata_eh_context *ehc = &link->eh_context; 3792 3793 if (!(ehc->i.action & ATA_EH_RESET)) 3794 continue; 3795 3796 rc = ata_eh_reset(link, ata_link_nr_vacant(link), 3797 prereset, softreset, hardreset, postreset); 3798 if (rc) { 3799 ata_link_err(link, "reset failed, giving up\n"); 3800 goto out; 3801 } 3802 } 3803 3804 do { 3805 unsigned long now; 3806 3807 /* 3808 * clears ATA_EH_PARK in eh_info and resets 3809 * ap->park_req_pending 3810 */ 3811 ata_eh_pull_park_action(ap); 3812 3813 deadline = jiffies; 3814 ata_for_each_link(link, ap, EDGE) { 3815 ata_for_each_dev(dev, link, ALL) { 3816 struct ata_eh_context *ehc = &link->eh_context; 3817 unsigned long tmp; 3818 3819 if (dev->class != ATA_DEV_ATA && 3820 dev->class != ATA_DEV_ZAC) 3821 continue; 3822 if (!(ehc->i.dev_action[dev->devno] & 3823 ATA_EH_PARK)) 3824 continue; 3825 tmp = dev->unpark_deadline; 3826 if (time_before(deadline, tmp)) 3827 deadline = tmp; 3828 else if (time_before_eq(tmp, jiffies)) 3829 continue; 3830 if (ehc->unloaded_mask & (1 << dev->devno)) 3831 continue; 3832 3833 ata_eh_park_issue_cmd(dev, 1); 3834 } 3835 } 3836 3837 now = jiffies; 3838 if (time_before_eq(deadline, now)) 3839 break; 3840 3841 ata_eh_release(ap); 3842 deadline = wait_for_completion_timeout(&ap->park_req_pending, 3843 deadline - now); 3844 ata_eh_acquire(ap); 3845 } while (deadline); 3846 ata_for_each_link(link, ap, EDGE) { 3847 ata_for_each_dev(dev, link, ALL) { 3848 if (!(link->eh_context.unloaded_mask & 3849 (1 << dev->devno))) 3850 continue; 3851 3852 ata_eh_park_issue_cmd(dev, 0); 3853 ata_eh_done(link, dev, ATA_EH_PARK); 3854 } 3855 } 3856 3857 /* the rest */ 3858 nr_fails = 0; 3859 ata_for_each_link(link, ap, PMP_FIRST) { 3860 struct ata_eh_context *ehc = &link->eh_context; 3861 3862 if (sata_pmp_attached(ap) && ata_is_host_link(link)) 3863 goto config_lpm; 3864 3865 /* revalidate existing devices and attach new ones */ 3866 rc = ata_eh_revalidate_and_attach(link, &dev); 3867 if (rc) 3868 goto rest_fail; 3869 3870 /* if PMP got attached, return, pmp EH will take care of it */ 3871 if (link->device->class == ATA_DEV_PMP) { 3872 ehc->i.action = 0; 3873 return 0; 3874 } 3875 3876 /* configure transfer mode if necessary */ 3877 if (ehc->i.flags & ATA_EHI_SETMODE) { 3878 rc = ata_set_mode(link, &dev); 3879 if (rc) 3880 goto rest_fail; 3881 ehc->i.flags &= ~ATA_EHI_SETMODE; 3882 } 3883 3884 /* If reset has been issued, clear UA to avoid 3885 * disrupting the current users of the device. 
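* A post-reset UA would otherwise surface to the device's current users as an unexpected UNIT ATTENTION sense (typically ASC 0x29, power on/reset occurred, per SPC - an assumption about the device's sense reporting, not something checked here).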
3886 */ 3887 if (ehc->i.flags & ATA_EHI_DID_RESET) { 3888 ata_for_each_dev(dev, link, ALL) { 3889 if (dev->class != ATA_DEV_ATAPI) 3890 continue; 3891 rc = atapi_eh_clear_ua(dev); 3892 if (rc) 3893 goto rest_fail; 3894 if (zpodd_dev_enabled(dev)) 3895 zpodd_post_poweron(dev); 3896 } 3897 } 3898 3899 /* retry flush if necessary */ 3900 ata_for_each_dev(dev, link, ALL) { 3901 if (dev->class != ATA_DEV_ATA && 3902 dev->class != ATA_DEV_ZAC) 3903 continue; 3904 rc = ata_eh_maybe_retry_flush(dev); 3905 if (rc) 3906 goto rest_fail; 3907 } 3908 3909 config_lpm: 3910 /* configure link power saving */ 3911 if (link->lpm_policy != ap->target_lpm_policy) { 3912 rc = ata_eh_set_lpm(link, ap->target_lpm_policy, &dev); 3913 if (rc) 3914 goto rest_fail; 3915 } 3916 3917 /* this link is okay now */ 3918 ehc->i.flags = 0; 3919 continue; 3920 3921 rest_fail: 3922 nr_fails++; 3923 if (dev) 3924 ata_eh_handle_dev_fail(dev, rc); 3925 3926 if (ap->pflags & ATA_PFLAG_FROZEN) { 3927 /* PMP reset requires working host port. 3928 * Can't retry if it's frozen. 3929 */ 3930 if (sata_pmp_attached(ap)) 3931 goto out; 3932 break; 3933 } 3934 } 3935 3936 if (nr_fails) 3937 goto retry; 3938 3939 out: 3940 if (rc && r_failed_link) 3941 *r_failed_link = link; 3942 3943 DPRINTK("EXIT, rc=%d\n", rc); 3944 return rc; 3945 } 3946 3947 /** 3948 * ata_eh_finish - finish up EH 3949 * @ap: host port to finish EH for 3950 * 3951 * Recovery is complete. Clean up EH states and retry or finish 3952 * failed qcs. 3953 * 3954 * LOCKING: 3955 * None. 3956 */ 3957 void ata_eh_finish(struct ata_port *ap) 3958 { 3959 int tag; 3960 3961 /* retry or finish qcs */ 3962 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 3963 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 3964 3965 if (!(qc->flags & ATA_QCFLAG_FAILED)) 3966 continue; 3967 3968 if (qc->err_mask) { 3969 /* FIXME: Once EH migration is complete, 3970 * generate sense data in this function, 3971 * considering both err_mask and tf. 3972 */ 3973 if (qc->flags & ATA_QCFLAG_RETRY) 3974 ata_eh_qc_retry(qc); 3975 else 3976 ata_eh_qc_complete(qc); 3977 } else { 3978 if (qc->flags & ATA_QCFLAG_SENSE_VALID) { 3979 ata_eh_qc_complete(qc); 3980 } else { 3981 /* feed zero TF to sense generation */ 3982 memset(&qc->result_tf, 0, sizeof(qc->result_tf)); 3983 ata_eh_qc_retry(qc); 3984 } 3985 } 3986 } 3987 3988 /* make sure nr_active_links is zero after EH */ 3989 WARN_ON(ap->nr_active_links); 3990 ap->nr_active_links = 0; 3991 } 3992 3993 /** 3994 * ata_do_eh - do standard error handling 3995 * @ap: host port to handle error for 3996 * 3997 * @prereset: prereset method (can be NULL) 3998 * @softreset: softreset method (can be NULL) 3999 * @hardreset: hardreset method (can be NULL) 4000 * @postreset: postreset method (can be NULL) 4001 * 4002 * Perform standard error handling sequence. 4003 * 4004 * LOCKING: 4005 * Kernel thread context (may sleep). 
/**
 * ata_do_eh - do standard error handling
 * @ap: host port to handle error for
 * @prereset: prereset method (can be NULL)
 * @softreset: softreset method (can be NULL)
 * @hardreset: hardreset method (can be NULL)
 * @postreset: postreset method (can be NULL)
 *
 * Perform standard error handling sequence.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset,
	       ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
	       ata_postreset_fn_t postreset)
{
	struct ata_device *dev;
	int rc;

	ata_eh_autopsy(ap);
	ata_eh_report(ap);

	rc = ata_eh_recover(ap, prereset, softreset, hardreset, postreset,
			    NULL);
	if (rc) {
		ata_for_each_dev(dev, &ap->link, ALL)
			ata_dev_disable(dev);
	}

	ata_eh_finish(ap);
}

/**
 * ata_std_error_handler - standard error handler
 * @ap: host port to handle error for
 *
 * Standard error handler.  Performs the standard EH sequence using
 * the reset methods from @ap->ops, ignoring the built-in hardreset
 * when SCR access is unavailable.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
void ata_std_error_handler(struct ata_port *ap)
{
	struct ata_port_operations *ops = ap->ops;
	ata_reset_fn_t hardreset = ops->hardreset;

	/* ignore built-in hardreset if SCR access is not available */
	if (hardreset == sata_std_hardreset && !sata_scr_valid(&ap->link))
		hardreset = NULL;

	ata_do_eh(ap, ops->prereset, ops->softreset, hardreset, ops->postreset);
}

#ifdef CONFIG_PM
/**
 * ata_eh_handle_port_suspend - perform port suspend operation
 * @ap: port to suspend
 *
 * Suspend @ap.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
static void ata_eh_handle_port_suspend(struct ata_port *ap)
{
	unsigned long flags;
	int rc = 0;
	struct ata_device *dev;

	/* are we suspending? */
	spin_lock_irqsave(ap->lock, flags);
	if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
	    ap->pm_mesg.event & PM_EVENT_RESUME) {
		spin_unlock_irqrestore(ap->lock, flags);
		return;
	}
	spin_unlock_irqrestore(ap->lock, flags);

	WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED);

	/*
	 * If we have a ZPODD attached, check its zero
	 * power ready status before the port is frozen.
	 * Only needed for runtime suspend.
	 */
	if (PMSG_IS_AUTO(ap->pm_mesg)) {
		ata_for_each_dev(dev, &ap->link, ENABLED) {
			if (zpodd_dev_enabled(dev))
				zpodd_on_suspend(dev);
		}
	}

	/* tell ACPI we're suspending */
	rc = ata_acpi_on_suspend(ap);
	if (rc)
		goto out;

	/* suspend */
	ata_eh_freeze_port(ap);

	if (ap->ops->port_suspend)
		rc = ap->ops->port_suspend(ap, ap->pm_mesg);

	ata_acpi_set_state(ap, ap->pm_mesg);
 out:
	/* update the flags */
	spin_lock_irqsave(ap->lock, flags);

	ap->pflags &= ~ATA_PFLAG_PM_PENDING;
	if (rc == 0)
		ap->pflags |= ATA_PFLAG_SUSPENDED;
	else if (ap->pflags & ATA_PFLAG_FROZEN)
		ata_port_schedule_eh(ap);

	spin_unlock_irqrestore(ap->lock, flags);
}
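/*
 * The resume path below mirrors the suspend path above: it checks
 * ATA_PFLAG_PM_PENDING and the PM message, restores ACPI and port
 * hardware state, and finally clears both ATA_PFLAG_PM_PENDING and
 * ATA_PFLAG_SUSPENDED.
 */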
/**
 * ata_eh_handle_port_resume - perform port resume operation
 * @ap: port to resume
 *
 * Resume @ap.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
static void ata_eh_handle_port_resume(struct ata_port *ap)
{
	struct ata_link *link;
	struct ata_device *dev;
	unsigned long flags;

	/* are we resuming? */
	spin_lock_irqsave(ap->lock, flags);
	if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
	    !(ap->pm_mesg.event & PM_EVENT_RESUME)) {
		spin_unlock_irqrestore(ap->lock, flags);
		return;
	}
	spin_unlock_irqrestore(ap->lock, flags);

	WARN_ON(!(ap->pflags & ATA_PFLAG_SUSPENDED));

	/*
	 * Error timestamps are kept in jiffies, which doesn't advance
	 * while the machine is suspended, and PHY events during resume
	 * aren't uncommon.  Combined, the two can lead to unnecessary
	 * speed downs if the machine is suspended and resumed
	 * repeatedly.  Clear the error history.
	 */
	ata_for_each_link(link, ap, HOST_FIRST)
		ata_for_each_dev(dev, link, ALL)
			ata_ering_clear(&dev->ering);

	ata_acpi_set_state(ap, ap->pm_mesg);

	if (ap->ops->port_resume)
		ap->ops->port_resume(ap);

	/* tell ACPI that we're resuming */
	ata_acpi_on_resume(ap);

	/* update the flags */
	spin_lock_irqsave(ap->lock, flags);
	ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED);
	spin_unlock_irqrestore(ap->lock, flags);
}
#endif /* CONFIG_PM */
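/*
 * Usage sketch (hypothetical driver, not part of this file): an LLD
 * normally picks up the standard error handler through its port
 * operations, either by inheriting from a generic ops table or by
 * setting the callback explicitly.  foo_port_ops is a made-up name:
 *
 *	static struct ata_port_operations foo_port_ops = {
 *		.inherits	= &sata_port_ops,
 *		.error_handler	= ata_std_error_handler,
 *	};
 */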