1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * libata-eh.c - libata error handling 4 * 5 * Copyright 2006 Tejun Heo <htejun@gmail.com> 6 * 7 * libata documentation is available via 'make {ps|pdf}docs', 8 * as Documentation/driver-api/libata.rst 9 * 10 * Hardware documentation available from http://www.t13.org/ and 11 * http://www.sata-io.org/ 12 */ 13 14 #include <linux/kernel.h> 15 #include <linux/blkdev.h> 16 #include <linux/export.h> 17 #include <linux/pci.h> 18 #include <scsi/scsi.h> 19 #include <scsi/scsi_host.h> 20 #include <scsi/scsi_eh.h> 21 #include <scsi/scsi_device.h> 22 #include <scsi/scsi_cmnd.h> 23 #include <scsi/scsi_dbg.h> 24 #include "../scsi/scsi_transport_api.h" 25 26 #include <linux/libata.h> 27 28 #include <trace/events/libata.h> 29 #include "libata.h" 30 31 enum { 32 /* speed down verdicts */ 33 ATA_EH_SPDN_NCQ_OFF = (1 << 0), 34 ATA_EH_SPDN_SPEED_DOWN = (1 << 1), 35 ATA_EH_SPDN_FALLBACK_TO_PIO = (1 << 2), 36 ATA_EH_SPDN_KEEP_ERRORS = (1 << 3), 37 38 /* error flags */ 39 ATA_EFLAG_IS_IO = (1 << 0), 40 ATA_EFLAG_DUBIOUS_XFER = (1 << 1), 41 ATA_EFLAG_OLD_ER = (1 << 31), 42 43 /* error categories */ 44 ATA_ECAT_NONE = 0, 45 ATA_ECAT_ATA_BUS = 1, 46 ATA_ECAT_TOUT_HSM = 2, 47 ATA_ECAT_UNK_DEV = 3, 48 ATA_ECAT_DUBIOUS_NONE = 4, 49 ATA_ECAT_DUBIOUS_ATA_BUS = 5, 50 ATA_ECAT_DUBIOUS_TOUT_HSM = 6, 51 ATA_ECAT_DUBIOUS_UNK_DEV = 7, 52 ATA_ECAT_NR = 8, 53 54 ATA_EH_CMD_DFL_TIMEOUT = 5000, 55 56 /* always put at least this amount of time between resets */ 57 ATA_EH_RESET_COOL_DOWN = 5000, 58 59 /* Waiting in ->prereset can never be reliable. It's 60 * sometimes nice to wait there but it can't be depended upon; 61 * otherwise, we wouldn't be resetting. Just give it enough 62 * time for most drives to spin up. 
63 */ 64 ATA_EH_PRERESET_TIMEOUT = 10000, 65 ATA_EH_FASTDRAIN_INTERVAL = 3000, 66 67 ATA_EH_UA_TRIES = 5, 68 69 /* probe speed down parameters, see ata_eh_schedule_probe() */ 70 ATA_EH_PROBE_TRIAL_INTERVAL = 60000, /* 1 min */ 71 ATA_EH_PROBE_TRIALS = 2, 72 }; 73 74 /* The following table determines how we sequence resets. Each entry 75 * represents timeout for that try. The first try can be soft or 76 * hardreset. All others are hardreset if available. In most cases 77 * the first reset w/ 10sec timeout should succeed. Following entries 78 * are mostly for error handling, hotplug and those outlier devices that 79 * take an exceptionally long time to recover from reset. 80 */ 81 static const unsigned int ata_eh_reset_timeouts[] = { 82 10000, /* most drives spin up by 10sec */ 83 10000, /* > 99% working drives spin up before 20sec */ 84 35000, /* give > 30 secs of idleness for outlier devices */ 85 5000, /* and sweet one last chance */ 86 UINT_MAX, /* > 1 min has elapsed, give up */ 87 }; 88 89 static const unsigned int ata_eh_identify_timeouts[] = { 90 5000, /* covers > 99% of successes and not too boring on failures */ 91 10000, /* combined time till here is enough even for media access */ 92 30000, /* for true idiots */ 93 UINT_MAX, 94 }; 95 96 static const unsigned int ata_eh_revalidate_timeouts[] = { 97 15000, /* Some drives are slow to read log pages when waking-up */ 98 15000, /* combined time till here is enough even for media access */ 99 UINT_MAX, 100 }; 101 102 static const unsigned int ata_eh_flush_timeouts[] = { 103 15000, /* be generous with flush */ 104 15000, /* ditto */ 105 30000, /* and even more generous */ 106 UINT_MAX, 107 }; 108 109 static const unsigned int ata_eh_other_timeouts[] = { 110 5000, /* same rationale as identify timeout */ 111 10000, /* ditto */ 112 /* but no merciful 30sec for other commands, it just isn't worth it */ 113 UINT_MAX, 114 }; 115 116 struct ata_eh_cmd_timeout_ent { 117 const u8 *commands; 118 const unsigned int 
*timeouts; 119 }; 120 121 /* The following table determines timeouts to use for EH internal 122 * commands. Each table entry is a command class and matches the 123 * commands the entry applies to and the timeout table to use. 124 * 125 * On the retry after a command timed out, the next timeout value from 126 * the table is used. If the table doesn't contain further entries, 127 * the last value is used. 128 * 129 * ehc->cmd_timeout_idx keeps track of which timeout to use per 130 * command class, so if SET_FEATURES times out on the first try, the 131 * next try will use the second timeout value only for that class. 132 */ 133 #define CMDS(cmds...) (const u8 []){ cmds, 0 } 134 static const struct ata_eh_cmd_timeout_ent 135 ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = { 136 { .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI), 137 .timeouts = ata_eh_identify_timeouts, }, 138 { .commands = CMDS(ATA_CMD_READ_LOG_EXT, ATA_CMD_READ_LOG_DMA_EXT), 139 .timeouts = ata_eh_revalidate_timeouts, }, 140 { .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT), 141 .timeouts = ata_eh_other_timeouts, }, 142 { .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT), 143 .timeouts = ata_eh_other_timeouts, }, 144 { .commands = CMDS(ATA_CMD_SET_FEATURES), 145 .timeouts = ata_eh_other_timeouts, }, 146 { .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS), 147 .timeouts = ata_eh_other_timeouts, }, 148 { .commands = CMDS(ATA_CMD_FLUSH, ATA_CMD_FLUSH_EXT), 149 .timeouts = ata_eh_flush_timeouts }, 150 { .commands = CMDS(ATA_CMD_VERIFY), 151 .timeouts = ata_eh_reset_timeouts }, 152 }; 153 #undef CMDS 154 155 static void __ata_port_freeze(struct ata_port *ap); 156 #ifdef CONFIG_PM 157 static void ata_eh_handle_port_suspend(struct ata_port *ap); 158 static void ata_eh_handle_port_resume(struct ata_port *ap); 159 #else /* CONFIG_PM */ 160 static void ata_eh_handle_port_suspend(struct ata_port *ap) 161 { } 162 163 static void ata_eh_handle_port_resume(struct ata_port *ap) 164 { 
} 165 #endif /* CONFIG_PM */ 166 167 static __printf(2, 0) void __ata_ehi_pushv_desc(struct ata_eh_info *ehi, 168 const char *fmt, va_list args) 169 { 170 ehi->desc_len += vscnprintf(ehi->desc + ehi->desc_len, 171 ATA_EH_DESC_LEN - ehi->desc_len, 172 fmt, args); 173 } 174 175 /** 176 * __ata_ehi_push_desc - push error description without adding separator 177 * @ehi: target EHI 178 * @fmt: printf format string 179 * 180 * Format string according to @fmt and append it to @ehi->desc. 181 * 182 * LOCKING: 183 * spin_lock_irqsave(host lock) 184 */ 185 void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...) 186 { 187 va_list args; 188 189 va_start(args, fmt); 190 __ata_ehi_pushv_desc(ehi, fmt, args); 191 va_end(args); 192 } 193 EXPORT_SYMBOL_GPL(__ata_ehi_push_desc); 194 195 /** 196 * ata_ehi_push_desc - push error description with separator 197 * @ehi: target EHI 198 * @fmt: printf format string 199 * 200 * Format string according to @fmt and append it to @ehi->desc. 201 * If @ehi->desc is not empty, ", " is added in-between. 202 * 203 * LOCKING: 204 * spin_lock_irqsave(host lock) 205 */ 206 void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...) 207 { 208 va_list args; 209 210 if (ehi->desc_len) 211 __ata_ehi_push_desc(ehi, ", "); 212 213 va_start(args, fmt); 214 __ata_ehi_pushv_desc(ehi, fmt, args); 215 va_end(args); 216 } 217 EXPORT_SYMBOL_GPL(ata_ehi_push_desc); 218 219 /** 220 * ata_ehi_clear_desc - clean error description 221 * @ehi: target EHI 222 * 223 * Clear @ehi->desc. 224 * 225 * LOCKING: 226 * spin_lock_irqsave(host lock) 227 */ 228 void ata_ehi_clear_desc(struct ata_eh_info *ehi) 229 { 230 ehi->desc[0] = '\0'; 231 ehi->desc_len = 0; 232 } 233 EXPORT_SYMBOL_GPL(ata_ehi_clear_desc); 234 235 /** 236 * ata_port_desc - append port description 237 * @ap: target ATA port 238 * @fmt: printf format string 239 * 240 * Format string according to @fmt and append it to port 241 * description. 
If port description is not empty, " " is added 242 * in-between. This function is to be used while initializing 243 * ata_host. The description is printed on host registration. 244 * 245 * LOCKING: 246 * None. 247 */ 248 void ata_port_desc(struct ata_port *ap, const char *fmt, ...) 249 { 250 va_list args; 251 252 WARN_ON(!(ap->pflags & ATA_PFLAG_INITIALIZING)); 253 254 if (ap->link.eh_info.desc_len) 255 __ata_ehi_push_desc(&ap->link.eh_info, " "); 256 257 va_start(args, fmt); 258 __ata_ehi_pushv_desc(&ap->link.eh_info, fmt, args); 259 va_end(args); 260 } 261 EXPORT_SYMBOL_GPL(ata_port_desc); 262 263 #ifdef CONFIG_PCI 264 /** 265 * ata_port_pbar_desc - append PCI BAR description 266 * @ap: target ATA port 267 * @bar: target PCI BAR 268 * @offset: offset into PCI BAR 269 * @name: name of the area 270 * 271 * If @offset is negative, this function formats a string which 272 * contains the name, address, size and type of the BAR and 273 * appends it to the port description. If @offset is zero or 274 * positive, only name and offsetted address is appended. 275 * 276 * LOCKING: 277 * None. 
278 */ 279 void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset, 280 const char *name) 281 { 282 struct pci_dev *pdev = to_pci_dev(ap->host->dev); 283 char *type = ""; 284 unsigned long long start, len; 285 286 if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) 287 type = "m"; 288 else if (pci_resource_flags(pdev, bar) & IORESOURCE_IO) 289 type = "i"; 290 291 start = (unsigned long long)pci_resource_start(pdev, bar); 292 len = (unsigned long long)pci_resource_len(pdev, bar); 293 294 if (offset < 0) 295 ata_port_desc(ap, "%s %s%llu@0x%llx", name, type, len, start); 296 else 297 ata_port_desc(ap, "%s 0x%llx", name, 298 start + (unsigned long long)offset); 299 } 300 EXPORT_SYMBOL_GPL(ata_port_pbar_desc); 301 #endif /* CONFIG_PCI */ 302 303 static int ata_lookup_timeout_table(u8 cmd) 304 { 305 int i; 306 307 for (i = 0; i < ATA_EH_CMD_TIMEOUT_TABLE_SIZE; i++) { 308 const u8 *cur; 309 310 for (cur = ata_eh_cmd_timeout_table[i].commands; *cur; cur++) 311 if (*cur == cmd) 312 return i; 313 } 314 315 return -1; 316 } 317 318 /** 319 * ata_internal_cmd_timeout - determine timeout for an internal command 320 * @dev: target device 321 * @cmd: internal command to be issued 322 * 323 * Determine timeout for internal command @cmd for @dev. 324 * 325 * LOCKING: 326 * EH context. 327 * 328 * RETURNS: 329 * Determined timeout. 330 */ 331 unsigned int ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd) 332 { 333 struct ata_eh_context *ehc = &dev->link->eh_context; 334 int ent = ata_lookup_timeout_table(cmd); 335 int idx; 336 337 if (ent < 0) 338 return ATA_EH_CMD_DFL_TIMEOUT; 339 340 idx = ehc->cmd_timeout_idx[dev->devno][ent]; 341 return ata_eh_cmd_timeout_table[ent].timeouts[idx]; 342 } 343 344 /** 345 * ata_internal_cmd_timed_out - notification for internal command timeout 346 * @dev: target device 347 * @cmd: internal command which timed out 348 * 349 * Notify EH that internal command @cmd for @dev timed out. 
This 350 * function should be called only for commands whose timeouts are 351 * determined using ata_internal_cmd_timeout(). 352 * 353 * LOCKING: 354 * EH context. 355 */ 356 void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd) 357 { 358 struct ata_eh_context *ehc = &dev->link->eh_context; 359 int ent = ata_lookup_timeout_table(cmd); 360 int idx; 361 362 if (ent < 0) 363 return; 364 365 idx = ehc->cmd_timeout_idx[dev->devno][ent]; 366 if (ata_eh_cmd_timeout_table[ent].timeouts[idx + 1] != UINT_MAX) 367 ehc->cmd_timeout_idx[dev->devno][ent]++; 368 } 369 370 static void ata_ering_record(struct ata_ering *ering, unsigned int eflags, 371 unsigned int err_mask) 372 { 373 struct ata_ering_entry *ent; 374 375 WARN_ON(!err_mask); 376 377 ering->cursor++; 378 ering->cursor %= ATA_ERING_SIZE; 379 380 ent = &ering->ring[ering->cursor]; 381 ent->eflags = eflags; 382 ent->err_mask = err_mask; 383 ent->timestamp = get_jiffies_64(); 384 } 385 386 static struct ata_ering_entry *ata_ering_top(struct ata_ering *ering) 387 { 388 struct ata_ering_entry *ent = &ering->ring[ering->cursor]; 389 390 if (ent->err_mask) 391 return ent; 392 return NULL; 393 } 394 395 int ata_ering_map(struct ata_ering *ering, 396 int (*map_fn)(struct ata_ering_entry *, void *), 397 void *arg) 398 { 399 int idx, rc = 0; 400 struct ata_ering_entry *ent; 401 402 idx = ering->cursor; 403 do { 404 ent = &ering->ring[idx]; 405 if (!ent->err_mask) 406 break; 407 rc = map_fn(ent, arg); 408 if (rc) 409 break; 410 idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE; 411 } while (idx != ering->cursor); 412 413 return rc; 414 } 415 416 static int ata_ering_clear_cb(struct ata_ering_entry *ent, void *void_arg) 417 { 418 ent->eflags |= ATA_EFLAG_OLD_ER; 419 return 0; 420 } 421 422 static void ata_ering_clear(struct ata_ering *ering) 423 { 424 ata_ering_map(ering, ata_ering_clear_cb, NULL); 425 } 426 427 static unsigned int ata_eh_dev_action(struct ata_device *dev) 428 { 429 struct ata_eh_context *ehc = 
&dev->link->eh_context; 430 431 return ehc->i.action | ehc->i.dev_action[dev->devno]; 432 } 433 434 static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev, 435 struct ata_eh_info *ehi, unsigned int action) 436 { 437 struct ata_device *tdev; 438 439 if (!dev) { 440 ehi->action &= ~action; 441 ata_for_each_dev(tdev, link, ALL) 442 ehi->dev_action[tdev->devno] &= ~action; 443 } else { 444 /* doesn't make sense for port-wide EH actions */ 445 WARN_ON(!(action & ATA_EH_PERDEV_MASK)); 446 447 /* break ehi->action into ehi->dev_action */ 448 if (ehi->action & action) { 449 ata_for_each_dev(tdev, link, ALL) 450 ehi->dev_action[tdev->devno] |= 451 ehi->action & action; 452 ehi->action &= ~action; 453 } 454 455 /* turn off the specified per-dev action */ 456 ehi->dev_action[dev->devno] &= ~action; 457 } 458 } 459 460 /** 461 * ata_eh_acquire - acquire EH ownership 462 * @ap: ATA port to acquire EH ownership for 463 * 464 * Acquire EH ownership for @ap. This is the basic exclusion 465 * mechanism for ports sharing a host. Only one port hanging off 466 * the same host can claim the ownership of EH. 467 * 468 * LOCKING: 469 * EH context. 470 */ 471 void ata_eh_acquire(struct ata_port *ap) 472 { 473 mutex_lock(&ap->host->eh_mutex); 474 WARN_ON_ONCE(ap->host->eh_owner); 475 ap->host->eh_owner = current; 476 } 477 478 /** 479 * ata_eh_release - release EH ownership 480 * @ap: ATA port to release EH ownership for 481 * 482 * Release EH ownership for @ap if the caller. The caller must 483 * have acquired EH ownership using ata_eh_acquire() previously. 484 * 485 * LOCKING: 486 * EH context. 
487 */ 488 void ata_eh_release(struct ata_port *ap) 489 { 490 WARN_ON_ONCE(ap->host->eh_owner != current); 491 ap->host->eh_owner = NULL; 492 mutex_unlock(&ap->host->eh_mutex); 493 } 494 495 static void ata_eh_dev_disable(struct ata_device *dev) 496 { 497 ata_acpi_on_disable(dev); 498 ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO0 | ATA_DNXFER_QUIET); 499 dev->class++; 500 501 /* 502 * From now till the next successful probe, ering is used to 503 * track probe failures. Clear accumulated device error info. 504 */ 505 ata_ering_clear(&dev->ering); 506 507 ata_dev_free_resources(dev); 508 } 509 510 static void ata_eh_unload(struct ata_port *ap) 511 { 512 struct ata_link *link; 513 struct ata_device *dev; 514 unsigned long flags; 515 516 /* 517 * Unless we are restarting, transition all enabled devices to 518 * standby power mode. 519 */ 520 if (system_state != SYSTEM_RESTART) { 521 ata_for_each_link(link, ap, PMP_FIRST) { 522 ata_for_each_dev(dev, link, ENABLED) 523 ata_dev_power_set_standby(dev); 524 } 525 } 526 527 /* 528 * Restore SControl IPM and SPD for the next driver and 529 * disable attached devices. 530 */ 531 ata_for_each_link(link, ap, PMP_FIRST) { 532 sata_scr_write(link, SCR_CONTROL, link->saved_scontrol & 0xff0); 533 ata_for_each_dev(dev, link, ENABLED) 534 ata_eh_dev_disable(dev); 535 } 536 537 /* freeze and set UNLOADED */ 538 spin_lock_irqsave(ap->lock, flags); 539 540 ata_port_freeze(ap); /* won't be thawed */ 541 ap->pflags &= ~ATA_PFLAG_EH_PENDING; /* clear pending from freeze */ 542 ap->pflags |= ATA_PFLAG_UNLOADED; 543 544 spin_unlock_irqrestore(ap->lock, flags); 545 } 546 547 /** 548 * ata_scsi_error - SCSI layer error handler callback 549 * @host: SCSI host on which error occurred 550 * 551 * Handles SCSI-layer-thrown error events. 552 * 553 * LOCKING: 554 * Inherited from SCSI layer (none, can sleep) 555 * 556 * RETURNS: 557 * Zero. 
558 */ 559 void ata_scsi_error(struct Scsi_Host *host) 560 { 561 struct ata_port *ap = ata_shost_to_port(host); 562 unsigned long flags; 563 LIST_HEAD(eh_work_q); 564 565 spin_lock_irqsave(host->host_lock, flags); 566 list_splice_init(&host->eh_cmd_q, &eh_work_q); 567 spin_unlock_irqrestore(host->host_lock, flags); 568 569 ata_scsi_cmd_error_handler(host, ap, &eh_work_q); 570 571 /* If we timed raced normal completion and there is nothing to 572 recover nr_timedout == 0 why exactly are we doing error recovery ? */ 573 ata_scsi_port_error_handler(host, ap); 574 575 /* finish or retry handled scmd's and clean up */ 576 WARN_ON(!list_empty(&eh_work_q)); 577 578 } 579 580 /** 581 * ata_scsi_cmd_error_handler - error callback for a list of commands 582 * @host: scsi host containing the port 583 * @ap: ATA port within the host 584 * @eh_work_q: list of commands to process 585 * 586 * process the given list of commands and return those finished to the 587 * ap->eh_done_q. This function is the first part of the libata error 588 * handler which processes a given list of failed commands. 589 */ 590 void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, 591 struct list_head *eh_work_q) 592 { 593 int i; 594 unsigned long flags; 595 struct scsi_cmnd *scmd, *tmp; 596 int nr_timedout = 0; 597 598 /* make sure sff pio task is not running */ 599 ata_sff_flush_pio_task(ap); 600 601 /* synchronize with host lock and sort out timeouts */ 602 603 /* 604 * For EH, all qcs are finished in one of three ways - 605 * normal completion, error completion, and SCSI timeout. 606 * Both completions can race against SCSI timeout. When normal 607 * completion wins, the qc never reaches EH. When error 608 * completion wins, the qc has ATA_QCFLAG_EH set. 609 * 610 * When SCSI timeout wins, things are a bit more complex. 611 * Normal or error completion can occur after the timeout but 612 * before this point. In such cases, both types of 613 * completions are honored. 
A scmd is determined to have 614 * timed out iff its associated qc is active and not failed. 615 */ 616 spin_lock_irqsave(ap->lock, flags); 617 618 /* 619 * This must occur under the ap->lock as we don't want 620 * a polled recovery to race the real interrupt handler 621 * 622 * The lost_interrupt handler checks for any completed but 623 * non-notified command and completes much like an IRQ handler. 624 * 625 * We then fall into the error recovery code which will treat 626 * this as if normal completion won the race 627 */ 628 if (ap->ops->lost_interrupt) 629 ap->ops->lost_interrupt(ap); 630 631 list_for_each_entry_safe(scmd, tmp, eh_work_q, eh_entry) { 632 struct ata_queued_cmd *qc; 633 634 /* 635 * If the scmd was added to EH, via ata_qc_schedule_eh() -> 636 * scsi_timeout() -> scsi_eh_scmd_add(), scsi_timeout() will 637 * have set DID_TIME_OUT (since libata does not have an abort 638 * handler). Thus, to clear DID_TIME_OUT, clear the host byte. 639 */ 640 set_host_byte(scmd, DID_OK); 641 642 ata_qc_for_each_raw(ap, qc, i) { 643 if (qc->scsicmd != scmd) 644 continue; 645 if ((qc->flags & ATA_QCFLAG_ACTIVE) || 646 qc == ap->deferred_qc) 647 break; 648 } 649 650 if (qc == ap->deferred_qc) { 651 /* 652 * This is a deferred command that timed out while 653 * waiting for the command queue to drain. Since the qc 654 * is not active yet (deferred_qc is still set, so the 655 * deferred qc work has not issued the command yet), 656 * simply signal the timeout by finishing the SCSI 657 * command and clear the deferred qc to prevent the 658 * deferred qc work from issuing this qc. 
659 */ 660 WARN_ON_ONCE(qc->flags & ATA_QCFLAG_ACTIVE); 661 ap->deferred_qc = NULL; 662 set_host_byte(scmd, DID_TIME_OUT); 663 scsi_eh_finish_cmd(scmd, &ap->eh_done_q); 664 } else if (i < ATA_MAX_QUEUE) { 665 /* the scmd has an associated qc */ 666 if (!(qc->flags & ATA_QCFLAG_EH)) { 667 /* which hasn't failed yet, timeout */ 668 set_host_byte(scmd, DID_TIME_OUT); 669 qc->err_mask |= AC_ERR_TIMEOUT; 670 qc->flags |= ATA_QCFLAG_EH; 671 nr_timedout++; 672 } 673 } else { 674 /* Normal completion occurred after 675 * SCSI timeout but before this point. 676 * Successfully complete it. 677 */ 678 scmd->retries = scmd->allowed; 679 scsi_eh_finish_cmd(scmd, &ap->eh_done_q); 680 } 681 } 682 683 /* 684 * If we have timed out qcs. They belong to EH from 685 * this point but the state of the controller is 686 * unknown. Freeze the port to make sure the IRQ 687 * handler doesn't diddle with those qcs. This must 688 * be done atomically w.r.t. setting ATA_QCFLAG_EH. 689 */ 690 if (nr_timedout) 691 __ata_port_freeze(ap); 692 693 /* initialize eh_tries */ 694 ap->eh_tries = ATA_EH_MAX_TRIES; 695 696 spin_unlock_irqrestore(ap->lock, flags); 697 } 698 EXPORT_SYMBOL(ata_scsi_cmd_error_handler); 699 700 /** 701 * ata_scsi_port_error_handler - recover the port after the commands 702 * @host: SCSI host containing the port 703 * @ap: the ATA port 704 * 705 * Handle the recovery of the port @ap after all the commands 706 * have been recovered. 
707 */ 708 void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap) 709 { 710 unsigned long flags; 711 struct ata_link *link; 712 713 /* acquire EH ownership */ 714 ata_eh_acquire(ap); 715 repeat: 716 /* kill fast drain timer */ 717 timer_delete_sync(&ap->fastdrain_timer); 718 719 /* process port resume request */ 720 ata_eh_handle_port_resume(ap); 721 722 /* fetch & clear EH info */ 723 spin_lock_irqsave(ap->lock, flags); 724 725 ata_for_each_link(link, ap, HOST_FIRST) { 726 struct ata_eh_context *ehc = &link->eh_context; 727 struct ata_device *dev; 728 729 memset(&link->eh_context, 0, sizeof(link->eh_context)); 730 link->eh_context.i = link->eh_info; 731 memset(&link->eh_info, 0, sizeof(link->eh_info)); 732 733 ata_for_each_dev(dev, link, ENABLED) { 734 int devno = dev->devno; 735 736 ehc->saved_xfer_mode[devno] = dev->xfer_mode; 737 if (ata_ncq_enabled(dev)) 738 ehc->saved_ncq_enabled |= 1 << devno; 739 740 /* If we are resuming, wake up the device */ 741 if (ap->pflags & ATA_PFLAG_RESUMING) { 742 dev->flags |= ATA_DFLAG_RESUMING; 743 ehc->i.dev_action[devno] |= ATA_EH_SET_ACTIVE; 744 } 745 } 746 } 747 748 ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS; 749 ap->pflags &= ~ATA_PFLAG_EH_PENDING; 750 ap->excl_link = NULL; /* don't maintain exclusion over EH */ 751 752 spin_unlock_irqrestore(ap->lock, flags); 753 754 /* invoke EH, skip if unloading or suspended */ 755 if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)) && 756 ata_adapter_is_online(ap)) 757 ap->ops->error_handler(ap); 758 else { 759 /* if unloading, commence suicide */ 760 if ((ap->pflags & ATA_PFLAG_UNLOADING) && 761 !(ap->pflags & ATA_PFLAG_UNLOADED)) 762 ata_eh_unload(ap); 763 ata_eh_finish(ap); 764 } 765 766 /* process port suspend request */ 767 ata_eh_handle_port_suspend(ap); 768 769 /* 770 * Exception might have happened after ->error_handler recovered the 771 * port but before this point. Repeat EH in such case. 
772 */ 773 spin_lock_irqsave(ap->lock, flags); 774 775 if (ap->pflags & ATA_PFLAG_EH_PENDING) { 776 if (--ap->eh_tries) { 777 spin_unlock_irqrestore(ap->lock, flags); 778 goto repeat; 779 } 780 ata_port_err(ap, 781 "EH pending after %d tries, giving up\n", 782 ATA_EH_MAX_TRIES); 783 ap->pflags &= ~ATA_PFLAG_EH_PENDING; 784 } 785 786 /* this run is complete, make sure EH info is clear */ 787 ata_for_each_link(link, ap, HOST_FIRST) 788 memset(&link->eh_info, 0, sizeof(link->eh_info)); 789 790 /* 791 * end eh (clear host_eh_scheduled) while holding ap->lock such that if 792 * exception occurs after this point but before EH completion, SCSI 793 * midlayer will re-initiate EH. 794 */ 795 ap->ops->end_eh(ap); 796 797 spin_unlock_irqrestore(ap->lock, flags); 798 ata_eh_release(ap); 799 800 scsi_eh_flush_done_q(&ap->eh_done_q); 801 802 /* clean up */ 803 spin_lock_irqsave(ap->lock, flags); 804 805 ap->pflags &= ~ATA_PFLAG_RESUMING; 806 807 if (ap->pflags & ATA_PFLAG_LOADING) 808 ap->pflags &= ~ATA_PFLAG_LOADING; 809 else if ((ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) && 810 !(ap->flags & ATA_FLAG_SAS_HOST)) 811 schedule_delayed_work(&ap->hotplug_task, 0); 812 813 if (ap->pflags & ATA_PFLAG_RECOVERED) 814 ata_port_info(ap, "EH complete\n"); 815 816 ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED); 817 818 /* tell wait_eh that we're done */ 819 ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS; 820 wake_up_all(&ap->eh_wait_q); 821 822 spin_unlock_irqrestore(ap->lock, flags); 823 } 824 EXPORT_SYMBOL_GPL(ata_scsi_port_error_handler); 825 826 /** 827 * ata_port_wait_eh - Wait for the currently pending EH to complete 828 * @ap: Port to wait EH for 829 * 830 * Wait until the currently pending EH is complete. 831 * 832 * LOCKING: 833 * Kernel thread context (may sleep). 
834 */ 835 void ata_port_wait_eh(struct ata_port *ap) 836 { 837 unsigned long flags; 838 DEFINE_WAIT(wait); 839 840 retry: 841 spin_lock_irqsave(ap->lock, flags); 842 843 while (ata_port_eh_scheduled(ap)) { 844 prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE); 845 spin_unlock_irqrestore(ap->lock, flags); 846 schedule(); 847 spin_lock_irqsave(ap->lock, flags); 848 } 849 finish_wait(&ap->eh_wait_q, &wait); 850 851 spin_unlock_irqrestore(ap->lock, flags); 852 853 /* make sure SCSI EH is complete */ 854 if (scsi_host_in_recovery(ap->scsi_host)) { 855 ata_msleep(ap, 10); 856 goto retry; 857 } 858 } 859 EXPORT_SYMBOL_GPL(ata_port_wait_eh); 860 861 static unsigned int ata_eh_nr_in_flight(struct ata_port *ap) 862 { 863 struct ata_queued_cmd *qc; 864 unsigned int tag; 865 unsigned int nr = 0; 866 867 /* count only non-internal commands */ 868 ata_qc_for_each(ap, qc, tag) { 869 if (qc) 870 nr++; 871 } 872 873 return nr; 874 } 875 876 void ata_eh_fastdrain_timerfn(struct timer_list *t) 877 { 878 struct ata_port *ap = timer_container_of(ap, t, fastdrain_timer); 879 unsigned long flags; 880 unsigned int cnt; 881 882 spin_lock_irqsave(ap->lock, flags); 883 884 cnt = ata_eh_nr_in_flight(ap); 885 886 /* are we done? */ 887 if (!cnt) 888 goto out_unlock; 889 890 if (cnt == ap->fastdrain_cnt) { 891 struct ata_queued_cmd *qc; 892 unsigned int tag; 893 894 /* No progress during the last interval, tag all 895 * in-flight qcs as timed out and freeze the port. 
896 */ 897 ata_qc_for_each(ap, qc, tag) { 898 if (qc) 899 qc->err_mask |= AC_ERR_TIMEOUT; 900 } 901 902 ata_port_freeze(ap); 903 } else { 904 /* some qcs have finished, give it another chance */ 905 ap->fastdrain_cnt = cnt; 906 ap->fastdrain_timer.expires = 907 ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL); 908 add_timer(&ap->fastdrain_timer); 909 } 910 911 out_unlock: 912 spin_unlock_irqrestore(ap->lock, flags); 913 } 914 915 /** 916 * ata_eh_set_pending - set ATA_PFLAG_EH_PENDING and activate fast drain 917 * @ap: target ATA port 918 * @fastdrain: activate fast drain 919 * 920 * Set ATA_PFLAG_EH_PENDING and activate fast drain if @fastdrain 921 * is non-zero and EH wasn't pending before. Fast drain ensures 922 * that EH kicks in in timely manner. 923 * 924 * LOCKING: 925 * spin_lock_irqsave(host lock) 926 */ 927 static void ata_eh_set_pending(struct ata_port *ap, bool fastdrain) 928 { 929 unsigned int cnt; 930 931 /* already scheduled? */ 932 if (ap->pflags & ATA_PFLAG_EH_PENDING) 933 return; 934 935 ap->pflags |= ATA_PFLAG_EH_PENDING; 936 937 /* 938 * If we have a deferred qc, requeue it so that it is retried once EH 939 * completes. 940 */ 941 ata_scsi_requeue_deferred_qc(ap); 942 943 if (!fastdrain) 944 return; 945 946 /* do we have in-flight qcs? */ 947 cnt = ata_eh_nr_in_flight(ap); 948 if (!cnt) 949 return; 950 951 /* activate fast drain */ 952 ap->fastdrain_cnt = cnt; 953 ap->fastdrain_timer.expires = 954 ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL); 955 add_timer(&ap->fastdrain_timer); 956 } 957 958 /** 959 * ata_qc_schedule_eh - schedule qc for error handling 960 * @qc: command to schedule error handling for 961 * 962 * Schedule error handling for @qc. EH will kick in as soon as 963 * other commands are drained. 
964 * 965 * LOCKING: 966 * spin_lock_irqsave(host lock) 967 */ 968 void ata_qc_schedule_eh(struct ata_queued_cmd *qc) 969 { 970 struct ata_port *ap = qc->ap; 971 972 qc->flags |= ATA_QCFLAG_EH; 973 ata_eh_set_pending(ap, true); 974 975 /* The following will fail if timeout has already expired. 976 * ata_scsi_error() takes care of such scmds on EH entry. 977 * Note that ATA_QCFLAG_EH is unconditionally set after 978 * this function completes. 979 */ 980 blk_abort_request(scsi_cmd_to_rq(qc->scsicmd)); 981 } 982 983 /** 984 * ata_std_sched_eh - non-libsas ata_ports issue eh with this common routine 985 * @ap: ATA port to schedule EH for 986 * 987 * LOCKING: inherited from ata_port_schedule_eh 988 * spin_lock_irqsave(host lock) 989 */ 990 void ata_std_sched_eh(struct ata_port *ap) 991 { 992 if (ap->pflags & ATA_PFLAG_INITIALIZING) 993 return; 994 995 ata_eh_set_pending(ap, true); 996 scsi_schedule_eh(ap->scsi_host); 997 998 trace_ata_std_sched_eh(ap); 999 } 1000 EXPORT_SYMBOL_GPL(ata_std_sched_eh); 1001 1002 /** 1003 * ata_std_end_eh - non-libsas ata_ports complete eh with this common routine 1004 * @ap: ATA port to end EH for 1005 * 1006 * In the libata object model there is a 1:1 mapping of ata_port to 1007 * shost, so host fields can be directly manipulated under ap->lock, in 1008 * the libsas case we need to hold a lock at the ha->level to coordinate 1009 * these events. 1010 * 1011 * LOCKING: 1012 * spin_lock_irqsave(host lock) 1013 */ 1014 void ata_std_end_eh(struct ata_port *ap) 1015 { 1016 struct Scsi_Host *host = ap->scsi_host; 1017 1018 host->host_eh_scheduled = 0; 1019 } 1020 EXPORT_SYMBOL(ata_std_end_eh); 1021 1022 1023 /** 1024 * ata_port_schedule_eh - schedule error handling without a qc 1025 * @ap: ATA port to schedule EH for 1026 * 1027 * Schedule error handling for @ap. EH will kick in as soon as 1028 * all commands are drained. 
1029 * 1030 * LOCKING: 1031 * spin_lock_irqsave(host lock) 1032 */ 1033 void ata_port_schedule_eh(struct ata_port *ap) 1034 { 1035 /* see: ata_std_sched_eh, unless you know better */ 1036 ap->ops->sched_eh(ap); 1037 } 1038 EXPORT_SYMBOL_GPL(ata_port_schedule_eh); 1039 1040 static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link) 1041 { 1042 struct ata_queued_cmd *qc; 1043 int tag, nr_aborted = 0; 1044 1045 /* we're gonna abort all commands, no need for fast drain */ 1046 ata_eh_set_pending(ap, false); 1047 1048 /* include internal tag in iteration */ 1049 ata_qc_for_each_with_internal(ap, qc, tag) { 1050 if (qc && (!link || qc->dev->link == link)) { 1051 qc->flags |= ATA_QCFLAG_EH; 1052 ata_qc_complete(qc); 1053 nr_aborted++; 1054 } 1055 } 1056 1057 if (!nr_aborted) 1058 ata_port_schedule_eh(ap); 1059 1060 return nr_aborted; 1061 } 1062 1063 /** 1064 * ata_link_abort - abort all qc's on the link 1065 * @link: ATA link to abort qc's for 1066 * 1067 * Abort all active qc's active on @link and schedule EH. 1068 * 1069 * LOCKING: 1070 * spin_lock_irqsave(host lock) 1071 * 1072 * RETURNS: 1073 * Number of aborted qc's. 1074 */ 1075 int ata_link_abort(struct ata_link *link) 1076 { 1077 return ata_do_link_abort(link->ap, link); 1078 } 1079 EXPORT_SYMBOL_GPL(ata_link_abort); 1080 1081 /** 1082 * ata_port_abort - abort all qc's on the port 1083 * @ap: ATA port to abort qc's for 1084 * 1085 * Abort all active qc's of @ap and schedule EH. 1086 * 1087 * LOCKING: 1088 * spin_lock_irqsave(host_set lock) 1089 * 1090 * RETURNS: 1091 * Number of aborted qc's. 1092 */ 1093 int ata_port_abort(struct ata_port *ap) 1094 { 1095 return ata_do_link_abort(ap, NULL); 1096 } 1097 EXPORT_SYMBOL_GPL(ata_port_abort); 1098 1099 /** 1100 * __ata_port_freeze - freeze port 1101 * @ap: ATA port to freeze 1102 * 1103 * This function is called when HSM violation or some other 1104 * condition disrupts normal operation of the port. 
Frozen port 1105 * is not allowed to perform any operation until the port is 1106 * thawed, which usually follows a successful reset. 1107 * 1108 * ap->ops->freeze() callback can be used for freezing the port 1109 * hardware-wise (e.g. mask interrupt and stop DMA engine). If a 1110 * port cannot be frozen hardware-wise, the interrupt handler 1111 * must ack and clear interrupts unconditionally while the port 1112 * is frozen. 1113 * 1114 * LOCKING: 1115 * spin_lock_irqsave(host lock) 1116 */ 1117 static void __ata_port_freeze(struct ata_port *ap) 1118 { 1119 if (ap->ops->freeze) 1120 ap->ops->freeze(ap); 1121 1122 ap->pflags |= ATA_PFLAG_FROZEN; 1123 1124 trace_ata_port_freeze(ap); 1125 } 1126 1127 /** 1128 * ata_port_freeze - abort & freeze port 1129 * @ap: ATA port to freeze 1130 * 1131 * Abort and freeze @ap. The freeze operation must be called 1132 * first, because some hardware requires special operations 1133 * before the taskfile registers are accessible. 1134 * 1135 * LOCKING: 1136 * spin_lock_irqsave(host lock) 1137 * 1138 * RETURNS: 1139 * Number of aborted commands. 1140 */ 1141 int ata_port_freeze(struct ata_port *ap) 1142 { 1143 __ata_port_freeze(ap); 1144 1145 return ata_port_abort(ap); 1146 } 1147 EXPORT_SYMBOL_GPL(ata_port_freeze); 1148 1149 /** 1150 * ata_eh_freeze_port - EH helper to freeze port 1151 * @ap: ATA port to freeze 1152 * 1153 * Freeze @ap. 1154 * 1155 * LOCKING: 1156 * None. 1157 */ 1158 void ata_eh_freeze_port(struct ata_port *ap) 1159 { 1160 unsigned long flags; 1161 1162 spin_lock_irqsave(ap->lock, flags); 1163 __ata_port_freeze(ap); 1164 spin_unlock_irqrestore(ap->lock, flags); 1165 } 1166 EXPORT_SYMBOL_GPL(ata_eh_freeze_port); 1167 1168 /** 1169 * ata_eh_thaw_port - EH helper to thaw port 1170 * @ap: ATA port to thaw 1171 * 1172 * Thaw frozen port @ap. 1173 * 1174 * LOCKING: 1175 * None. 
1176 */ 1177 void ata_eh_thaw_port(struct ata_port *ap) 1178 { 1179 unsigned long flags; 1180 1181 spin_lock_irqsave(ap->lock, flags); 1182 1183 ap->pflags &= ~ATA_PFLAG_FROZEN; 1184 1185 if (ap->ops->thaw) 1186 ap->ops->thaw(ap); 1187 1188 spin_unlock_irqrestore(ap->lock, flags); 1189 1190 trace_ata_port_thaw(ap); 1191 } 1192 1193 static void ata_eh_scsidone(struct scsi_cmnd *scmd) 1194 { 1195 /* nada */ 1196 } 1197 1198 static void __ata_eh_qc_complete(struct ata_queued_cmd *qc) 1199 { 1200 struct ata_port *ap = qc->ap; 1201 struct scsi_cmnd *scmd = qc->scsicmd; 1202 unsigned long flags; 1203 1204 spin_lock_irqsave(ap->lock, flags); 1205 qc->scsidone = ata_eh_scsidone; 1206 __ata_qc_complete(qc); 1207 WARN_ON(ata_tag_valid(qc->tag)); 1208 spin_unlock_irqrestore(ap->lock, flags); 1209 1210 scsi_eh_finish_cmd(scmd, &ap->eh_done_q); 1211 } 1212 1213 /** 1214 * ata_eh_qc_complete - Complete an active ATA command from EH 1215 * @qc: Command to complete 1216 * 1217 * Indicate to the mid and upper layers that an ATA command has 1218 * completed. To be used from EH. 1219 */ 1220 void ata_eh_qc_complete(struct ata_queued_cmd *qc) 1221 { 1222 struct scsi_cmnd *scmd = qc->scsicmd; 1223 scmd->retries = scmd->allowed; 1224 __ata_eh_qc_complete(qc); 1225 } 1226 1227 /** 1228 * ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH 1229 * @qc: Command to retry 1230 * 1231 * Indicate to the mid and upper layers that an ATA command 1232 * should be retried. To be used from EH. 1233 * 1234 * SCSI midlayer limits the number of retries to scmd->allowed. 1235 * scmd->allowed is incremented for commands which get retried 1236 * due to unrelated failures (qc->err_mask is zero). 
1237 */ 1238 void ata_eh_qc_retry(struct ata_queued_cmd *qc) 1239 { 1240 struct scsi_cmnd *scmd = qc->scsicmd; 1241 if (!qc->err_mask) 1242 scmd->allowed++; 1243 __ata_eh_qc_complete(qc); 1244 } 1245 1246 /** 1247 * ata_dev_disable - disable ATA device 1248 * @dev: ATA device to disable 1249 * 1250 * Disable @dev. 1251 * 1252 * Locking: 1253 * EH context. 1254 */ 1255 void ata_dev_disable(struct ata_device *dev) 1256 { 1257 if (!ata_dev_enabled(dev)) 1258 return; 1259 1260 ata_dev_warn(dev, "disable device\n"); 1261 1262 ata_eh_dev_disable(dev); 1263 } 1264 EXPORT_SYMBOL_GPL(ata_dev_disable); 1265 1266 /** 1267 * ata_eh_detach_dev - detach ATA device 1268 * @dev: ATA device to detach 1269 * 1270 * Detach @dev. 1271 * 1272 * LOCKING: 1273 * None. 1274 */ 1275 void ata_eh_detach_dev(struct ata_device *dev) 1276 { 1277 struct ata_link *link = dev->link; 1278 struct ata_port *ap = link->ap; 1279 struct ata_eh_context *ehc = &link->eh_context; 1280 unsigned long flags; 1281 1282 /* 1283 * If the device is still enabled, transition it to standby power mode 1284 * (i.e. spin down HDDs) and disable it. 
1285 */ 1286 if (ata_dev_enabled(dev)) { 1287 ata_dev_power_set_standby(dev); 1288 ata_eh_dev_disable(dev); 1289 } 1290 1291 spin_lock_irqsave(ap->lock, flags); 1292 1293 dev->flags &= ~ATA_DFLAG_DETACH; 1294 1295 if (ata_scsi_offline_dev(dev)) { 1296 dev->flags |= ATA_DFLAG_DETACHED; 1297 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; 1298 } 1299 1300 /* clear per-dev EH info */ 1301 ata_eh_clear_action(link, dev, &link->eh_info, ATA_EH_PERDEV_MASK); 1302 ata_eh_clear_action(link, dev, &link->eh_context.i, ATA_EH_PERDEV_MASK); 1303 ehc->saved_xfer_mode[dev->devno] = 0; 1304 ehc->saved_ncq_enabled &= ~(1 << dev->devno); 1305 1306 spin_unlock_irqrestore(ap->lock, flags); 1307 } 1308 1309 /** 1310 * ata_eh_about_to_do - about to perform eh_action 1311 * @link: target ATA link 1312 * @dev: target ATA dev for per-dev action (can be NULL) 1313 * @action: action about to be performed 1314 * 1315 * Called just before performing EH actions to clear related bits 1316 * in @link->eh_info such that eh actions are not unnecessarily 1317 * repeated. 1318 * 1319 * LOCKING: 1320 * None. 1321 */ 1322 void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev, 1323 unsigned int action) 1324 { 1325 struct ata_port *ap = link->ap; 1326 struct ata_eh_info *ehi = &link->eh_info; 1327 struct ata_eh_context *ehc = &link->eh_context; 1328 unsigned long flags; 1329 1330 trace_ata_eh_about_to_do(link, dev ? dev->devno : 0, action); 1331 1332 spin_lock_irqsave(ap->lock, flags); 1333 1334 ata_eh_clear_action(link, dev, ehi, action); 1335 1336 /* About to take EH action, set RECOVERED. Ignore actions on 1337 * slave links as master will do them again. 
1338 */ 1339 if (!(ehc->i.flags & ATA_EHI_QUIET) && link != ap->slave_link) 1340 ap->pflags |= ATA_PFLAG_RECOVERED; 1341 1342 spin_unlock_irqrestore(ap->lock, flags); 1343 } 1344 1345 /** 1346 * ata_eh_done - EH action complete 1347 * @link: ATA link for which EH actions are complete 1348 * @dev: target ATA dev for per-dev action (can be NULL) 1349 * @action: action just completed 1350 * 1351 * Called right after performing EH actions to clear related bits 1352 * in @link->eh_context. 1353 * 1354 * LOCKING: 1355 * None. 1356 */ 1357 void ata_eh_done(struct ata_link *link, struct ata_device *dev, 1358 unsigned int action) 1359 { 1360 struct ata_eh_context *ehc = &link->eh_context; 1361 1362 trace_ata_eh_done(link, dev ? dev->devno : 0, action); 1363 1364 ata_eh_clear_action(link, dev, &ehc->i, action); 1365 } 1366 1367 /** 1368 * ata_err_string - convert err_mask to descriptive string 1369 * @err_mask: error mask to convert to string 1370 * 1371 * Convert @err_mask to descriptive string. Errors are 1372 * prioritized according to severity and only the most severe 1373 * error is reported. 1374 * 1375 * LOCKING: 1376 * None. 
1377 * 1378 * RETURNS: 1379 * Descriptive string for @err_mask 1380 */ 1381 static const char *ata_err_string(unsigned int err_mask) 1382 { 1383 if (err_mask & AC_ERR_HOST_BUS) 1384 return "host bus error"; 1385 if (err_mask & AC_ERR_ATA_BUS) 1386 return "ATA bus error"; 1387 if (err_mask & AC_ERR_TIMEOUT) 1388 return "timeout"; 1389 if (err_mask & AC_ERR_HSM) 1390 return "HSM violation"; 1391 if (err_mask & AC_ERR_SYSTEM) 1392 return "internal error"; 1393 if (err_mask & AC_ERR_MEDIA) 1394 return "media error"; 1395 if (err_mask & AC_ERR_INVALID) 1396 return "invalid argument"; 1397 if (err_mask & AC_ERR_DEV) 1398 return "device error"; 1399 if (err_mask & AC_ERR_NCQ) 1400 return "NCQ error"; 1401 if (err_mask & AC_ERR_NODEV_HINT) 1402 return "Polling detection error"; 1403 return "unknown error"; 1404 } 1405 1406 /** 1407 * atapi_eh_tur - perform ATAPI TEST_UNIT_READY 1408 * @dev: target ATAPI device 1409 * @r_sense_key: out parameter for sense_key 1410 * 1411 * Perform ATAPI TEST_UNIT_READY. 1412 * 1413 * LOCKING: 1414 * EH context (may sleep). 1415 * 1416 * RETURNS: 1417 * 0 on success, AC_ERR_* mask on failure. 1418 */ 1419 unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key) 1420 { 1421 u8 cdb[ATAPI_CDB_LEN] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 }; 1422 struct ata_taskfile tf; 1423 unsigned int err_mask; 1424 1425 ata_tf_init(dev, &tf); 1426 1427 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; 1428 tf.command = ATA_CMD_PACKET; 1429 tf.protocol = ATAPI_PROT_NODATA; 1430 1431 err_mask = ata_exec_internal(dev, &tf, cdb, DMA_NONE, NULL, 0, 0); 1432 if (err_mask == AC_ERR_DEV) 1433 *r_sense_key = tf.error >> 4; 1434 return err_mask; 1435 } 1436 1437 /** 1438 * ata_eh_decide_disposition - Disposition a qc based on sense data 1439 * @qc: qc to examine 1440 * 1441 * For a regular SCSI command, the SCSI completion callback (scsi_done()) 1442 * will call scsi_complete(), which will call scsi_decide_disposition(), 1443 * which will call scsi_check_sense(). 
scsi_complete() finally calls 1444 * scsi_finish_command(). This is fine for SCSI, since any eventual sense 1445 * data is usually returned in the completion itself (without invoking SCSI 1446 * EH). However, for a QC, we always need to fetch the sense data 1447 * explicitly using SCSI EH. 1448 * 1449 * A command that is completed via SCSI EH will instead be completed using 1450 * scsi_eh_flush_done_q(), which will call scsi_finish_command() directly 1451 * (without ever calling scsi_check_sense()). 1452 * 1453 * For a command that went through SCSI EH, it is the responsibility of the 1454 * SCSI EH strategy handler to call scsi_decide_disposition(), see e.g. how 1455 * scsi_eh_get_sense() calls scsi_decide_disposition() for SCSI LLDDs that 1456 * do not get the sense data as part of the completion. 1457 * 1458 * Thus, for QC commands that went via SCSI EH, we need to call 1459 * scsi_check_sense() ourselves, similar to how scsi_eh_get_sense() calls 1460 * scsi_decide_disposition(), which calls scsi_check_sense(), in order to 1461 * set the correct SCSI ML byte (if any). 1462 * 1463 * LOCKING: 1464 * EH context. 1465 * 1466 * RETURNS: 1467 * SUCCESS or FAILED or NEEDS_RETRY or ADD_TO_MLQUEUE 1468 */ 1469 enum scsi_disposition ata_eh_decide_disposition(struct ata_queued_cmd *qc) 1470 { 1471 return scsi_check_sense(qc->scsicmd); 1472 } 1473 1474 /** 1475 * ata_eh_request_sense - perform REQUEST_SENSE_DATA_EXT 1476 * @qc: qc to perform REQUEST_SENSE_SENSE_DATA_EXT to 1477 * 1478 * Perform REQUEST_SENSE_DATA_EXT after the device reported CHECK 1479 * SENSE. This function is an EH helper. 1480 * 1481 * LOCKING: 1482 * Kernel thread context (may sleep). 1483 * 1484 * RETURNS: 1485 * true if sense data could be fetched, false otherwise. 
1486 */ 1487 static bool ata_eh_request_sense(struct ata_queued_cmd *qc) 1488 { 1489 struct scsi_cmnd *cmd = qc->scsicmd; 1490 struct ata_device *dev = qc->dev; 1491 struct ata_taskfile tf; 1492 unsigned int err_mask; 1493 1494 if (ata_port_is_frozen(qc->ap)) { 1495 ata_dev_warn(dev, "sense data available but port frozen\n"); 1496 return false; 1497 } 1498 1499 if (!ata_id_sense_reporting_enabled(dev->id)) { 1500 ata_dev_warn(qc->dev, "sense data reporting disabled\n"); 1501 return false; 1502 } 1503 1504 ata_tf_init(dev, &tf); 1505 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; 1506 tf.flags |= ATA_TFLAG_LBA | ATA_TFLAG_LBA48; 1507 tf.command = ATA_CMD_REQ_SENSE_DATA; 1508 tf.protocol = ATA_PROT_NODATA; 1509 1510 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); 1511 /* Ignore err_mask; ATA_ERR might be set */ 1512 if (tf.status & ATA_SENSE) { 1513 if (ata_scsi_sense_is_valid(tf.lbah, tf.lbam, tf.lbal)) { 1514 /* Set sense without also setting scsicmd->result */ 1515 scsi_build_sense_buffer(dev->flags & ATA_DFLAG_D_SENSE, 1516 cmd->sense_buffer, tf.lbah, 1517 tf.lbam, tf.lbal); 1518 qc->flags |= ATA_QCFLAG_SENSE_VALID; 1519 return true; 1520 } 1521 } else { 1522 ata_dev_warn(dev, "request sense failed stat %02x emask %x\n", 1523 tf.status, err_mask); 1524 } 1525 1526 return false; 1527 } 1528 1529 /** 1530 * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE 1531 * @dev: device to perform REQUEST_SENSE to 1532 * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long) 1533 * @dfl_sense_key: default sense key to use 1534 * 1535 * Perform ATAPI REQUEST_SENSE after the device reported CHECK 1536 * SENSE. This function is EH helper. 1537 * 1538 * LOCKING: 1539 * Kernel thread context (may sleep). 
1540 * 1541 * RETURNS: 1542 * 0 on success, AC_ERR_* mask on failure 1543 */ 1544 unsigned int atapi_eh_request_sense(struct ata_device *dev, 1545 u8 *sense_buf, u8 dfl_sense_key) 1546 { 1547 u8 cdb[ATAPI_CDB_LEN] = 1548 { REQUEST_SENSE, 0, 0, 0, SCSI_SENSE_BUFFERSIZE, 0 }; 1549 struct ata_port *ap = dev->link->ap; 1550 struct ata_taskfile tf; 1551 1552 memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE); 1553 1554 /* initialize sense_buf with the error register, 1555 * for the case where they are -not- overwritten 1556 */ 1557 sense_buf[0] = 0x70; 1558 sense_buf[2] = dfl_sense_key; 1559 1560 /* some devices time out if garbage left in tf */ 1561 ata_tf_init(dev, &tf); 1562 1563 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; 1564 tf.command = ATA_CMD_PACKET; 1565 1566 /* 1567 * Do not use DMA if the connected device only supports PIO, even if the 1568 * port prefers PIO commands via DMA. 1569 * 1570 * Ideally, we should call atapi_check_dma() to check if it is safe for 1571 * the LLD to use DMA for REQUEST_SENSE, but we don't have a qc. 1572 * Since we can't check the command, perhaps we should only use pio? 1573 */ 1574 if ((ap->flags & ATA_FLAG_PIO_DMA) && !(dev->flags & ATA_DFLAG_PIO)) { 1575 tf.protocol = ATAPI_PROT_DMA; 1576 tf.feature |= ATAPI_PKT_DMA; 1577 } else { 1578 tf.protocol = ATAPI_PROT_PIO; 1579 tf.lbam = SCSI_SENSE_BUFFERSIZE; 1580 tf.lbah = 0; 1581 } 1582 1583 return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE, 1584 sense_buf, SCSI_SENSE_BUFFERSIZE, 0); 1585 } 1586 1587 /** 1588 * ata_eh_analyze_serror - analyze SError for a failed port 1589 * @link: ATA link to analyze SError for 1590 * 1591 * Analyze SError if available and further determine cause of 1592 * failure. 1593 * 1594 * LOCKING: 1595 * None. 
1596 */ 1597 static void ata_eh_analyze_serror(struct ata_link *link) 1598 { 1599 struct ata_eh_context *ehc = &link->eh_context; 1600 u32 serror = ehc->i.serror; 1601 unsigned int err_mask = 0, action = 0; 1602 u32 hotplug_mask; 1603 1604 if (serror & (SERR_PERSISTENT | SERR_DATA)) { 1605 err_mask |= AC_ERR_ATA_BUS; 1606 action |= ATA_EH_RESET; 1607 } 1608 if (serror & SERR_PROTOCOL) { 1609 err_mask |= AC_ERR_HSM; 1610 action |= ATA_EH_RESET; 1611 } 1612 if (serror & SERR_INTERNAL) { 1613 err_mask |= AC_ERR_SYSTEM; 1614 action |= ATA_EH_RESET; 1615 } 1616 1617 /* Determine whether a hotplug event has occurred. Both 1618 * SError.N/X are considered hotplug events for enabled or 1619 * host links. For disabled PMP links, only N bit is 1620 * considered as X bit is left at 1 for link plugging. 1621 */ 1622 if (link->lpm_policy > ATA_LPM_MAX_POWER) 1623 hotplug_mask = 0; /* hotplug doesn't work w/ LPM */ 1624 else if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link)) 1625 hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG; 1626 else 1627 hotplug_mask = SERR_PHYRDY_CHG; 1628 1629 if (serror & hotplug_mask) 1630 ata_ehi_hotplugged(&ehc->i); 1631 1632 ehc->i.err_mask |= err_mask; 1633 ehc->i.action |= action; 1634 } 1635 1636 /** 1637 * ata_eh_analyze_tf - analyze taskfile of a failed qc 1638 * @qc: qc to analyze 1639 * 1640 * Analyze taskfile of @qc and further determine cause of 1641 * failure. This function also requests ATAPI sense data if 1642 * available. 1643 * 1644 * LOCKING: 1645 * Kernel thread context (may sleep). 
1646 * 1647 * RETURNS: 1648 * Determined recovery action 1649 */ 1650 static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc) 1651 { 1652 const struct ata_taskfile *tf = &qc->result_tf; 1653 unsigned int tmp, action = 0; 1654 u8 stat = tf->status, err = tf->error; 1655 1656 if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) { 1657 qc->err_mask |= AC_ERR_HSM; 1658 return ATA_EH_RESET; 1659 } 1660 1661 if (stat & (ATA_ERR | ATA_DF)) { 1662 qc->err_mask |= AC_ERR_DEV; 1663 /* 1664 * Sense data reporting does not work if the 1665 * device fault bit is set. 1666 */ 1667 if (stat & ATA_DF) 1668 stat &= ~ATA_SENSE; 1669 } else { 1670 return 0; 1671 } 1672 1673 switch (qc->dev->class) { 1674 case ATA_DEV_ATA: 1675 case ATA_DEV_ZAC: 1676 /* 1677 * Fetch the sense data explicitly if: 1678 * -It was a non-NCQ command that failed, or 1679 * -It was a NCQ command that failed, but the sense data 1680 * was not included in the NCQ command error log 1681 * (i.e. NCQ autosense is not supported by the device). 1682 */ 1683 if (!(qc->flags & ATA_QCFLAG_SENSE_VALID) && 1684 (stat & ATA_SENSE) && ata_eh_request_sense(qc)) 1685 set_status_byte(qc->scsicmd, SAM_STAT_CHECK_CONDITION); 1686 if (err & ATA_ICRC) 1687 qc->err_mask |= AC_ERR_ATA_BUS; 1688 if (err & (ATA_UNC | ATA_AMNF)) 1689 qc->err_mask |= AC_ERR_MEDIA; 1690 if (err & ATA_IDNF) 1691 qc->err_mask |= AC_ERR_INVALID; 1692 break; 1693 1694 case ATA_DEV_ATAPI: 1695 if (!ata_port_is_frozen(qc->ap)) { 1696 tmp = atapi_eh_request_sense(qc->dev, 1697 qc->scsicmd->sense_buffer, 1698 qc->result_tf.error >> 4); 1699 if (!tmp) 1700 qc->flags |= ATA_QCFLAG_SENSE_VALID; 1701 else 1702 qc->err_mask |= tmp; 1703 } 1704 } 1705 1706 if (qc->flags & ATA_QCFLAG_SENSE_VALID) { 1707 enum scsi_disposition ret = ata_eh_decide_disposition(qc); 1708 1709 /* 1710 * SUCCESS here means that the sense code could be 1711 * evaluated and should be passed to the upper layers 1712 * for correct evaluation. 
1713 * FAILED means the sense code could not be interpreted 1714 * and the device would need to be reset. 1715 * NEEDS_RETRY and ADD_TO_MLQUEUE means that the 1716 * command would need to be retried. 1717 */ 1718 if (ret == NEEDS_RETRY || ret == ADD_TO_MLQUEUE) { 1719 qc->flags |= ATA_QCFLAG_RETRY; 1720 qc->err_mask |= AC_ERR_OTHER; 1721 } else if (ret != SUCCESS) { 1722 qc->err_mask |= AC_ERR_HSM; 1723 } 1724 } 1725 if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS)) 1726 action |= ATA_EH_RESET; 1727 1728 return action; 1729 } 1730 1731 static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask, 1732 int *xfer_ok) 1733 { 1734 int base = 0; 1735 1736 if (!(eflags & ATA_EFLAG_DUBIOUS_XFER)) 1737 *xfer_ok = 1; 1738 1739 if (!*xfer_ok) 1740 base = ATA_ECAT_DUBIOUS_NONE; 1741 1742 if (err_mask & AC_ERR_ATA_BUS) 1743 return base + ATA_ECAT_ATA_BUS; 1744 1745 if (err_mask & AC_ERR_TIMEOUT) 1746 return base + ATA_ECAT_TOUT_HSM; 1747 1748 if (eflags & ATA_EFLAG_IS_IO) { 1749 if (err_mask & AC_ERR_HSM) 1750 return base + ATA_ECAT_TOUT_HSM; 1751 if ((err_mask & 1752 (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV) 1753 return base + ATA_ECAT_UNK_DEV; 1754 } 1755 1756 return 0; 1757 } 1758 1759 struct speed_down_verdict_arg { 1760 u64 since; 1761 int xfer_ok; 1762 int nr_errors[ATA_ECAT_NR]; 1763 }; 1764 1765 static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg) 1766 { 1767 struct speed_down_verdict_arg *arg = void_arg; 1768 int cat; 1769 1770 if ((ent->eflags & ATA_EFLAG_OLD_ER) || (ent->timestamp < arg->since)) 1771 return -1; 1772 1773 cat = ata_eh_categorize_error(ent->eflags, ent->err_mask, 1774 &arg->xfer_ok); 1775 arg->nr_errors[cat]++; 1776 1777 return 0; 1778 } 1779 1780 /** 1781 * ata_eh_speed_down_verdict - Determine speed down verdict 1782 * @dev: Device of interest 1783 * 1784 * This function examines error ring of @dev and determines 1785 * whether NCQ needs to be turned off, transfer speed should 
be 1786 * stepped down, or falling back to PIO is necessary. 1787 * 1788 * ECAT_ATA_BUS : ATA_BUS error for any command 1789 * 1790 * ECAT_TOUT_HSM : TIMEOUT for any command or HSM violation for 1791 * IO commands 1792 * 1793 * ECAT_UNK_DEV : Unknown DEV error for IO commands 1794 * 1795 * ECAT_DUBIOUS_* : Identical to above three but occurred while 1796 * data transfer hasn't been verified. 1797 * 1798 * Verdicts are 1799 * 1800 * NCQ_OFF : Turn off NCQ. 1801 * 1802 * SPEED_DOWN : Speed down transfer speed but don't fall back 1803 * to PIO. 1804 * 1805 * FALLBACK_TO_PIO : Fall back to PIO. 1806 * 1807 * Even if multiple verdicts are returned, only one action is 1808 * taken per error. An action triggered by non-DUBIOUS errors 1809 * clears ering, while one triggered by DUBIOUS_* errors doesn't. 1810 * This is to expedite speed down decisions right after device is 1811 * initially configured. 1812 * 1813 * The following are speed down rules. #1 and #2 deal with 1814 * DUBIOUS errors. 1815 * 1816 * 1. If more than one DUBIOUS_ATA_BUS or DUBIOUS_TOUT_HSM errors 1817 * occurred during last 5 mins, SPEED_DOWN and FALLBACK_TO_PIO. 1818 * 1819 * 2. If more than one DUBIOUS_TOUT_HSM or DUBIOUS_UNK_DEV errors 1820 * occurred during last 5 mins, NCQ_OFF. 1821 * 1822 * 3. If more than 8 ATA_BUS, TOUT_HSM or UNK_DEV errors 1823 * occurred during last 5 mins, FALLBACK_TO_PIO 1824 * 1825 * 4. If more than 3 TOUT_HSM or UNK_DEV errors occurred 1826 * during last 10 mins, NCQ_OFF. 1827 * 1828 * 5. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 6 1829 * UNK_DEV errors occurred during last 10 mins, SPEED_DOWN. 1830 * 1831 * LOCKING: 1832 * Inherited from caller. 1833 * 1834 * RETURNS: 1835 * OR of ATA_EH_SPDN_* flags. 
1836 */ 1837 static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev) 1838 { 1839 const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ; 1840 u64 j64 = get_jiffies_64(); 1841 struct speed_down_verdict_arg arg; 1842 unsigned int verdict = 0; 1843 1844 /* scan past 5 mins of error history */ 1845 memset(&arg, 0, sizeof(arg)); 1846 arg.since = j64 - min(j64, j5mins); 1847 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); 1848 1849 if (arg.nr_errors[ATA_ECAT_DUBIOUS_ATA_BUS] + 1850 arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] > 1) 1851 verdict |= ATA_EH_SPDN_SPEED_DOWN | 1852 ATA_EH_SPDN_FALLBACK_TO_PIO | ATA_EH_SPDN_KEEP_ERRORS; 1853 1854 if (arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] + 1855 arg.nr_errors[ATA_ECAT_DUBIOUS_UNK_DEV] > 1) 1856 verdict |= ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_KEEP_ERRORS; 1857 1858 if (arg.nr_errors[ATA_ECAT_ATA_BUS] + 1859 arg.nr_errors[ATA_ECAT_TOUT_HSM] + 1860 arg.nr_errors[ATA_ECAT_UNK_DEV] > 6) 1861 verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO; 1862 1863 /* scan past 10 mins of error history */ 1864 memset(&arg, 0, sizeof(arg)); 1865 arg.since = j64 - min(j64, j10mins); 1866 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); 1867 1868 if (arg.nr_errors[ATA_ECAT_TOUT_HSM] + 1869 arg.nr_errors[ATA_ECAT_UNK_DEV] > 3) 1870 verdict |= ATA_EH_SPDN_NCQ_OFF; 1871 1872 if (arg.nr_errors[ATA_ECAT_ATA_BUS] + 1873 arg.nr_errors[ATA_ECAT_TOUT_HSM] > 3 || 1874 arg.nr_errors[ATA_ECAT_UNK_DEV] > 6) 1875 verdict |= ATA_EH_SPDN_SPEED_DOWN; 1876 1877 return verdict; 1878 } 1879 1880 /** 1881 * ata_eh_speed_down - record error and speed down if necessary 1882 * @dev: Failed device 1883 * @eflags: mask of ATA_EFLAG_* flags 1884 * @err_mask: err_mask of the error 1885 * 1886 * Record error and examine error history to determine whether 1887 * adjusting transmission speed is necessary. It also sets 1888 * transmission limits appropriately if such adjustment is 1889 * necessary. 
1890 * 1891 * LOCKING: 1892 * Kernel thread context (may sleep). 1893 * 1894 * RETURNS: 1895 * Determined recovery action. 1896 */ 1897 static unsigned int ata_eh_speed_down(struct ata_device *dev, 1898 unsigned int eflags, unsigned int err_mask) 1899 { 1900 struct ata_link *link = ata_dev_phys_link(dev); 1901 int xfer_ok = 0; 1902 unsigned int verdict; 1903 unsigned int action = 0; 1904 1905 /* don't bother if Cat-0 error */ 1906 if (ata_eh_categorize_error(eflags, err_mask, &xfer_ok) == 0) 1907 return 0; 1908 1909 /* record error and determine whether speed down is necessary */ 1910 ata_ering_record(&dev->ering, eflags, err_mask); 1911 verdict = ata_eh_speed_down_verdict(dev); 1912 1913 /* turn off NCQ? */ 1914 if ((verdict & ATA_EH_SPDN_NCQ_OFF) && ata_ncq_enabled(dev)) { 1915 dev->flags |= ATA_DFLAG_NCQ_OFF; 1916 ata_dev_warn(dev, "NCQ disabled due to excessive errors\n"); 1917 goto done; 1918 } 1919 1920 /* speed down? */ 1921 if (verdict & ATA_EH_SPDN_SPEED_DOWN) { 1922 /* speed down SATA link speed if possible */ 1923 if (sata_down_spd_limit(link, 0) == 0) { 1924 action |= ATA_EH_RESET; 1925 goto done; 1926 } 1927 1928 /* lower transfer mode */ 1929 if (dev->spdn_cnt < 2) { 1930 static const int dma_dnxfer_sel[] = 1931 { ATA_DNXFER_DMA, ATA_DNXFER_40C }; 1932 static const int pio_dnxfer_sel[] = 1933 { ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 }; 1934 int sel; 1935 1936 if (dev->xfer_shift != ATA_SHIFT_PIO) 1937 sel = dma_dnxfer_sel[dev->spdn_cnt]; 1938 else 1939 sel = pio_dnxfer_sel[dev->spdn_cnt]; 1940 1941 dev->spdn_cnt++; 1942 1943 if (ata_down_xfermask_limit(dev, sel) == 0) { 1944 action |= ATA_EH_RESET; 1945 goto done; 1946 } 1947 } 1948 } 1949 1950 /* Fall back to PIO? Slowing down to PIO is meaningless for 1951 * SATA ATA devices. Consider it only for PATA and SATAPI. 
1952 */ 1953 if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) && 1954 (link->ap->cbl != ATA_CBL_SATA || dev->class == ATA_DEV_ATAPI) && 1955 (dev->xfer_shift != ATA_SHIFT_PIO)) { 1956 if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) { 1957 dev->spdn_cnt = 0; 1958 action |= ATA_EH_RESET; 1959 goto done; 1960 } 1961 } 1962 1963 return 0; 1964 done: 1965 /* device has been slowed down, blow error history */ 1966 if (!(verdict & ATA_EH_SPDN_KEEP_ERRORS)) 1967 ata_ering_clear(&dev->ering); 1968 return action; 1969 } 1970 1971 /** 1972 * ata_eh_worth_retry - analyze error and decide whether to retry 1973 * @qc: qc to possibly retry 1974 * 1975 * Look at the cause of the error and decide if a retry 1976 * might be useful or not. We don't want to retry media errors 1977 * because the drive itself has probably already taken 10-30 seconds 1978 * doing its own internal retries before reporting the failure. 1979 */ 1980 static inline int ata_eh_worth_retry(struct ata_queued_cmd *qc) 1981 { 1982 if (qc->err_mask & AC_ERR_MEDIA) 1983 return 0; /* don't retry media errors */ 1984 if (qc->flags & ATA_QCFLAG_IO) 1985 return 1; /* otherwise retry anything from fs stack */ 1986 if (qc->err_mask & AC_ERR_INVALID) 1987 return 0; /* don't retry these */ 1988 return qc->err_mask != AC_ERR_DEV; /* retry if not dev error */ 1989 } 1990 1991 /** 1992 * ata_eh_quiet - check if we need to be quiet about a command error 1993 * @qc: qc to check 1994 * 1995 * Look at the qc flags anbd its scsi command request flags to determine 1996 * if we need to be quiet about the command failure. 
1997 */ 1998 static inline bool ata_eh_quiet(struct ata_queued_cmd *qc) 1999 { 2000 if (qc->scsicmd && scsi_cmd_to_rq(qc->scsicmd)->rq_flags & RQF_QUIET) 2001 qc->flags |= ATA_QCFLAG_QUIET; 2002 return qc->flags & ATA_QCFLAG_QUIET; 2003 } 2004 2005 static int ata_eh_get_non_ncq_success_sense(struct ata_link *link) 2006 { 2007 struct ata_port *ap = link->ap; 2008 struct ata_queued_cmd *qc; 2009 2010 qc = __ata_qc_from_tag(ap, link->active_tag); 2011 if (!qc) 2012 return -EIO; 2013 2014 if (!(qc->flags & ATA_QCFLAG_EH) || 2015 !(qc->flags & ATA_QCFLAG_EH_SUCCESS_CMD) || 2016 qc->err_mask) 2017 return -EIO; 2018 2019 if (!ata_eh_request_sense(qc)) 2020 return -EIO; 2021 2022 /* 2023 * No point in checking the return value, since the command has already 2024 * completed successfully. 2025 */ 2026 ata_eh_decide_disposition(qc); 2027 2028 return 0; 2029 } 2030 2031 static void ata_eh_get_success_sense(struct ata_link *link) 2032 { 2033 struct ata_eh_context *ehc = &link->eh_context; 2034 struct ata_device *dev = link->device; 2035 struct ata_port *ap = link->ap; 2036 struct ata_queued_cmd *qc; 2037 int tag, ret = 0; 2038 2039 if (!(ehc->i.dev_action[dev->devno] & ATA_EH_GET_SUCCESS_SENSE)) 2040 return; 2041 2042 /* if frozen, we can't do much */ 2043 if (ata_port_is_frozen(ap)) { 2044 ata_dev_warn(dev, 2045 "successful sense data available but port frozen\n"); 2046 goto out; 2047 } 2048 2049 /* 2050 * If the link has sactive set, then we have outstanding NCQ commands 2051 * and have to read the Successful NCQ Commands log to get the sense 2052 * data. Otherwise, we are dealing with a non-NCQ command and use 2053 * request sense ext command to retrieve the sense data. 
2054 */ 2055 if (link->sactive) 2056 ret = ata_eh_get_ncq_success_sense(link); 2057 else 2058 ret = ata_eh_get_non_ncq_success_sense(link); 2059 if (ret) 2060 goto out; 2061 2062 ata_eh_done(link, dev, ATA_EH_GET_SUCCESS_SENSE); 2063 return; 2064 2065 out: 2066 /* 2067 * If we failed to get sense data for a successful command that ought to 2068 * have sense data, we cannot simply return BLK_STS_OK to user space. 2069 * This is because we can't know if the sense data that we couldn't get 2070 * was actually "DATA CURRENTLY UNAVAILABLE". Reporting such a command 2071 * as success to user space would result in a silent data corruption. 2072 * Thus, add a bogus ABORTED_COMMAND sense data to such commands, such 2073 * that SCSI will report these commands as BLK_STS_IOERR to user space. 2074 */ 2075 ata_qc_for_each_raw(ap, qc, tag) { 2076 if (!(qc->flags & ATA_QCFLAG_EH) || 2077 !(qc->flags & ATA_QCFLAG_EH_SUCCESS_CMD) || 2078 qc->err_mask || 2079 ata_dev_phys_link(qc->dev) != link) 2080 continue; 2081 2082 /* We managed to get sense for this success command, skip. */ 2083 if (qc->flags & ATA_QCFLAG_SENSE_VALID) 2084 continue; 2085 2086 /* This success command did not have any sense data, skip. */ 2087 if (!(qc->result_tf.status & ATA_SENSE)) 2088 continue; 2089 2090 /* This success command had sense data, but we failed to get. */ 2091 ata_scsi_set_sense(dev, qc->scsicmd, ABORTED_COMMAND, 0, 0); 2092 qc->flags |= ATA_QCFLAG_SENSE_VALID; 2093 } 2094 ata_eh_done(link, dev, ATA_EH_GET_SUCCESS_SENSE); 2095 } 2096 2097 /* 2098 * Check if a link is established. This is a relaxed version of 2099 * ata_phys_link_online() which accounts for the fact that this is potentially 2100 * called after changing the link power management policy, which may not be 2101 * reflected immediately in the SStatus register (e.g., we may still be seeing 2102 * the PHY in partial, slumber or devsleep Partial power management state. 
2103 * So check that: 2104 * - A device is still present, that is, DET is 1h (Device presence detected 2105 * but Phy communication not established) or 3h (Device presence detected and 2106 * Phy communication established) 2107 * - Communication is established, that is, IPM is not 0h, indicating that PHY 2108 * is online or in a low power state. 2109 */ 2110 static bool ata_eh_link_established(struct ata_link *link) 2111 { 2112 u32 sstatus; 2113 u8 det, ipm; 2114 2115 /* 2116 * For old IDE/PATA adapters that do not have a valid scr_read method, 2117 * or if reading the SStatus register fails, assume that the device is 2118 * present. Device probe will determine if that is really the case. 2119 */ 2120 if (sata_scr_read(link, SCR_STATUS, &sstatus)) 2121 return true; 2122 2123 det = sstatus & 0x0f; 2124 ipm = (sstatus >> 8) & 0x0f; 2125 2126 return (det & 0x01) && ipm; 2127 } 2128 2129 /** 2130 * ata_eh_link_set_lpm - configure SATA interface power management 2131 * @link: link to configure 2132 * @policy: the link power management policy 2133 * @r_failed_dev: out parameter for failed device 2134 * 2135 * Enable SATA Interface power management. This will enable 2136 * Device Interface Power Management (DIPM) for min_power and 2137 * medium_power_with_dipm policies, and then call driver specific 2138 * callbacks for enabling Host Initiated Power management. 2139 * 2140 * LOCKING: 2141 * EH context. 2142 * 2143 * RETURNS: 2144 * 0 on success, -errno on failure. 2145 */ 2146 static int ata_eh_link_set_lpm(struct ata_link *link, 2147 enum ata_lpm_policy policy, 2148 struct ata_device **r_failed_dev) 2149 { 2150 struct ata_port *ap = ata_is_host_link(link) ? 
link->ap : NULL; 2151 struct ata_eh_context *ehc = &link->eh_context; 2152 struct ata_device *dev, *link_dev = NULL, *lpm_dev = NULL; 2153 enum ata_lpm_policy old_policy = link->lpm_policy; 2154 bool host_has_dipm = !(link->ap->flags & ATA_FLAG_NO_DIPM); 2155 unsigned int hints = ATA_LPM_EMPTY | ATA_LPM_HIPM; 2156 unsigned int err_mask; 2157 int rc; 2158 2159 /* if the link or host doesn't do LPM, noop */ 2160 if (!IS_ENABLED(CONFIG_SATA_HOST) || 2161 (link->flags & ATA_LFLAG_NO_LPM) || (ap && !ap->ops->set_lpm)) 2162 return 0; 2163 2164 /* 2165 * This function currently assumes that it will never be supplied policy 2166 * ATA_LPM_UNKNOWN. 2167 */ 2168 if (WARN_ON_ONCE(policy == ATA_LPM_UNKNOWN)) 2169 return 0; 2170 2171 ata_link_dbg(link, "Set LPM policy: %d -> %d\n", old_policy, policy); 2172 2173 /* 2174 * DIPM is enabled only for ATA_LPM_MIN_POWER, 2175 * ATA_LPM_MIN_POWER_WITH_PARTIAL, and ATA_LPM_MED_POWER_WITH_DIPM, as 2176 * some devices misbehave when the host NACKs transition to SLUMBER. 
2177 */ 2178 ata_for_each_dev(dev, link, ENABLED) { 2179 bool dev_has_hipm = ata_id_has_hipm(dev->id); 2180 bool dev_has_dipm = ata_id_has_dipm(dev->id); 2181 2182 /* find the first enabled and LPM enabled devices */ 2183 if (!link_dev) 2184 link_dev = dev; 2185 2186 if (!lpm_dev && 2187 (dev_has_hipm || (dev_has_dipm && host_has_dipm))) 2188 lpm_dev = dev; 2189 2190 hints &= ~ATA_LPM_EMPTY; 2191 if (!dev_has_hipm) 2192 hints &= ~ATA_LPM_HIPM; 2193 2194 /* disable DIPM before changing link config */ 2195 if (dev_has_dipm) { 2196 err_mask = ata_dev_set_feature(dev, 2197 SETFEATURES_SATA_DISABLE, SATA_DIPM); 2198 if (err_mask && err_mask != AC_ERR_DEV) { 2199 ata_dev_warn(dev, 2200 "failed to disable DIPM, Emask 0x%x\n", 2201 err_mask); 2202 rc = -EIO; 2203 goto fail; 2204 } 2205 } 2206 } 2207 2208 if (ap) { 2209 rc = ap->ops->set_lpm(link, policy, hints); 2210 if (!rc && ap->slave_link) 2211 rc = ap->ops->set_lpm(ap->slave_link, policy, hints); 2212 } else 2213 rc = sata_pmp_set_lpm(link, policy, hints); 2214 2215 /* 2216 * Attribute link config failure to the first (LPM) enabled 2217 * device on the link. 2218 */ 2219 if (rc) { 2220 if (rc == -EOPNOTSUPP) { 2221 link->flags |= ATA_LFLAG_NO_LPM; 2222 return 0; 2223 } 2224 dev = lpm_dev ? lpm_dev : link_dev; 2225 goto fail; 2226 } 2227 2228 /* 2229 * Low level driver acked the transition. Issue DIPM command 2230 * with the new policy set. 2231 */ 2232 link->lpm_policy = policy; 2233 if (ap && ap->slave_link) 2234 ap->slave_link->lpm_policy = policy; 2235 2236 /* 2237 * Host config updated, enable DIPM if transitioning to 2238 * ATA_LPM_MIN_POWER, ATA_LPM_MIN_POWER_WITH_PARTIAL, or 2239 * ATA_LPM_MED_POWER_WITH_DIPM. 
2240 */ 2241 ata_for_each_dev(dev, link, ENABLED) { 2242 bool dev_has_dipm = ata_id_has_dipm(dev->id); 2243 2244 if (policy >= ATA_LPM_MED_POWER_WITH_DIPM && host_has_dipm && 2245 dev_has_dipm) { 2246 err_mask = ata_dev_set_feature(dev, 2247 SETFEATURES_SATA_ENABLE, SATA_DIPM); 2248 if (err_mask && err_mask != AC_ERR_DEV) { 2249 ata_dev_warn(dev, 2250 "failed to enable DIPM, Emask 0x%x\n", 2251 err_mask); 2252 rc = -EIO; 2253 goto fail; 2254 } 2255 } 2256 } 2257 2258 link->last_lpm_change = jiffies; 2259 link->flags |= ATA_LFLAG_CHANGED; 2260 2261 return 0; 2262 2263 fail: 2264 /* restore the old policy */ 2265 link->lpm_policy = old_policy; 2266 if (ap && ap->slave_link) 2267 ap->slave_link->lpm_policy = old_policy; 2268 2269 /* if no device or only one more chance is left, disable LPM */ 2270 if (!dev || ehc->tries[dev->devno] <= 2) { 2271 ata_link_warn(link, "disabling LPM on the link\n"); 2272 link->flags |= ATA_LFLAG_NO_LPM; 2273 } 2274 if (r_failed_dev) 2275 *r_failed_dev = dev; 2276 return rc; 2277 } 2278 2279 /** 2280 * ata_eh_link_autopsy - analyze error and determine recovery action 2281 * @link: host link to perform autopsy on 2282 * 2283 * Analyze why @link failed and determine which recovery actions 2284 * are needed. This function also sets more detailed AC_ERR_* 2285 * values and fills sense data for ATAPI CHECK SENSE. 2286 * 2287 * LOCKING: 2288 * Kernel thread context (may sleep). 
 */
static void ata_eh_link_autopsy(struct ata_link *link)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_queued_cmd *qc;
	struct ata_device *dev;
	unsigned int all_err_mask = 0, eflags = 0;
	int tag, nr_failed = 0, nr_quiet = 0;
	u32 serror;
	int rc;

	/* autopsy was explicitly disabled for this EH context */
	if (ehc->i.flags & ATA_EHI_NO_AUTOPSY)
		return;

	/* obtain and analyze SError */
	rc = sata_scr_read(link, SCR_ERROR, &serror);
	if (rc == 0) {
		ehc->i.serror |= serror;
		ata_eh_analyze_serror(link);
	} else if (rc != -EOPNOTSUPP) {
		/* SError read failed, force reset and probing */
		ehc->i.probe_mask |= ATA_ALL_DEVICES;
		ehc->i.action |= ATA_EH_RESET;
		ehc->i.err_mask |= AC_ERR_OTHER;
	}
	/* rc == -EOPNOTSUPP: SError is skipped and no action is forced */

	/* analyze NCQ failure */
	ata_eh_analyze_ncq_error(link);

	/*
	 * Check if this was a successful command that simply needs sense data.
	 * Since the sense data is not part of the completion, we need to fetch
	 * it using an additional command. Since this can't be done from irq
	 * context, the sense data for successful commands are fetched by EH.
	 */
	ata_eh_get_success_sense(link);

	/* any real error trumps AC_ERR_OTHER */
	if (ehc->i.err_mask & ~AC_ERR_OTHER)
		ehc->i.err_mask &= ~AC_ERR_OTHER;

	/* seed the accumulated mask with the link-level errors */
	all_err_mask |= ehc->i.err_mask;

	ata_qc_for_each_raw(ap, qc, tag) {
		/*
		 * Only look at qcs owned by EH on this link that are neither
		 * already marked for retry nor flagged as successful commands
		 * (ATA_QCFLAG_EH_SUCCESS_CMD).
		 */
		if (!(qc->flags & ATA_QCFLAG_EH) ||
		    qc->flags & ATA_QCFLAG_RETRY ||
		    qc->flags & ATA_QCFLAG_EH_SUCCESS_CMD ||
		    ata_dev_phys_link(qc->dev) != link)
			continue;

		/* inherit upper level err_mask */
		qc->err_mask |= ehc->i.err_mask;

		/* analyze TF */
		ehc->i.action |= ata_eh_analyze_tf(qc);

		/* DEV errors are probably spurious in case of ATA_BUS error */
		if (qc->err_mask & AC_ERR_ATA_BUS)
			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA |
					  AC_ERR_INVALID);

		/* any real error trumps unknown error */
		if (qc->err_mask & ~AC_ERR_OTHER)
			qc->err_mask &= ~AC_ERR_OTHER;

		/*
		 * SENSE_VALID trumps dev/unknown error and revalidation. Upper
		 * layers will determine whether the command is worth retrying
		 * based on the sense data and device class/type. Otherwise,
		 * determine directly if the command is worth retrying using its
		 * error mask and flags.
		 */
		if (qc->flags & ATA_QCFLAG_SENSE_VALID)
			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER);
		else if (ata_eh_worth_retry(qc))
			qc->flags |= ATA_QCFLAG_RETRY;

		/* accumulate error info */
		ehc->i.dev = qc->dev;
		all_err_mask |= qc->err_mask;
		if (qc->flags & ATA_QCFLAG_IO)
			eflags |= ATA_EFLAG_IS_IO;
		trace_ata_eh_link_autopsy_qc(qc);

		/* Count quiet errors */
		if (ata_eh_quiet(qc))
			nr_quiet++;
		nr_failed++;
	}

	/*
	 * If all failed commands requested silence, then be quiet. Note that
	 * this also holds when no qc was counted at all (both counters are 0).
	 */
	if (nr_quiet == nr_failed)
		ehc->i.flags |= ATA_EHI_QUIET;

	/*
	 * enforce default EH actions: a frozen port or any HSM/timeout error
	 * asks for a reset; otherwise revalidate when an I/O command failed
	 * with any error, or a non-I/O command failed with something other
	 * than AC_ERR_DEV.
	 */
	if (ata_port_is_frozen(ap) ||
	    all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT))
		ehc->i.action |= ATA_EH_RESET;
	else if (((eflags & ATA_EFLAG_IS_IO) && all_err_mask) ||
		 (!(eflags & ATA_EFLAG_IS_IO) && (all_err_mask & ~AC_ERR_DEV)))
		ehc->i.action |= ATA_EH_REVALIDATE;

	/* If we have offending qcs and the associated failed device,
	 * perform per-dev EH action only on the offending device.
	 */
	if (ehc->i.dev) {
		ehc->i.dev_action[ehc->i.dev->devno] |=
			ehc->i.action & ATA_EH_PERDEV_MASK;
		ehc->i.action &= ~ATA_EH_PERDEV_MASK;
	}

	/* propagate timeout to host link */
	if ((all_err_mask & AC_ERR_TIMEOUT) && !ata_is_host_link(link))
		ap->link.eh_context.i.err_mask |= AC_ERR_TIMEOUT;

	/*
	 * record error and consider speeding down; without an offending
	 * device, fall back to the link's device when the link can hold only
	 * one device and that device is enabled
	 */
	dev = ehc->i.dev;
	if (!dev && ((ata_link_max_devices(link) == 1 &&
		      ata_dev_enabled(link->device))))
		dev = link->device;

	if (dev) {
		if (dev->flags & ATA_DFLAG_DUBIOUS_XFER)
			eflags |= ATA_EFLAG_DUBIOUS_XFER;
		ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask);
		trace_ata_eh_link_autopsy(dev, ehc->i.action, all_err_mask);
	}
}

/**
 * ata_eh_autopsy - analyze error and determine recovery action
 * @ap: host port to perform autopsy on
 *
 * Analyze all links of @ap and determine why they failed and
 * which recovery actions are needed.
 *
 * ata_eh_link_autopsy() is run on every EDGE link first, then on the
 * slave link if there is one (its results are merged into the master
 * link's EH context), and finally on the host link itself when a port
 * multiplier is attached.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
void ata_eh_autopsy(struct ata_port *ap)
{
	struct ata_link *link;

	ata_for_each_link(link, ap, EDGE)
		ata_eh_link_autopsy(link);

	/* Handle the frigging slave link. Autopsy is done similarly
	 * but actions and flags are transferred over to the master
	 * link and handled from there.
	 */
	if (ap->slave_link) {
		struct ata_eh_context *mehc = &ap->link.eh_context;
		struct ata_eh_context *sehc = &ap->slave_link->eh_context;

		/* transfer control flags from master to slave */
		sehc->i.flags |= mehc->i.flags & ATA_EHI_TO_SLAVE_MASK;

		/* perform autopsy on the slave link */
		ata_eh_link_autopsy(ap->slave_link);

		/*
		 * transfer actions from slave to master and clear slave;
		 * only the dev_action[1] slot is carried over
		 */
		ata_eh_about_to_do(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS);
		mehc->i.action |= sehc->i.action;
		mehc->i.dev_action[1] |= sehc->i.dev_action[1];
		mehc->i.flags |= sehc->i.flags;
		ata_eh_done(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS);
	}

	/* Autopsy of fanout ports can affect host link autopsy.
	 * Perform host link autopsy last.
	 */
	if (sata_pmp_attached(ap))
		ata_eh_link_autopsy(&ap->link);
}

/**
 * ata_get_cmd_name - get name for ATA command
 * @command: ATA command code to get name for
 *
 * Return a textual name of the given command or "unknown".
 *
 * The lookup table is only compiled in when CONFIG_ATA_VERBOSE_ERROR is
 * defined; without it every command code yields "unknown". The returned
 * pointer always refers to a string literal.
 *
 * LOCKING:
 * None
 */
const char *ata_get_cmd_name(u8 command)
{
#ifdef CONFIG_ATA_VERBOSE_ERROR
	/* command code -> name; the table ends at the entry with a NULL text */
	static const struct
	{
		u8 command;
		const char *text;
	} cmd_descr[] = {
		{ ATA_CMD_DEV_RESET,		"DEVICE RESET" },
		{ ATA_CMD_CHK_POWER,		"CHECK POWER MODE" },
		{ ATA_CMD_STANDBY,		"STANDBY" },
		{ ATA_CMD_IDLE,			"IDLE" },
		{ ATA_CMD_EDD,			"EXECUTE DEVICE DIAGNOSTIC" },
		{ ATA_CMD_DOWNLOAD_MICRO,	"DOWNLOAD MICROCODE" },
		{ ATA_CMD_DOWNLOAD_MICRO_DMA,	"DOWNLOAD MICROCODE DMA" },
		{ ATA_CMD_NOP,			"NOP" },
		{ ATA_CMD_FLUSH,		"FLUSH CACHE" },
		{ ATA_CMD_FLUSH_EXT,		"FLUSH CACHE EXT" },
		{ ATA_CMD_ID_ATA,		"IDENTIFY DEVICE" },
		{ ATA_CMD_ID_ATAPI,		"IDENTIFY PACKET DEVICE" },
		{ ATA_CMD_SERVICE,		"SERVICE" },
		{ ATA_CMD_READ,			"READ DMA" },
		{ ATA_CMD_READ_EXT,		"READ DMA EXT" },
		{ ATA_CMD_READ_QUEUED,		"READ DMA QUEUED" },
		{ ATA_CMD_READ_STREAM_EXT,	"READ STREAM EXT" },
		{ ATA_CMD_READ_STREAM_DMA_EXT,	"READ STREAM DMA EXT" },
		{ ATA_CMD_WRITE,		"WRITE DMA" },
		{ ATA_CMD_WRITE_EXT,		"WRITE DMA EXT" },
		{ ATA_CMD_WRITE_QUEUED,		"WRITE DMA QUEUED EXT" },
		{ ATA_CMD_WRITE_STREAM_EXT,	"WRITE STREAM EXT" },
		{ ATA_CMD_WRITE_STREAM_DMA_EXT,	"WRITE STREAM DMA EXT" },
		{ ATA_CMD_WRITE_FUA_EXT,	"WRITE DMA FUA EXT" },
		{ ATA_CMD_WRITE_QUEUED_FUA_EXT,	"WRITE DMA QUEUED FUA EXT" },
		{ ATA_CMD_FPDMA_READ,		"READ FPDMA QUEUED" },
		{ ATA_CMD_FPDMA_WRITE,		"WRITE FPDMA QUEUED" },
		{ ATA_CMD_NCQ_NON_DATA,		"NCQ NON-DATA" },
		{ ATA_CMD_FPDMA_SEND,		"SEND FPDMA QUEUED" },
		{ ATA_CMD_FPDMA_RECV,		"RECEIVE FPDMA QUEUED" },
		{ ATA_CMD_PIO_READ,		"READ SECTOR(S)" },
		{ ATA_CMD_PIO_READ_EXT,		"READ SECTOR(S) EXT" },
		{ ATA_CMD_PIO_WRITE,		"WRITE SECTOR(S)" },
		{ ATA_CMD_PIO_WRITE_EXT,	"WRITE SECTOR(S) EXT" },
		{ ATA_CMD_READ_MULTI,		"READ MULTIPLE" },
		{ ATA_CMD_READ_MULTI_EXT,	"READ MULTIPLE EXT" },
		{ ATA_CMD_WRITE_MULTI,		"WRITE MULTIPLE" },
		{ ATA_CMD_WRITE_MULTI_EXT,	"WRITE MULTIPLE EXT" },
		{ ATA_CMD_WRITE_MULTI_FUA_EXT,	"WRITE MULTIPLE FUA EXT" },
		{ ATA_CMD_SET_FEATURES,		"SET FEATURES" },
		{ ATA_CMD_SET_MULTI,		"SET MULTIPLE MODE" },
		{ ATA_CMD_VERIFY,		"READ VERIFY SECTOR(S)" },
		{ ATA_CMD_VERIFY_EXT,		"READ VERIFY SECTOR(S) EXT" },
		{ ATA_CMD_WRITE_UNCORR_EXT,	"WRITE UNCORRECTABLE EXT" },
		{ ATA_CMD_STANDBYNOW1,		"STANDBY IMMEDIATE" },
		{ ATA_CMD_IDLEIMMEDIATE,	"IDLE IMMEDIATE" },
		{ ATA_CMD_SLEEP,		"SLEEP" },
		{ ATA_CMD_INIT_DEV_PARAMS,	"INITIALIZE DEVICE PARAMETERS" },
		{ ATA_CMD_READ_NATIVE_MAX,	"READ NATIVE MAX ADDRESS" },
		{ ATA_CMD_READ_NATIVE_MAX_EXT,	"READ NATIVE MAX ADDRESS EXT" },
		{ ATA_CMD_SET_MAX,		"SET MAX ADDRESS" },
		{ ATA_CMD_SET_MAX_EXT,		"SET MAX ADDRESS EXT" },
		{ ATA_CMD_READ_LOG_EXT,		"READ LOG EXT" },
		{ ATA_CMD_WRITE_LOG_EXT,	"WRITE LOG EXT" },
		{ ATA_CMD_READ_LOG_DMA_EXT,	"READ LOG DMA EXT" },
		{ ATA_CMD_WRITE_LOG_DMA_EXT,	"WRITE LOG DMA EXT" },
		{ ATA_CMD_TRUSTED_NONDATA,	"TRUSTED NON-DATA" },
		{ ATA_CMD_TRUSTED_RCV,		"TRUSTED RECEIVE" },
		{ ATA_CMD_TRUSTED_RCV_DMA,	"TRUSTED RECEIVE DMA" },
		{ ATA_CMD_TRUSTED_SND,		"TRUSTED SEND" },
		{ ATA_CMD_TRUSTED_SND_DMA,	"TRUSTED SEND DMA" },
		{ ATA_CMD_PMP_READ,		"READ BUFFER" },
		{ ATA_CMD_PMP_READ_DMA,		"READ BUFFER DMA" },
		{ ATA_CMD_PMP_WRITE,		"WRITE BUFFER" },
		{ ATA_CMD_PMP_WRITE_DMA,	"WRITE BUFFER DMA" },
		{ ATA_CMD_CONF_OVERLAY,		"DEVICE CONFIGURATION OVERLAY" },
		{ ATA_CMD_SEC_SET_PASS,		"SECURITY SET PASSWORD" },
		{ ATA_CMD_SEC_UNLOCK,		"SECURITY UNLOCK" },
		{ ATA_CMD_SEC_ERASE_PREP,	"SECURITY ERASE PREPARE" },
		{ ATA_CMD_SEC_ERASE_UNIT,	"SECURITY ERASE UNIT" },
		{ ATA_CMD_SEC_FREEZE_LOCK,	"SECURITY FREEZE LOCK" },
		{ ATA_CMD_SEC_DISABLE_PASS,	"SECURITY DISABLE PASSWORD" },
		{ ATA_CMD_CONFIG_STREAM,	"CONFIGURE STREAM" },
		{ ATA_CMD_SMART,		"SMART" },
		{ ATA_CMD_MEDIA_LOCK,		"DOOR LOCK" },
		{ ATA_CMD_MEDIA_UNLOCK,		"DOOR UNLOCK" },
		{ ATA_CMD_DSM,			"DATA SET MANAGEMENT" },
		{ ATA_CMD_CHK_MED_CRD_TYP,	"CHECK MEDIA CARD TYPE" },
		{ ATA_CMD_CFA_REQ_EXT_ERR,	"CFA REQUEST EXTENDED ERROR" },
		{ ATA_CMD_CFA_WRITE_NE,		"CFA WRITE SECTORS WITHOUT ERASE" },
		{ ATA_CMD_CFA_TRANS_SECT,	"CFA TRANSLATE SECTOR" },
		{ ATA_CMD_CFA_ERASE,		"CFA ERASE SECTORS" },
		{ ATA_CMD_CFA_WRITE_MULT_NE,	"CFA WRITE MULTIPLE WITHOUT ERASE" },
		{ ATA_CMD_REQ_SENSE_DATA,	"REQUEST SENSE DATA EXT" },
		{ ATA_CMD_SANITIZE_DEVICE,	"SANITIZE DEVICE" },
		{ ATA_CMD_ZAC_MGMT_IN,		"ZAC MANAGEMENT IN" },
		{ ATA_CMD_ZAC_MGMT_OUT,		"ZAC MANAGEMENT OUT" },
		{ ATA_CMD_READ_LONG,		"READ LONG (with retries)" },
		{ ATA_CMD_READ_LONG_ONCE,	"READ LONG (without retries)" },
		{ ATA_CMD_WRITE_LONG,		"WRITE LONG (with retries)" },
		{ ATA_CMD_WRITE_LONG_ONCE,	"WRITE LONG (without retries)" },
		{ ATA_CMD_RESTORE,		"RECALIBRATE" },
		{ 0,				NULL } /* terminate list */
	};

	unsigned int i;
	/* linear scan; the first entry whose code matches wins */
	for (i = 0; cmd_descr[i].text; i++)
		if (cmd_descr[i].command == command)
			return cmd_descr[i].text;
#endif

	return "unknown";
}
EXPORT_SYMBOL_GPL(ata_get_cmd_name);

/**
 * ata_eh_link_report - report error handling to user
 * @link: ATA link EH is going on
 *
 * Report EH to user.
 *
 * Nothing is printed when the link's EH context is flagged
 * ATA_EHI_QUIET, or when there is neither a reportable failed command
 * nor a link-level error mask. Otherwise a summary line is logged,
 * followed by one command/result dump for each EH-owned command on
 * @link that has a non-zero error mask. Decoding of the SError, status
 * and error bits is only built with CONFIG_ATA_VERBOSE_ERROR.
 *
 * LOCKING:
 * None.
 */
static void ata_eh_link_report(struct ata_link *link)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_queued_cmd *qc;
	const char *frozen, *desc;
	char tries_buf[16] = "";
	int tag, nr_failed = 0;

	if (ehc->i.flags & ATA_EHI_QUIET)
		return;

	/* an empty description string is treated as "no description" */
	desc = NULL;
	if (ehc->i.desc[0] != '\0')
		desc = ehc->i.desc;

	/*
	 * Count the qcs worth reporting: owned by EH, on this link, not a
	 * quiet qc whose err_mask is exactly AC_ERR_DEV, and not a qc that
	 * carries valid sense data with an empty err_mask.
	 */
	ata_qc_for_each_raw(ap, qc, tag) {
		if (!(qc->flags & ATA_QCFLAG_EH) ||
		    ata_dev_phys_link(qc->dev) != link ||
		    ((qc->flags & ATA_QCFLAG_QUIET) &&
		     qc->err_mask == AC_ERR_DEV))
			continue;
		if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask)
			continue;

		nr_failed++;
	}

	/* neither a failed qc nor a link-level error: stay silent */
	if (!nr_failed && !ehc->i.err_mask)
		return;

	frozen = "";
	if (ata_port_is_frozen(ap))
		frozen = " frozen";

	/* the try counter is shown only while below ATA_EH_MAX_TRIES */
	if (ap->eh_tries < ATA_EH_MAX_TRIES)
		snprintf(tries_buf, sizeof(tries_buf), " t%d",
			 ap->eh_tries);

	/* summary line: per device when one was blamed, per link otherwise */
	if (ehc->i.dev) {
		ata_dev_err(ehc->i.dev, "exception Emask 0x%x "
			    "SAct 0x%x SErr 0x%x action 0x%x%s%s\n",
			    ehc->i.err_mask, link->sactive, ehc->i.serror,
			    ehc->i.action, frozen, tries_buf);
		if (desc)
			ata_dev_err(ehc->i.dev, "%s\n", desc);
	} else {
		ata_link_err(link, "exception Emask 0x%x "
			     "SAct 0x%x SErr 0x%x action 0x%x%s%s\n",
			     ehc->i.err_mask, link->sactive, ehc->i.serror,
			     ehc->i.action, frozen, tries_buf);
		if (desc)
			ata_link_err(link, "%s\n", desc);
	}

#ifdef CONFIG_ATA_VERBOSE_ERROR
	/* decode the accumulated SError bits, one tag per set bit */
	if (ehc->i.serror)
		ata_link_err(link,
		  "SError: { %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s}\n",
		  ehc->i.serror & SERR_DATA_RECOVERED ? "RecovData " : "",
		  ehc->i.serror & SERR_COMM_RECOVERED ? "RecovComm " : "",
		  ehc->i.serror & SERR_DATA ? "UnrecovData " : "",
		  ehc->i.serror & SERR_PERSISTENT ? "Persist " : "",
		  ehc->i.serror & SERR_PROTOCOL ? "Proto " : "",
		  ehc->i.serror & SERR_INTERNAL ? "HostInt " : "",
		  ehc->i.serror & SERR_PHYRDY_CHG ? "PHYRdyChg " : "",
		  ehc->i.serror & SERR_PHY_INT_ERR ? "PHYInt " : "",
		  ehc->i.serror & SERR_COMM_WAKE ? "CommWake " : "",
		  ehc->i.serror & SERR_10B_8B_ERR ? "10B8B " : "",
		  ehc->i.serror & SERR_DISPARITY ? "Dispar " : "",
		  ehc->i.serror & SERR_CRC ? "BadCRC " : "",
		  ehc->i.serror & SERR_HANDSHAKE ? "Handshk " : "",
		  ehc->i.serror & SERR_LINK_SEQ_ERR ? "LinkSeq " : "",
		  ehc->i.serror & SERR_TRANS_ST_ERROR ? "TrStaTrns " : "",
		  ehc->i.serror & SERR_UNRECOG_FIS ? "UnrecFIS " : "",
		  ehc->i.serror & SERR_DEV_XCHG ? "DevExch " : "");
#endif

	/* dump every EH-owned qc on this link that has a non-zero err_mask */
	ata_qc_for_each_raw(ap, qc, tag) {
		struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf;
		char data_buf[20] = "";
		char cdb_buf[70] = "";

		if (!(qc->flags & ATA_QCFLAG_EH) ||
		    ata_dev_phys_link(qc->dev) != link || !qc->err_mask)
			continue;

		/* describe the data phase only for commands that transfer data */
		if (qc->dma_dir != DMA_NONE) {
			/* indexed by qc->dma_dir; DMA_NONE is excluded above */
			static const char *dma_str[] = {
				[DMA_BIDIRECTIONAL]	= "bidi",
				[DMA_TO_DEVICE]		= "out",
				[DMA_FROM_DEVICE]	= "in",
			};
			/* stays NULL for a protocol not listed in the switch */
			const char *prot_str = NULL;

			switch (qc->tf.protocol) {
			case ATA_PROT_UNKNOWN:
				prot_str = "unknown";
				break;
			case ATA_PROT_NODATA:
				prot_str = "nodata";
				break;
			case ATA_PROT_PIO:
				prot_str = "pio";
				break;
			case ATA_PROT_DMA:
				prot_str = "dma";
				break;
			case ATA_PROT_NCQ:
				prot_str = "ncq dma";
				break;
			case ATA_PROT_NCQ_NODATA:
				prot_str = "ncq nodata";
				break;
			case ATAPI_PROT_NODATA:
				prot_str = "nodata";
				break;
			case ATAPI_PROT_PIO:
				prot_str = "pio";
				break;
			case ATAPI_PROT_DMA:
				prot_str = "dma";
				break;
			}
			snprintf(data_buf, sizeof(data_buf), " %s %u %s",
				 prot_str, qc->nbytes, dma_str[qc->dma_dir]);
		}

		/*
		 * ATAPI: format the CDB, preferring the one of the originating
		 * SCSI command when there is one. Anything else: log the ATA
		 * command name on a line of its own.
		 */
		if (ata_is_atapi(qc->tf.protocol)) {
			const u8 *cdb = qc->cdb;
			size_t cdb_len = qc->dev->cdb_len;

			if (qc->scsicmd) {
				cdb = qc->scsicmd->cmnd;
				cdb_len = qc->scsicmd->cmd_len;
			}
			__scsi_format_command(cdb_buf, sizeof(cdb_buf),
					      cdb, cdb_len);
		} else
			ata_dev_err(qc->dev, "failed command: %s\n",
				    ata_get_cmd_name(cmd->command));

		/* issued taskfile ("cmd") followed by the result taskfile ("res") */
		ata_dev_err(qc->dev,
			"cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
			"tag %d%s\n %s"
			"res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
			"Emask 0x%x (%s)%s\n",
			cmd->command, cmd->feature, cmd->nsect,
			cmd->lbal, cmd->lbam, cmd->lbah,
			cmd->hob_feature, cmd->hob_nsect,
			cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah,
			cmd->device, qc->tag, data_buf, cdb_buf,
			res->status, res->error, res->nsect,
			res->lbal, res->lbam, res->lbah,
			res->hob_feature, res->hob_nsect,
			res->hob_lbal, res->hob_lbam, res->hob_lbah,
			res->device, qc->err_mask, ata_err_string(qc->err_mask),
			qc->err_mask & AC_ERR_NCQ ? " <F>" : "");

#ifdef CONFIG_ATA_VERBOSE_ERROR
		if (res->status & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ |
				   ATA_SENSE | ATA_ERR)) {
			/* with BSY set only "Busy" is printed, no other bit */
			if (res->status & ATA_BUSY)
				ata_dev_err(qc->dev, "status: { Busy }\n");
			else
				ata_dev_err(qc->dev, "status: { %s%s%s%s%s}\n",
				  res->status & ATA_DRDY ? "DRDY " : "",
				  res->status & ATA_DF ? "DF " : "",
				  res->status & ATA_DRQ ? "DRQ " : "",
				  res->status & ATA_SENSE ? "SENSE " : "",
				  res->status & ATA_ERR ? "ERR " : "");
		}

		/* the error register is not decoded for ATA_CMD_PACKET */
		if (cmd->command != ATA_CMD_PACKET &&
		    (res->error & (ATA_ICRC | ATA_UNC | ATA_AMNF | ATA_IDNF |
				   ATA_ABORTED)))
			ata_dev_err(qc->dev, "error: { %s%s%s%s%s}\n",
				    res->error & ATA_ICRC ? "ICRC " : "",
				    res->error & ATA_UNC ? "UNC " : "",
				    res->error & ATA_AMNF ? "AMNF " : "",
				    res->error & ATA_IDNF ? "IDNF " : "",
				    res->error & ATA_ABORTED ? "ABRT " : "");
#endif
	}
}

/**
 * ata_eh_report - report error handling to user
 * @ap: ATA port to report EH about
 *
 * Report EH to user by calling ata_eh_link_report() on every link of
 * @ap, host link first.
 *
 * LOCKING:
 * None.
 */
void ata_eh_report(struct ata_port *ap)
{
	struct ata_link *link;

	ata_for_each_link(link, ap, HOST_FIRST)
		ata_eh_link_report(link);
}

/*
 * Invoke @reset on @link. When @clear_classes is true, the classes[] entry
 * of every device on the link is set to ATA_DEV_UNKNOWN first. Returns
 * whatever @reset returns.
 */
static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset,
			unsigned int *classes, unsigned long deadline,
			bool clear_classes)
{
	struct ata_device *dev;

	if (clear_classes)
		ata_for_each_dev(dev, link, ALL)
			classes[dev->devno] = ATA_DEV_UNKNOWN;

	return reset(link, classes, deadline);
}

/*
 * Decide whether a softreset has to follow a hardreset that returned @rc.
 * Never for a link flagged ATA_LFLAG_NO_SRST or an offline link; otherwise
 * when the hardreset returned -EAGAIN, or when @link is the host link of a
 * port for which sata_pmp_supported() is true.
 */
static bool ata_eh_followup_srst_needed(struct ata_link *link, int rc)
{
	if ((link->flags & ATA_LFLAG_NO_SRST) || ata_link_offline(link))
		return false;
	if (rc == -EAGAIN)
		return true;
	if (sata_pmp_supported(link->ap) && ata_is_host_link(link))
		return true;
	return false;
}

/*
 * ata_eh_reset - reset a link (and its slave link, if any) from EH
 * @link: link to reset
 * @classify: when non-zero, a reset that leaves a device on an online link
 *	      unclassified is retried while tries remain
 * @reset_ops: supplies the prereset, softreset, hardreset and postreset
 *	       methods; any of them may be NULL
 *
 * Runs prereset, then hardreset (preferred) or softreset with the timeouts
 * from ata_eh_reset_timeouts[], an optional follow-up softreset, postreset
 * and finally cross-checks link onlineness against the classification
 * result. ATA_PFLAG_RESETTING is set on the port for the duration of the
 * call. Failed attempts are retried until the timeout table (or a single
 * try for ATA_LFLAG_RST_ONCE links) is exhausted.
 *
 * Returns 0 on success (including the cases where prereset decided that no
 * reset is needed), -errno otherwise.
 */
int ata_eh_reset(struct ata_link *link, int classify,
		 struct ata_reset_operations *reset_ops)
{
	struct ata_port *ap = link->ap;
	struct ata_link *slave = ap->slave_link;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_eh_context *sehc = slave ? &slave->eh_context : NULL;
	ata_reset_fn_t hardreset = reset_ops->hardreset;
	ata_reset_fn_t softreset = reset_ops->softreset;
	ata_prereset_fn_t prereset = reset_ops->prereset;
	ata_postreset_fn_t postreset = reset_ops->postreset;
	unsigned int *classes = ehc->classes;
	/* private copy: ASSUME_ATA may be added below without touching link */
	unsigned int lflags = link->flags;
	int verbose = !(ehc->i.flags & ATA_EHI_QUIET);
	int max_tries = 0, try = 0;
	struct ata_link *failed_link;
	struct ata_device *dev;
	unsigned long deadline, now;
	ata_reset_fn_t reset;
	unsigned long flags;
	u32 sstatus;
	int nr_unknown, rc;

	/*
	 * Prepare to reset
	 */
	/* number of usable entries before the UINT_MAX terminator */
	while (ata_eh_reset_timeouts[max_tries] != UINT_MAX)
		max_tries++;
	if (link->flags & ATA_LFLAG_RST_ONCE)
		max_tries = 1;
	if (link->flags & ATA_LFLAG_NO_HRST)
		hardreset = NULL;
	if (link->flags & ATA_LFLAG_NO_SRST)
		softreset = NULL;

	/* make sure each reset attempt is at least COOL_DOWN apart */
	if (ehc->i.flags & ATA_EHI_DID_RESET) {
		now = jiffies;
		WARN_ON(time_after(ehc->last_reset, now));
		deadline = ata_deadline(ehc->last_reset,
					ATA_EH_RESET_COOL_DOWN);
		if (time_before(now, deadline))
			schedule_timeout_uninterruptible(deadline - now);
	}

	/* flag the port as resetting; cleared again at the out: label */
	spin_lock_irqsave(ap->lock, flags);
	ap->pflags |= ATA_PFLAG_RESETTING;
	spin_unlock_irqrestore(ap->lock, flags);

	ata_eh_about_to_do(link, NULL, ATA_EH_RESET);

	ata_for_each_dev(dev, link, ALL) {
		/* If we issue an SRST then an ATA drive (not ATAPI)
		 * may change configuration and be in PIO0 timing. If
		 * we do a hard reset (or are coming from power on)
		 * this is true for ATA or ATAPI. Until we've set a
		 * suitable controller mode we should not touch the
		 * bus as we may be talking too fast.
		 */
		dev->pio_mode = XFER_PIO_0;
		dev->dma_mode = 0xff;

		/* If the controller has a pio mode setup function
		 * then use it to set the chipset to rights. Don't
		 * touch the DMA setup as that will be dealt with when
		 * configuring devices.
		 */
		if (ap->ops->set_piomode)
			ap->ops->set_piomode(ap, dev);
	}

	/* prefer hardreset */
	reset = NULL;
	ehc->i.action &= ~ATA_EH_RESET;
	if (hardreset) {
		reset = hardreset;
		ehc->i.action |= ATA_EH_HARDRESET;
	} else if (softreset) {
		reset = softreset;
		ehc->i.action |= ATA_EH_SOFTRESET;
	}

	if (prereset) {
		/* shadows the function-scope deadline; used for prereset only */
		unsigned long deadline = ata_deadline(jiffies,
						      ATA_EH_PRERESET_TIMEOUT);

		/* let the slave's prereset see the same reset action */
		if (slave) {
			sehc->i.action &= ~ATA_EH_RESET;
			sehc->i.action |= ehc->i.action;
		}

		rc = prereset(link, deadline);

		/* If present, do prereset on slave link too. Reset
		 * is skipped iff both master and slave links report
		 * -ENOENT or clear ATA_EH_RESET.
		 */
		if (slave && (rc == 0 || rc == -ENOENT)) {
			int tmp;

			tmp = prereset(slave, deadline);
			if (tmp != -ENOENT)
				rc = tmp;

			ehc->i.action |= sehc->i.action;
		}

		if (rc) {
			if (rc == -ENOENT) {
				ata_link_dbg(link, "port disabled--ignoring\n");
				ehc->i.action &= ~ATA_EH_RESET;

				ata_for_each_dev(dev, link, ALL)
					classes[dev->devno] = ATA_DEV_NONE;

				/* a disabled port is not reported as an error */
				rc = 0;
			} else
				ata_link_err(link,
					     "prereset failed (errno=%d)\n",
					     rc);
			goto out;
		}

		/* prereset() might have cleared ATA_EH_RESET. If so,
		 * bang classes, thaw and return.
		 */
		if (reset && !(ehc->i.action & ATA_EH_RESET)) {
			ata_for_each_dev(dev, link, ALL)
				classes[dev->devno] = ATA_DEV_NONE;
			if (ata_port_is_frozen(ap) && ata_is_host_link(link))
				ata_eh_thaw_port(ap);
			rc = 0;
			goto out;
		}
	}

retry:
	/*
	 * Perform reset
	 */
	if (ata_is_host_link(link))
		ata_eh_freeze_port(ap);

	/* each attempt consumes the next entry of the timeout table */
	deadline = ata_deadline(jiffies, ata_eh_reset_timeouts[try++]);

	if (reset) {
		if (verbose)
			ata_link_info(link, "%s resetting link\n",
				      reset == softreset ? "soft" : "hard");

		/* mark that this EH session started with reset */
		ehc->last_reset = jiffies;
		if (reset == hardreset) {
			ehc->i.flags |= ATA_EHI_DID_HARDRESET;
			trace_ata_link_hardreset_begin(link, classes, deadline);
		} else {
			ehc->i.flags |= ATA_EHI_DID_SOFTRESET;
			trace_ata_link_softreset_begin(link, classes, deadline);
		}

		rc = ata_do_reset(link, reset, classes, deadline, true);
		if (reset == hardreset)
			trace_ata_link_hardreset_end(link, classes, rc);
		else
			trace_ata_link_softreset_end(link, classes, rc);
		/* -EAGAIN is not a failure here; it is evaluated further down */
		if (rc && rc != -EAGAIN) {
			failed_link = link;
			goto fail;
		}

		/* hardreset slave link if existent */
		if (slave && reset == hardreset) {
			int tmp;

			if (verbose)
				ata_link_info(slave, "hard resetting link\n");

			ata_eh_about_to_do(slave, NULL, ATA_EH_RESET);
			trace_ata_slave_hardreset_begin(slave, classes,
							deadline);
			/*
			 * false: keep the classes[] entries written by the
			 * reset of the master link just above
			 */
			tmp = ata_do_reset(slave, reset, classes, deadline,
					   false);
			trace_ata_slave_hardreset_end(slave, classes, tmp);
			switch (tmp) {
			case -EAGAIN:
				rc = -EAGAIN;
				break;
			case 0:
				break;
			default:
				failed_link = slave;
				rc = tmp;
				goto fail;
			}
		}

		/* perform follow-up SRST if necessary */
		if (reset == hardreset &&
		    ata_eh_followup_srst_needed(link, rc)) {
			reset = softreset;

			if (!reset) {
				ata_link_err(link,
	     "follow-up softreset required but no softreset available\n");
				failed_link = link;
				rc = -EINVAL;
				goto fail;
			}

			ata_eh_about_to_do(link, NULL, ATA_EH_RESET);
			trace_ata_link_softreset_begin(link, classes, deadline);
			rc = ata_do_reset(link, reset, classes, deadline, true);
			trace_ata_link_softreset_end(link, classes, rc);
			if (rc) {
				failed_link = link;
				goto fail;
			}
		}
	} else {
		if (verbose)
			ata_link_info(link,
	"no reset method available, skipping reset\n");
		/* nothing classified the devices: default to ATA */
		if (!(lflags & ATA_LFLAG_ASSUME_CLASS))
			lflags |= ATA_LFLAG_ASSUME_ATA;
	}

	/*
	 * Post-reset processing
	 */
	ata_for_each_dev(dev, link, ALL) {
		/* After the reset, the device state is PIO 0 and the
		 * controller state is undefined. Reset also wakes up
		 * drives from sleeping mode.
		 */
		dev->pio_mode = XFER_PIO_0;
		dev->flags &= ~ATA_DFLAG_SLEEPING;

		/* no class override for a device whose link is offline */
		if (ata_phys_link_offline(ata_dev_phys_link(dev)))
			continue;

		/* apply class override */
		if (lflags & ATA_LFLAG_ASSUME_ATA)
			classes[dev->devno] = ATA_DEV_ATA;
		else if (lflags & ATA_LFLAG_ASSUME_SEMB)
			classes[dev->devno] = ATA_DEV_SEMB_UNSUP;
	}

	/* record current link speed (SStatus bits 7:4) */
	if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0)
		link->sata_spd = (sstatus >> 4) & 0xf;
	if (slave && sata_scr_read(slave, SCR_STATUS, &sstatus) == 0)
		slave->sata_spd = (sstatus >> 4) & 0xf;

	/* thaw the port */
	if (ata_is_host_link(link))
		ata_eh_thaw_port(ap);

	/* postreset() should clear hardware SError. Although SError
	 * is cleared during link resume, clearing SError here is
	 * necessary as some PHYs raise hotplug events after SRST.
	 * This introduces race condition where hotplug occurs between
	 * reset and here. This race is mitigated by cross checking
	 * link onlineness and classification result later.
	 */
	if (postreset) {
		postreset(link, classes);
		trace_ata_link_postreset(link, classes, rc);
		if (slave) {
			postreset(slave, classes);
			trace_ata_slave_postreset(slave, classes, rc);
		}
	}

	/* clear cached SError */
	spin_lock_irqsave(link->ap->lock, flags);
	link->eh_info.serror = 0;
	if (slave)
		slave->eh_info.serror = 0;
	spin_unlock_irqrestore(link->ap->lock, flags);

	/*
	 * Make sure onlineness and classification result correspond.
	 * Hotplug could have happened during reset and some
	 * controllers fail to wait while a drive is spinning up after
	 * being hotplugged causing misdetection. By cross checking
	 * link on/offlineness and classification result, those
	 * conditions can be reliably detected and retried.
	 */
	nr_unknown = 0;
	ata_for_each_dev(dev, link, ALL) {
		if (ata_phys_link_online(ata_dev_phys_link(dev))) {
			/* online but unclassified: counted for a retry */
			if (classes[dev->devno] == ATA_DEV_UNKNOWN) {
				ata_dev_dbg(dev, "link online but device misclassified\n");
				classes[dev->devno] = ATA_DEV_NONE;
				nr_unknown++;
			}
		} else if (ata_phys_link_offline(ata_dev_phys_link(dev))) {
			/* offline: whatever class was reported is dropped */
			if (ata_class_enabled(classes[dev->devno]))
				ata_dev_dbg(dev,
					    "link offline, clearing class %d to NONE\n",
					    classes[dev->devno]);
			classes[dev->devno] = ATA_DEV_NONE;
		} else if (classes[dev->devno] == ATA_DEV_UNKNOWN) {
			/* neither online nor offline could be established */
			ata_dev_dbg(dev,
				    "link status unknown, clearing UNKNOWN to NONE\n");
			classes[dev->devno] = ATA_DEV_NONE;
		}
	}

	if (classify && nr_unknown) {
		if (try < max_tries) {
			ata_link_warn(link,
				      "link online but %d devices misclassified, retrying\n",
				      nr_unknown);
			failed_link = link;
			rc = -EAGAIN;
			goto fail;
		}
		/* out of tries: carry on with the devices set to NONE */
		ata_link_warn(link,
			      "link online but %d devices misclassified, "
			      "device detection might fail\n", nr_unknown);
	}

	/* reset successful, schedule revalidation */
	ata_eh_done(link, NULL, ATA_EH_RESET);
	if (slave)
		ata_eh_done(slave, NULL, ATA_EH_RESET);
	ehc->last_reset = jiffies;		/* update to completion time */
	ehc->i.action |= ATA_EH_REVALIDATE;
	link->lpm_policy = ATA_LPM_UNKNOWN;	/* reset LPM state */

	rc = 0;
out:
	/* clear hotplug flag */
	ehc->i.flags &= ~ATA_EHI_HOTPLUGGED;
	if (slave)
		sehc->i.flags &= ~ATA_EHI_HOTPLUGGED;

	spin_lock_irqsave(ap->lock, flags);
	ap->pflags &= ~ATA_PFLAG_RESETTING;
	spin_unlock_irqrestore(ap->lock, flags);

	return rc;

fail:
	/* if SCR isn't accessible on a fan-out port, PMP needs to be reset */
	if (!ata_is_host_link(link) &&
	    sata_scr_read(link, SCR_STATUS, &sstatus))
		rc = -ERESTART;

	if (try >= max_tries) {
		/*
		 * Thaw host port even if reset failed, so that the port
		 * can be retried on the next phy event. This risks
		 * repeated EH runs but seems to be a better tradeoff than
		 * shutting down a port after a botched hotplug attempt.
		 */
		if (ata_is_host_link(link))
			ata_eh_thaw_port(ap);
		ata_link_warn(link, "%s failed\n",
			      reset == hardreset ? "hardreset" : "softreset");
		goto out;
	}

	/* sleep out the rest of this attempt's timeout before retrying */
	now = jiffies;
	if (time_before(now, deadline)) {
		unsigned long delta = deadline - now;

		ata_link_warn(failed_link,
			"reset failed (errno=%d), retrying in %u secs\n",
			rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000));

		/* EH ownership is dropped while sleeping and retaken after */
		ata_eh_release(ap);
		while (delta)
			delta = schedule_timeout_uninterruptible(delta);
		ata_eh_acquire(ap);
	}

	/*
	 * While disks spinup behind PMP, some controllers fail sending SRST.
	 * They need to be reset - as well as the PMP - before retrying.
	 */
	if (rc == -ERESTART) {
		if (ata_is_host_link(link))
			ata_eh_thaw_port(ap);
		goto out;
	}

	/*
	 * Lower the speed limit on both links before the last try; before
	 * that only on the link that failed, and only for -EPIPE.
	 */
	if (try == max_tries - 1) {
		sata_down_spd_limit(link, 0);
		if (slave)
			sata_down_spd_limit(slave, 0);
	} else if (rc == -EPIPE)
		sata_down_spd_limit(failed_link, 0);

	/* every retry goes back to hardreset when one is available */
	if (hardreset)
		reset = hardreset;
	goto retry;
}

static inline void ata_eh_pull_park_action(struct ata_port *ap)
{
	struct ata_link *link;
	struct ata_device *dev;
	unsigned long flags;

	/*
	 * This function can be thought of as an extended version of
	 * ata_eh_about_to_do() specially crafted to accommodate the
	 * requirements of ATA_EH_PARK handling. Since the EH thread
	 * does not leave the do {} while () loop in ata_eh_recover as
	 * long as the timeout for a park request to *one* device on
	 * the port has not expired, and since we still want to pick
	 * up park requests to other devices on the same port or
	 * timeout updates for the same device, we have to pull
	 * ATA_EH_PARK actions from eh_info into eh_context.i
	 * ourselves at the beginning of each pass over the loop.
	 *
	 * Additionally, all write accesses to &ap->park_req_pending
	 * through reinit_completion() (see below) or complete_all()
	 * (see ata_scsi_park_store()) are protected by the host lock.
	 * As a result we have that park_req_pending.done is zero on
	 * exit from this function, i.e. when ATA_EH_PARK actions for
	 * *all* devices on port ap have been pulled into the
	 * respective eh_context structs. If, and only if,
	 * park_req_pending.done is non-zero by the time we reach
	 * wait_for_completion_timeout(), another ATA_EH_PARK action
	 * has been scheduled for at least one of the devices on port
	 * ap and we have to cycle over the do {} while () loop in
	 * ata_eh_recover() again.
3253 */ 3254 3255 spin_lock_irqsave(ap->lock, flags); 3256 reinit_completion(&ap->park_req_pending); 3257 ata_for_each_link(link, ap, EDGE) { 3258 ata_for_each_dev(dev, link, ALL) { 3259 struct ata_eh_info *ehi = &link->eh_info; 3260 3261 link->eh_context.i.dev_action[dev->devno] |= 3262 ehi->dev_action[dev->devno] & ATA_EH_PARK; 3263 ata_eh_clear_action(link, dev, ehi, ATA_EH_PARK); 3264 } 3265 } 3266 spin_unlock_irqrestore(ap->lock, flags); 3267 } 3268 3269 static void ata_eh_park_issue_cmd(struct ata_device *dev, int park) 3270 { 3271 struct ata_eh_context *ehc = &dev->link->eh_context; 3272 struct ata_taskfile tf; 3273 unsigned int err_mask; 3274 3275 ata_tf_init(dev, &tf); 3276 if (park) { 3277 ehc->unloaded_mask |= 1 << dev->devno; 3278 tf.command = ATA_CMD_IDLEIMMEDIATE; 3279 tf.feature = 0x44; 3280 tf.lbal = 0x4c; 3281 tf.lbam = 0x4e; 3282 tf.lbah = 0x55; 3283 } else { 3284 ehc->unloaded_mask &= ~(1 << dev->devno); 3285 tf.command = ATA_CMD_CHK_POWER; 3286 } 3287 3288 tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR; 3289 tf.protocol = ATA_PROT_NODATA; 3290 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); 3291 if (park && (err_mask || tf.lbal != 0xc4)) { 3292 ata_dev_err(dev, "head unload failed!\n"); 3293 ehc->unloaded_mask &= ~(1 << dev->devno); 3294 } 3295 } 3296 3297 static int ata_eh_revalidate_and_attach(struct ata_link *link, 3298 struct ata_device **r_failed_dev) 3299 { 3300 struct ata_port *ap = link->ap; 3301 struct ata_eh_context *ehc = &link->eh_context; 3302 struct ata_device *dev; 3303 unsigned int new_mask = 0; 3304 unsigned long flags; 3305 int rc = 0; 3306 3307 /* For PATA drive side cable detection to work, IDENTIFY must 3308 * be done backwards such that PDIAG- is released by the slave 3309 * device before the master device is identified. 
3310 */ 3311 ata_for_each_dev(dev, link, ALL_REVERSE) { 3312 unsigned int action = ata_eh_dev_action(dev); 3313 unsigned int readid_flags = 0; 3314 3315 if (ehc->i.flags & ATA_EHI_DID_RESET) 3316 readid_flags |= ATA_READID_POSTRESET; 3317 3318 if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) { 3319 WARN_ON(dev->class == ATA_DEV_PMP); 3320 3321 /* 3322 * The link may be in a deep sleep, wake it up. 3323 * 3324 * If the link is in deep sleep, ata_phys_link_offline() 3325 * will return true, causing the revalidation to fail, 3326 * which leads to a (potentially) needless hard reset. 3327 * 3328 * ata_eh_recover() will later restore the link policy 3329 * to ap->target_lpm_policy after revalidation is done. 3330 */ 3331 if (link->lpm_policy > ATA_LPM_MAX_POWER) { 3332 rc = ata_eh_link_set_lpm(link, ATA_LPM_MAX_POWER, 3333 r_failed_dev); 3334 if (rc) 3335 goto err; 3336 } 3337 3338 if (!ata_eh_link_established(ata_dev_phys_link(dev))) { 3339 rc = -EIO; 3340 goto err; 3341 } 3342 3343 ata_eh_about_to_do(link, dev, ATA_EH_REVALIDATE); 3344 rc = ata_dev_revalidate(dev, ehc->classes[dev->devno], 3345 readid_flags); 3346 if (rc) 3347 goto err; 3348 3349 ata_eh_done(link, dev, ATA_EH_REVALIDATE); 3350 3351 /* Configuration may have changed, reconfigure 3352 * transfer mode. 3353 */ 3354 ehc->i.flags |= ATA_EHI_SETMODE; 3355 3356 /* schedule the scsi_rescan_device() here */ 3357 schedule_delayed_work(&ap->scsi_rescan_task, 0); 3358 } else if (dev->class == ATA_DEV_UNKNOWN && 3359 ehc->tries[dev->devno] && 3360 ata_class_enabled(ehc->classes[dev->devno])) { 3361 /* Temporarily set dev->class, it will be 3362 * permanently set once all configurations are 3363 * complete. This is necessary because new 3364 * device configuration is done in two 3365 * separate loops. 
3366 */ 3367 dev->class = ehc->classes[dev->devno]; 3368 3369 if (dev->class == ATA_DEV_PMP) 3370 rc = sata_pmp_attach(dev); 3371 else 3372 rc = ata_dev_read_id(dev, &dev->class, 3373 readid_flags, dev->id); 3374 3375 /* read_id might have changed class, store and reset */ 3376 ehc->classes[dev->devno] = dev->class; 3377 dev->class = ATA_DEV_UNKNOWN; 3378 3379 switch (rc) { 3380 case 0: 3381 /* clear error info accumulated during probe */ 3382 ata_ering_clear(&dev->ering); 3383 new_mask |= 1 << dev->devno; 3384 break; 3385 case -ENOENT: 3386 /* IDENTIFY was issued to non-existent 3387 * device. No need to reset. Just 3388 * thaw and ignore the device. 3389 */ 3390 ata_eh_thaw_port(ap); 3391 break; 3392 default: 3393 goto err; 3394 } 3395 } 3396 } 3397 3398 /* PDIAG- should have been released, ask cable type if post-reset */ 3399 if ((ehc->i.flags & ATA_EHI_DID_RESET) && ata_is_host_link(link)) { 3400 if (ap->ops->cable_detect) 3401 ap->cbl = ap->ops->cable_detect(ap); 3402 ata_force_cbl(ap); 3403 } 3404 3405 /* Configure new devices forward such that user doesn't see 3406 * device detection messages backwards. 
3407 */ 3408 ata_for_each_dev(dev, link, ALL) { 3409 if (!(new_mask & (1 << dev->devno))) 3410 continue; 3411 3412 dev->class = ehc->classes[dev->devno]; 3413 3414 if (dev->class == ATA_DEV_PMP) 3415 continue; 3416 3417 ehc->i.flags |= ATA_EHI_PRINTINFO; 3418 rc = ata_dev_configure(dev); 3419 ehc->i.flags &= ~ATA_EHI_PRINTINFO; 3420 if (rc) { 3421 dev->class = ATA_DEV_UNKNOWN; 3422 goto err; 3423 } 3424 3425 spin_lock_irqsave(ap->lock, flags); 3426 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; 3427 spin_unlock_irqrestore(ap->lock, flags); 3428 3429 /* new device discovered, configure xfermode */ 3430 ehc->i.flags |= ATA_EHI_SETMODE; 3431 } 3432 3433 return 0; 3434 3435 err: 3436 dev->flags &= ~ATA_DFLAG_RESUMING; 3437 *r_failed_dev = dev; 3438 return rc; 3439 } 3440 3441 /** 3442 * ata_eh_set_mode - Program timings and issue SET FEATURES - XFER 3443 * @link: link on which timings will be programmed 3444 * @r_failed_dev: out parameter for failed device 3445 * 3446 * Set ATA device disk transfer mode (PIO3, UDMA6, etc.). If 3447 * ata_eh_set_mode() fails, pointer to the failing device is 3448 * returned in @r_failed_dev. 3449 * 3450 * LOCKING: 3451 * PCI/etc. bus probe sem. 3452 * 3453 * RETURNS: 3454 * 0 on success, negative errno otherwise 3455 */ 3456 static int ata_eh_set_mode(struct ata_link *link, 3457 struct ata_device **r_failed_dev) 3458 { 3459 struct ata_port *ap = link->ap; 3460 struct ata_device *dev; 3461 int rc; 3462 3463 /* if data transfer is verified, clear DUBIOUS_XFER on ering top */ 3464 ata_for_each_dev(dev, link, ENABLED) { 3465 if (!(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) { 3466 struct ata_ering_entry *ent; 3467 3468 ent = ata_ering_top(&dev->ering); 3469 if (ent) 3470 ent->eflags &= ~ATA_EFLAG_DUBIOUS_XFER; 3471 } 3472 } 3473 3474 /* has private set_mode? 
*/ 3475 if (ap->ops->set_mode) 3476 rc = ap->ops->set_mode(link, r_failed_dev); 3477 else 3478 rc = ata_set_mode(link, r_failed_dev); 3479 3480 /* if transfer mode has changed, set DUBIOUS_XFER on device */ 3481 ata_for_each_dev(dev, link, ENABLED) { 3482 struct ata_eh_context *ehc = &link->eh_context; 3483 u8 saved_xfer_mode = ehc->saved_xfer_mode[dev->devno]; 3484 u8 saved_ncq = !!(ehc->saved_ncq_enabled & (1 << dev->devno)); 3485 3486 if (dev->xfer_mode != saved_xfer_mode || 3487 ata_ncq_enabled(dev) != saved_ncq) 3488 dev->flags |= ATA_DFLAG_DUBIOUS_XFER; 3489 } 3490 3491 return rc; 3492 } 3493 3494 /** 3495 * atapi_eh_clear_ua - Clear ATAPI UNIT ATTENTION after reset 3496 * @dev: ATAPI device to clear UA for 3497 * 3498 * Resets and other operations can make an ATAPI device raise 3499 * UNIT ATTENTION which causes the next operation to fail. This 3500 * function clears UA. 3501 * 3502 * LOCKING: 3503 * EH context (may sleep). 3504 * 3505 * RETURNS: 3506 * 0 on success, -errno on failure. 
3507 */ 3508 static int atapi_eh_clear_ua(struct ata_device *dev) 3509 { 3510 int i; 3511 3512 for (i = 0; i < ATA_EH_UA_TRIES; i++) { 3513 u8 *sense_buffer = dev->sector_buf; 3514 u8 sense_key = 0; 3515 unsigned int err_mask; 3516 3517 err_mask = atapi_eh_tur(dev, &sense_key); 3518 if (err_mask != 0 && err_mask != AC_ERR_DEV) { 3519 ata_dev_warn(dev, 3520 "TEST_UNIT_READY failed (err_mask=0x%x)\n", 3521 err_mask); 3522 return -EIO; 3523 } 3524 3525 if (!err_mask || sense_key != UNIT_ATTENTION) 3526 return 0; 3527 3528 err_mask = atapi_eh_request_sense(dev, sense_buffer, sense_key); 3529 if (err_mask) { 3530 ata_dev_warn(dev, "failed to clear " 3531 "UNIT ATTENTION (err_mask=0x%x)\n", err_mask); 3532 return -EIO; 3533 } 3534 } 3535 3536 ata_dev_warn(dev, "UNIT ATTENTION persists after %d tries\n", 3537 ATA_EH_UA_TRIES); 3538 3539 return 0; 3540 } 3541 3542 /** 3543 * ata_eh_maybe_retry_flush - Retry FLUSH if necessary 3544 * @dev: ATA device which may need FLUSH retry 3545 * 3546 * If @dev failed FLUSH, it needs to be reported upper layer 3547 * immediately as it means that @dev failed to remap and already 3548 * lost at least a sector and further FLUSH retrials won't make 3549 * any difference to the lost sector. However, if FLUSH failed 3550 * for other reasons, for example transmission error, FLUSH needs 3551 * to be retried. 3552 * 3553 * This function determines whether FLUSH failure retry is 3554 * necessary and performs it if so. 3555 * 3556 * RETURNS: 3557 * 0 if EH can continue, -errno if EH needs to be repeated. 3558 */ 3559 static int ata_eh_maybe_retry_flush(struct ata_device *dev) 3560 { 3561 struct ata_link *link = dev->link; 3562 struct ata_port *ap = link->ap; 3563 struct ata_queued_cmd *qc; 3564 struct ata_taskfile tf; 3565 unsigned int err_mask; 3566 int rc = 0; 3567 3568 /* did flush fail for this device? 
*/ 3569 if (!ata_tag_valid(link->active_tag)) 3570 return 0; 3571 3572 qc = __ata_qc_from_tag(ap, link->active_tag); 3573 if (qc->dev != dev || (qc->tf.command != ATA_CMD_FLUSH_EXT && 3574 qc->tf.command != ATA_CMD_FLUSH)) 3575 return 0; 3576 3577 /* if the device failed it, it should be reported to upper layers */ 3578 if (qc->err_mask & AC_ERR_DEV) 3579 return 0; 3580 3581 /* flush failed for some other reason, give it another shot */ 3582 ata_tf_init(dev, &tf); 3583 3584 tf.command = qc->tf.command; 3585 tf.flags |= ATA_TFLAG_DEVICE; 3586 tf.protocol = ATA_PROT_NODATA; 3587 3588 ata_dev_warn(dev, "retrying FLUSH 0x%x Emask 0x%x\n", 3589 tf.command, qc->err_mask); 3590 3591 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); 3592 if (!err_mask) { 3593 /* 3594 * FLUSH is complete but there's no way to 3595 * successfully complete a failed command from EH. 3596 * Making sure retry is allowed at least once and 3597 * retrying it should do the trick - whatever was in 3598 * the cache is already on the platter and this won't 3599 * cause infinite loop. 
3600 */ 3601 qc->scsicmd->allowed = max(qc->scsicmd->allowed, 1); 3602 } else { 3603 ata_dev_warn(dev, "FLUSH failed Emask 0x%x\n", 3604 err_mask); 3605 rc = -EIO; 3606 3607 /* if device failed it, report it to upper layers */ 3608 if (err_mask & AC_ERR_DEV) { 3609 qc->err_mask |= AC_ERR_DEV; 3610 qc->result_tf = tf; 3611 if (!ata_port_is_frozen(ap)) 3612 rc = 0; 3613 } 3614 } 3615 return rc; 3616 } 3617 3618 int ata_link_nr_enabled(struct ata_link *link) 3619 { 3620 struct ata_device *dev; 3621 int cnt = 0; 3622 3623 ata_for_each_dev(dev, link, ENABLED) 3624 cnt++; 3625 return cnt; 3626 } 3627 3628 static int ata_link_nr_vacant(struct ata_link *link) 3629 { 3630 struct ata_device *dev; 3631 int cnt = 0; 3632 3633 ata_for_each_dev(dev, link, ALL) 3634 if (dev->class == ATA_DEV_UNKNOWN) 3635 cnt++; 3636 return cnt; 3637 } 3638 3639 static int ata_eh_skip_recovery(struct ata_link *link) 3640 { 3641 struct ata_port *ap = link->ap; 3642 struct ata_eh_context *ehc = &link->eh_context; 3643 struct ata_device *dev; 3644 3645 /* skip disabled links */ 3646 if (link->flags & ATA_LFLAG_DISABLED) 3647 return 1; 3648 3649 /* skip if explicitly requested */ 3650 if (ehc->i.flags & ATA_EHI_NO_RECOVERY) 3651 return 1; 3652 3653 /* thaw frozen port and recover failed devices */ 3654 if (ata_port_is_frozen(ap) || ata_link_nr_enabled(link)) 3655 return 0; 3656 3657 /* reset at least once if reset is requested */ 3658 if ((ehc->i.action & ATA_EH_RESET) && 3659 !(ehc->i.flags & ATA_EHI_DID_RESET)) 3660 return 0; 3661 3662 /* skip if class codes for all vacant slots are ATA_DEV_NONE */ 3663 ata_for_each_dev(dev, link, ALL) { 3664 if (dev->class == ATA_DEV_UNKNOWN && 3665 ehc->classes[dev->devno] != ATA_DEV_NONE) 3666 return 0; 3667 } 3668 3669 return 1; 3670 } 3671 3672 static int ata_count_probe_trials_cb(struct ata_ering_entry *ent, void *void_arg) 3673 { 3674 u64 interval = msecs_to_jiffies(ATA_EH_PROBE_TRIAL_INTERVAL); 3675 u64 now = get_jiffies_64(); 3676 int *trials = void_arg; 
3677 3678 if ((ent->eflags & ATA_EFLAG_OLD_ER) || 3679 (ent->timestamp < now - min(now, interval))) 3680 return -1; 3681 3682 (*trials)++; 3683 return 0; 3684 } 3685 3686 static int ata_eh_schedule_probe(struct ata_device *dev) 3687 { 3688 struct ata_eh_context *ehc = &dev->link->eh_context; 3689 struct ata_link *link = ata_dev_phys_link(dev); 3690 int trials = 0; 3691 3692 if (!(ehc->i.probe_mask & (1 << dev->devno)) || 3693 (ehc->did_probe_mask & (1 << dev->devno))) 3694 return 0; 3695 3696 ata_eh_detach_dev(dev); 3697 ata_dev_init(dev); 3698 ehc->did_probe_mask |= (1 << dev->devno); 3699 ehc->i.action |= ATA_EH_RESET; 3700 ehc->saved_xfer_mode[dev->devno] = 0; 3701 ehc->saved_ncq_enabled &= ~(1 << dev->devno); 3702 3703 /* the link maybe in a deep sleep, wake it up */ 3704 if (link->lpm_policy > ATA_LPM_MAX_POWER) { 3705 if (ata_is_host_link(link)) 3706 link->ap->ops->set_lpm(link, ATA_LPM_MAX_POWER, 3707 ATA_LPM_EMPTY); 3708 else 3709 sata_pmp_set_lpm(link, ATA_LPM_MAX_POWER, 3710 ATA_LPM_EMPTY); 3711 } 3712 3713 /* Record and count probe trials on the ering. The specific 3714 * error mask used is irrelevant. Because a successful device 3715 * detection clears the ering, this count accumulates only if 3716 * there are consecutive failed probes. 3717 * 3718 * If the count is equal to or higher than ATA_EH_PROBE_TRIALS 3719 * in the last ATA_EH_PROBE_TRIAL_INTERVAL, link speed is 3720 * forced to 1.5Gbps. 3721 * 3722 * This is to work around cases where failed link speed 3723 * negotiation results in device misdetection leading to 3724 * infinite DEVXCHG or PHRDY CHG events. 
3725 */ 3726 ata_ering_record(&dev->ering, 0, AC_ERR_OTHER); 3727 ata_ering_map(&dev->ering, ata_count_probe_trials_cb, &trials); 3728 3729 if (trials > ATA_EH_PROBE_TRIALS) 3730 sata_down_spd_limit(link, 1); 3731 3732 return 1; 3733 } 3734 3735 static int ata_eh_handle_dev_fail(struct ata_device *dev, int err) 3736 { 3737 struct ata_eh_context *ehc = &dev->link->eh_context; 3738 3739 /* -EAGAIN from EH routine indicates retry without prejudice. 3740 * The requester is responsible for ensuring forward progress. 3741 */ 3742 if (err != -EAGAIN) 3743 ehc->tries[dev->devno]--; 3744 3745 switch (err) { 3746 case -ENODEV: 3747 /* device missing or wrong IDENTIFY data, schedule probing */ 3748 ehc->i.probe_mask |= (1 << dev->devno); 3749 fallthrough; 3750 case -EINVAL: 3751 /* give it just one more chance */ 3752 ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1); 3753 fallthrough; 3754 case -EIO: 3755 if (ehc->tries[dev->devno] == 1) { 3756 /* This is the last chance, better to slow 3757 * down than lose it. 
3758 */ 3759 sata_down_spd_limit(ata_dev_phys_link(dev), 0); 3760 if (dev->pio_mode > XFER_PIO_0) 3761 ata_down_xfermask_limit(dev, ATA_DNXFER_PIO); 3762 } 3763 } 3764 3765 if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) { 3766 /* disable device if it has used up all its chances */ 3767 ata_dev_disable(dev); 3768 3769 /* detach if offline */ 3770 if (ata_phys_link_offline(ata_dev_phys_link(dev))) 3771 ata_eh_detach_dev(dev); 3772 3773 /* schedule probe if necessary */ 3774 if (ata_eh_schedule_probe(dev)) { 3775 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 3776 memset(ehc->cmd_timeout_idx[dev->devno], 0, 3777 sizeof(ehc->cmd_timeout_idx[dev->devno])); 3778 } 3779 3780 return 1; 3781 } else { 3782 ehc->i.action |= ATA_EH_RESET; 3783 return 0; 3784 } 3785 } 3786 3787 /** 3788 * ata_eh_recover - recover host port after error 3789 * @ap: host port to recover 3790 * @reset_ops: The set of reset operations to use 3791 * @r_failed_link: out parameter for failed link 3792 * 3793 * This is the alpha and omega, eum and yang, heart and soul of 3794 * libata exception handling. On entry, actions required to 3795 * recover each link and hotplug requests are recorded in the 3796 * link's eh_context. This function executes all the operations 3797 * with appropriate retrials and fallbacks to resurrect failed 3798 * devices, detach goners and greet newcomers. 3799 * 3800 * LOCKING: 3801 * Kernel thread context (may sleep). 3802 * 3803 * RETURNS: 3804 * 0 on success, -errno on failure. 3805 */ 3806 int ata_eh_recover(struct ata_port *ap, struct ata_reset_operations *reset_ops, 3807 struct ata_link **r_failed_link) 3808 { 3809 struct ata_link *link; 3810 struct ata_device *dev; 3811 int rc, nr_fails; 3812 unsigned long flags, deadline; 3813 3814 /* prep for recovery */ 3815 ata_for_each_link(link, ap, EDGE) { 3816 struct ata_eh_context *ehc = &link->eh_context; 3817 3818 /* re-enable link? 
*/ 3819 if (ehc->i.action & ATA_EH_ENABLE_LINK) { 3820 ata_eh_about_to_do(link, NULL, ATA_EH_ENABLE_LINK); 3821 spin_lock_irqsave(ap->lock, flags); 3822 link->flags &= ~ATA_LFLAG_DISABLED; 3823 spin_unlock_irqrestore(ap->lock, flags); 3824 ata_eh_done(link, NULL, ATA_EH_ENABLE_LINK); 3825 } 3826 3827 ata_for_each_dev(dev, link, ALL) { 3828 if (link->flags & ATA_LFLAG_NO_RETRY) 3829 ehc->tries[dev->devno] = 1; 3830 else 3831 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 3832 3833 /* collect port action mask recorded in dev actions */ 3834 ehc->i.action |= ehc->i.dev_action[dev->devno] & 3835 ~ATA_EH_PERDEV_MASK; 3836 ehc->i.dev_action[dev->devno] &= ATA_EH_PERDEV_MASK; 3837 3838 /* process hotplug request */ 3839 if (dev->flags & ATA_DFLAG_DETACH) 3840 ata_eh_detach_dev(dev); 3841 3842 /* schedule probe if necessary */ 3843 if (!ata_dev_enabled(dev)) 3844 ata_eh_schedule_probe(dev); 3845 } 3846 } 3847 3848 retry: 3849 rc = 0; 3850 3851 /* if UNLOADING, finish immediately */ 3852 if (ap->pflags & ATA_PFLAG_UNLOADING) 3853 goto out; 3854 3855 /* prep for EH */ 3856 ata_for_each_link(link, ap, EDGE) { 3857 struct ata_eh_context *ehc = &link->eh_context; 3858 3859 /* skip EH if possible. 
*/ 3860 if (ata_eh_skip_recovery(link)) 3861 ehc->i.action = 0; 3862 3863 ata_for_each_dev(dev, link, ALL) 3864 ehc->classes[dev->devno] = ATA_DEV_UNKNOWN; 3865 } 3866 3867 /* reset */ 3868 ata_for_each_link(link, ap, EDGE) { 3869 struct ata_eh_context *ehc = &link->eh_context; 3870 3871 if (!(ehc->i.action & ATA_EH_RESET)) 3872 continue; 3873 3874 rc = ata_eh_reset(link, ata_link_nr_vacant(link), reset_ops); 3875 if (rc) { 3876 ata_link_err(link, "reset failed, giving up\n"); 3877 goto out; 3878 } 3879 } 3880 3881 do { 3882 unsigned long now; 3883 3884 /* 3885 * clears ATA_EH_PARK in eh_info and resets 3886 * ap->park_req_pending 3887 */ 3888 ata_eh_pull_park_action(ap); 3889 3890 deadline = jiffies; 3891 ata_for_each_link(link, ap, EDGE) { 3892 ata_for_each_dev(dev, link, ALL) { 3893 struct ata_eh_context *ehc = &link->eh_context; 3894 unsigned long tmp; 3895 3896 if (dev->class != ATA_DEV_ATA && 3897 dev->class != ATA_DEV_ZAC) 3898 continue; 3899 if (!(ehc->i.dev_action[dev->devno] & 3900 ATA_EH_PARK)) 3901 continue; 3902 tmp = dev->unpark_deadline; 3903 if (time_before(deadline, tmp)) 3904 deadline = tmp; 3905 else if (time_before_eq(tmp, jiffies)) 3906 continue; 3907 if (ehc->unloaded_mask & (1 << dev->devno)) 3908 continue; 3909 3910 ata_eh_park_issue_cmd(dev, 1); 3911 } 3912 } 3913 3914 now = jiffies; 3915 if (time_before_eq(deadline, now)) 3916 break; 3917 3918 ata_eh_release(ap); 3919 deadline = wait_for_completion_timeout(&ap->park_req_pending, 3920 deadline - now); 3921 ata_eh_acquire(ap); 3922 } while (deadline); 3923 ata_for_each_link(link, ap, EDGE) { 3924 ata_for_each_dev(dev, link, ALL) { 3925 if (!(link->eh_context.unloaded_mask & 3926 (1 << dev->devno))) 3927 continue; 3928 3929 ata_eh_park_issue_cmd(dev, 0); 3930 ata_eh_done(link, dev, ATA_EH_PARK); 3931 } 3932 } 3933 3934 /* the rest */ 3935 nr_fails = 0; 3936 ata_for_each_link(link, ap, PMP_FIRST) { 3937 struct ata_eh_context *ehc = &link->eh_context; 3938 3939 if (sata_pmp_attached(ap) && 
ata_is_host_link(link)) 3940 goto config_lpm; 3941 3942 /* revalidate existing devices and attach new ones */ 3943 rc = ata_eh_revalidate_and_attach(link, &dev); 3944 if (rc) 3945 goto rest_fail; 3946 3947 /* if PMP got attached, return, pmp EH will take care of it */ 3948 if (link->device->class == ATA_DEV_PMP) { 3949 ehc->i.action = 0; 3950 return 0; 3951 } 3952 3953 /* configure transfer mode if necessary */ 3954 if (ehc->i.flags & ATA_EHI_SETMODE) { 3955 rc = ata_eh_set_mode(link, &dev); 3956 if (rc) 3957 goto rest_fail; 3958 ehc->i.flags &= ~ATA_EHI_SETMODE; 3959 } 3960 3961 /* If reset has been issued, clear UA to avoid 3962 * disrupting the current users of the device. 3963 */ 3964 if (ehc->i.flags & ATA_EHI_DID_RESET) { 3965 ata_for_each_dev(dev, link, ALL) { 3966 if (dev->class != ATA_DEV_ATAPI) 3967 continue; 3968 rc = atapi_eh_clear_ua(dev); 3969 if (rc) 3970 goto rest_fail; 3971 if (zpodd_dev_enabled(dev)) 3972 zpodd_post_poweron(dev); 3973 } 3974 } 3975 3976 /* 3977 * Make sure to transition devices to the active power mode 3978 * if needed (e.g. if we were scheduled on system resume). 
3979 */ 3980 ata_for_each_dev(dev, link, ENABLED) { 3981 if (ehc->i.dev_action[dev->devno] & ATA_EH_SET_ACTIVE) { 3982 ata_dev_power_set_active(dev); 3983 ata_eh_done(link, dev, ATA_EH_SET_ACTIVE); 3984 } 3985 } 3986 3987 /* retry flush if necessary */ 3988 ata_for_each_dev(dev, link, ALL) { 3989 if (dev->class != ATA_DEV_ATA && 3990 dev->class != ATA_DEV_ZAC) 3991 continue; 3992 rc = ata_eh_maybe_retry_flush(dev); 3993 if (rc) 3994 goto rest_fail; 3995 } 3996 3997 config_lpm: 3998 /* configure link power saving */ 3999 if (link->lpm_policy != ap->target_lpm_policy) { 4000 rc = ata_eh_link_set_lpm(link, ap->target_lpm_policy, 4001 &dev); 4002 if (rc) 4003 goto rest_fail; 4004 } 4005 4006 /* this link is okay now */ 4007 ehc->i.flags = 0; 4008 continue; 4009 4010 rest_fail: 4011 nr_fails++; 4012 if (dev) 4013 ata_eh_handle_dev_fail(dev, rc); 4014 4015 if (ata_port_is_frozen(ap)) { 4016 /* PMP reset requires working host port. 4017 * Can't retry if it's frozen. 4018 */ 4019 if (sata_pmp_attached(ap)) 4020 goto out; 4021 break; 4022 } 4023 } 4024 4025 if (nr_fails) 4026 goto retry; 4027 4028 out: 4029 if (rc && r_failed_link) 4030 *r_failed_link = link; 4031 4032 return rc; 4033 } 4034 4035 /** 4036 * ata_eh_finish - finish up EH 4037 * @ap: host port to finish EH for 4038 * 4039 * Recovery is complete. Clean up EH states and retry or finish 4040 * failed qcs. 4041 * 4042 * LOCKING: 4043 * None. 4044 */ 4045 void ata_eh_finish(struct ata_port *ap) 4046 { 4047 struct ata_queued_cmd *qc; 4048 int tag; 4049 4050 /* retry or finish qcs */ 4051 ata_qc_for_each_raw(ap, qc, tag) { 4052 if (!(qc->flags & ATA_QCFLAG_EH)) 4053 continue; 4054 4055 if (qc->err_mask) { 4056 /* FIXME: Once EH migration is complete, 4057 * generate sense data in this function, 4058 * considering both err_mask and tf. 
4059 */ 4060 if (qc->flags & ATA_QCFLAG_RETRY) { 4061 /* 4062 * Since qc->err_mask is set, ata_eh_qc_retry() 4063 * will not increment scmd->allowed, so upper 4064 * layer will only retry the command if it has 4065 * not already been retried too many times. 4066 */ 4067 ata_eh_qc_retry(qc); 4068 } else { 4069 ata_eh_qc_complete(qc); 4070 } 4071 } else { 4072 if (qc->flags & ATA_QCFLAG_SENSE_VALID || 4073 qc->flags & ATA_QCFLAG_EH_SUCCESS_CMD) { 4074 ata_eh_qc_complete(qc); 4075 } else { 4076 /* feed zero TF to sense generation */ 4077 memset(&qc->result_tf, 0, sizeof(qc->result_tf)); 4078 /* 4079 * Since qc->err_mask is not set, 4080 * ata_eh_qc_retry() will increment 4081 * scmd->allowed, so upper layer is guaranteed 4082 * to retry the command. 4083 */ 4084 ata_eh_qc_retry(qc); 4085 } 4086 } 4087 } 4088 4089 /* make sure nr_active_links is zero after EH */ 4090 WARN_ON(ap->nr_active_links); 4091 ap->nr_active_links = 0; 4092 } 4093 4094 /** 4095 * ata_std_error_handler - standard error handler 4096 * @ap: host port to handle error for 4097 * 4098 * Perform standard error handling sequence. 4099 * 4100 * LOCKING: 4101 * Kernel thread context (may sleep). 
4102 */ 4103 void ata_std_error_handler(struct ata_port *ap) 4104 { 4105 struct ata_reset_operations *reset_ops = &ap->ops->reset; 4106 struct ata_link *link = &ap->link; 4107 int rc; 4108 4109 /* Ignore built-in hardresets if SCR access is not available */ 4110 if ((reset_ops->hardreset == sata_std_hardreset || 4111 reset_ops->hardreset == sata_sff_hardreset) && 4112 !sata_scr_valid(link)) 4113 link->flags |= ATA_LFLAG_NO_HRST; 4114 4115 ata_eh_autopsy(ap); 4116 ata_eh_report(ap); 4117 4118 rc = ata_eh_recover(ap, reset_ops, NULL); 4119 if (rc) { 4120 struct ata_device *dev; 4121 4122 ata_for_each_dev(dev, link, ALL) 4123 ata_dev_disable(dev); 4124 } 4125 4126 ata_eh_finish(ap); 4127 } 4128 EXPORT_SYMBOL_GPL(ata_std_error_handler); 4129 4130 #ifdef CONFIG_PM 4131 /** 4132 * ata_eh_handle_port_suspend - perform port suspend operation 4133 * @ap: port to suspend 4134 * 4135 * Suspend @ap. 4136 * 4137 * LOCKING: 4138 * Kernel thread context (may sleep). 4139 */ 4140 static void ata_eh_handle_port_suspend(struct ata_port *ap) 4141 { 4142 unsigned long flags; 4143 int rc = 0; 4144 struct ata_device *dev; 4145 struct ata_link *link; 4146 4147 /* are we suspending? */ 4148 spin_lock_irqsave(ap->lock, flags); 4149 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || 4150 ap->pm_mesg.event & PM_EVENT_RESUME) { 4151 spin_unlock_irqrestore(ap->lock, flags); 4152 return; 4153 } 4154 spin_unlock_irqrestore(ap->lock, flags); 4155 4156 WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED); 4157 4158 /* 4159 * We will reach this point for all of the PM events: 4160 * PM_EVENT_SUSPEND (if runtime pm, PM_EVENT_AUTO will also be set) 4161 * PM_EVENT_FREEZE, and PM_EVENT_HIBERNATE. 4162 * 4163 * We do not want to perform disk spin down for PM_EVENT_FREEZE. 4164 * (Spin down will be performed by the subsequent PM_EVENT_HIBERNATE.) 
4165 */ 4166 if (!(ap->pm_mesg.event & PM_EVENT_FREEZE)) { 4167 /* Set all devices attached to the port in standby mode */ 4168 ata_for_each_link(link, ap, HOST_FIRST) { 4169 ata_for_each_dev(dev, link, ENABLED) 4170 ata_dev_power_set_standby(dev); 4171 } 4172 } 4173 4174 /* 4175 * If we have a ZPODD attached, check its zero 4176 * power ready status before the port is frozen. 4177 * Only needed for runtime suspend. 4178 */ 4179 if (PMSG_IS_AUTO(ap->pm_mesg)) { 4180 ata_for_each_dev(dev, &ap->link, ENABLED) { 4181 if (zpodd_dev_enabled(dev)) 4182 zpodd_on_suspend(dev); 4183 } 4184 } 4185 4186 /* suspend */ 4187 ata_eh_freeze_port(ap); 4188 4189 if (ap->ops->port_suspend) 4190 rc = ap->ops->port_suspend(ap, ap->pm_mesg); 4191 4192 ata_acpi_set_state(ap, ap->pm_mesg); 4193 4194 /* update the flags */ 4195 spin_lock_irqsave(ap->lock, flags); 4196 4197 ap->pflags &= ~ATA_PFLAG_PM_PENDING; 4198 if (rc == 0) 4199 ap->pflags |= ATA_PFLAG_SUSPENDED; 4200 else if (ata_port_is_frozen(ap)) 4201 ata_port_schedule_eh(ap); 4202 4203 spin_unlock_irqrestore(ap->lock, flags); 4204 4205 return; 4206 } 4207 4208 /** 4209 * ata_eh_handle_port_resume - perform port resume operation 4210 * @ap: port to resume 4211 * 4212 * Resume @ap. 4213 * 4214 * LOCKING: 4215 * Kernel thread context (may sleep). 4216 */ 4217 static void ata_eh_handle_port_resume(struct ata_port *ap) 4218 { 4219 struct ata_link *link; 4220 struct ata_device *dev; 4221 unsigned long flags; 4222 4223 /* are we resuming? */ 4224 spin_lock_irqsave(ap->lock, flags); 4225 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || 4226 !(ap->pm_mesg.event & PM_EVENT_RESUME)) { 4227 spin_unlock_irqrestore(ap->lock, flags); 4228 return; 4229 } 4230 spin_unlock_irqrestore(ap->lock, flags); 4231 4232 WARN_ON(!(ap->pflags & ATA_PFLAG_SUSPENDED)); 4233 4234 /* 4235 * Error timestamps are in jiffies which doesn't run while 4236 * suspended and PHY events during resume isn't too uncommon. 
4237 * When the two are combined, it can lead to unnecessary speed 4238 * downs if the machine is suspended and resumed repeatedly. 4239 * Clear error history. 4240 */ 4241 ata_for_each_link(link, ap, HOST_FIRST) 4242 ata_for_each_dev(dev, link, ALL) 4243 ata_ering_clear(&dev->ering); 4244 4245 ata_acpi_set_state(ap, ap->pm_mesg); 4246 4247 if (ap->ops->port_resume) 4248 ap->ops->port_resume(ap); 4249 4250 /* tell ACPI that we're resuming */ 4251 ata_acpi_on_resume(ap); 4252 4253 /* update the flags */ 4254 spin_lock_irqsave(ap->lock, flags); 4255 ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED); 4256 ap->pflags |= ATA_PFLAG_RESUMING; 4257 spin_unlock_irqrestore(ap->lock, flags); 4258 } 4259 #endif /* CONFIG_PM */ 4260