/*
 *  libata-eh.c - libata error handling
 *
 *  Maintained by:  Jeff Garzik <jgarzik@pobox.com>
 *		    Please ALWAYS copy linux-ide@vger.kernel.org
 *		    on emails.
 *
 *  Copyright 2006 Tejun Heo <htejun@gmail.com>
 *
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License as
 *  published by the Free Software Foundation; either version 2, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
 *  USA.
 *
 *
 *  libata documentation is available via 'make {ps|pdf}docs',
 *  as Documentation/DocBook/libata.*
 *
 *  Hardware documentation available from http://www.t13.org/ and
 *  http://www.sata-io.org/
 *
 */

#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/pci.h>
#include <scsi/scsi.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_dbg.h>
#include "../scsi/scsi_transport_api.h"

#include <linux/libata.h>

#include "libata.h"

enum {
	/* speed down verdicts */
	ATA_EH_SPDN_NCQ_OFF		= (1 << 0),
	ATA_EH_SPDN_SPEED_DOWN		= (1 << 1),
	ATA_EH_SPDN_FALLBACK_TO_PIO	= (1 << 2),
	ATA_EH_SPDN_KEEP_ERRORS		= (1 << 3),

	/* error flags */
	ATA_EFLAG_IS_IO			= (1 << 0),
	ATA_EFLAG_DUBIOUS_XFER		= (1 << 1),

	/* error categories */
	ATA_ECAT_NONE			= 0,
	ATA_ECAT_ATA_BUS		= 1,
	ATA_ECAT_TOUT_HSM		= 2,
	ATA_ECAT_UNK_DEV		= 3,
	ATA_ECAT_DUBIOUS_NONE		= 4,
	ATA_ECAT_DUBIOUS_ATA_BUS	= 5,
	ATA_ECAT_DUBIOUS_TOUT_HSM	= 6,
	ATA_ECAT_DUBIOUS_UNK_DEV	= 7,
	ATA_ECAT_NR			= 8,

	ATA_EH_CMD_DFL_TIMEOUT		= 5000,

	/* always put at least this amount of time between resets */
	ATA_EH_RESET_COOL_DOWN		= 5000,

	/* Waiting in ->prereset can never be reliable.  It's
	 * sometimes nice to wait there but it can't be depended upon;
	 * otherwise, we wouldn't be resetting.  Just give it enough
	 * time for most drives to spin up.
	 */
	ATA_EH_PRERESET_TIMEOUT		= 10000,
	ATA_EH_FASTDRAIN_INTERVAL	= 3000,

	ATA_EH_UA_TRIES			= 5,

	/* probe speed down parameters, see ata_eh_schedule_probe() */
	ATA_EH_PROBE_TRIAL_INTERVAL	= 60000,	/* 1 min */
	ATA_EH_PROBE_TRIALS		= 2,
};

/* The following table determines how we sequence resets.  Each entry
 * represents timeout for that try.  The first try can be soft or
 * hardreset.  All others are hardreset if available.  In most cases
 * the first reset w/ 10sec timeout should succeed.  Following entries
 * are mostly for error handling, hotplug and retarded devices.
 */
static const unsigned long ata_eh_reset_timeouts[] = {
	10000,	/* most drives spin up by 10sec */
	10000,	/* > 99% working drives spin up before 20sec */
	35000,	/* give > 30 secs of idleness for retarded devices */
	5000,	/* and sweet one last chance */
	ULONG_MAX, /* > 1 min has elapsed, give up */
};

static const unsigned long ata_eh_identify_timeouts[] = {
	5000,	/* covers > 99% of successes and not too boring on failures */
	10000,	/* combined time till here is enough even for media access */
	30000,	/* for true idiots */
	ULONG_MAX,
};

static const unsigned long ata_eh_other_timeouts[] = {
	5000,	/* same rationale as identify timeout */
	10000,	/* ditto */
	/* but no merciful 30sec for other commands, it just isn't worth it */
	ULONG_MAX,
};

struct ata_eh_cmd_timeout_ent {
	const u8		*commands;
	const unsigned long	*timeouts;
};

/* The following table determines timeouts to use for EH internal
 * commands.  Each table entry is a command class and matches the
 * commands the entry applies to and the timeout table to use.
 *
 * On the retry after a command timed out, the next timeout value from
 * the table is used.  If the table doesn't contain further entries,
 * the last value is used.
 *
 * ehc->cmd_timeout_idx keeps track of which timeout to use per
 * command class, so if SET_FEATURES times out on the first try, the
 * next try will use the second timeout value only for that class.
 */
#define CMDS(cmds...)	(const u8 []){ cmds, 0 }
static const struct ata_eh_cmd_timeout_ent
ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = {
	{ .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI),
	  .timeouts = ata_eh_identify_timeouts, },
	{ .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_SET_FEATURES),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS),
	  .timeouts = ata_eh_other_timeouts, },
};
#undef CMDS
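
/* Illustrative walk-through (not part of the driver logic): assume
 * SET_FEATURES, which belongs to the ata_eh_other_timeouts class,
 * keeps timing out.  ata_internal_cmd_timed_out() (below) bumps
 * cmd_timeout_idx for that class only while a further finite entry
 * exists, so the sequence of timeouts handed out is:
 *
 *	try 0: 5000ms  -> times out, idx 0 -> 1
 *	try 1: 10000ms -> times out, idx stays (next entry is ULONG_MAX)
 *	try 2: 10000ms -> the last finite value keeps being reused
 */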

static void __ata_port_freeze(struct ata_port *ap);
#ifdef CONFIG_PM
static void ata_eh_handle_port_suspend(struct ata_port *ap);
static void ata_eh_handle_port_resume(struct ata_port *ap);
#else /* CONFIG_PM */
static void ata_eh_handle_port_suspend(struct ata_port *ap)
{ }

static void ata_eh_handle_port_resume(struct ata_port *ap)
{ }
#endif /* CONFIG_PM */

static void __ata_ehi_pushv_desc(struct ata_eh_info *ehi, const char *fmt,
				 va_list args)
{
	ehi->desc_len += vscnprintf(ehi->desc + ehi->desc_len,
				    ATA_EH_DESC_LEN - ehi->desc_len,
				    fmt, args);
}

/**
 *	__ata_ehi_push_desc - push error description without adding separator
 *	@ehi: target EHI
 *	@fmt: printf format string
 *
 *	Format string according to @fmt and append it to @ehi->desc.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	__ata_ehi_pushv_desc(ehi, fmt, args);
	va_end(args);
}

/**
 *	ata_ehi_push_desc - push error description with separator
 *	@ehi: target EHI
 *	@fmt: printf format string
 *
 *	Format string according to @fmt and append it to @ehi->desc.
 *	If @ehi->desc is not empty, ", " is added in-between.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
{
	va_list args;

	if (ehi->desc_len)
		__ata_ehi_push_desc(ehi, ", ");

	va_start(args, fmt);
	__ata_ehi_pushv_desc(ehi, fmt, args);
	va_end(args);
}

/**
 *	ata_ehi_clear_desc - clean error description
 *	@ehi: target EHI
 *
 *	Clear @ehi->desc.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_ehi_clear_desc(struct ata_eh_info *ehi)
{
	ehi->desc[0] = '\0';
	ehi->desc_len = 0;
}

/**
 *	ata_port_desc - append port description
 *	@ap: target ATA port
 *	@fmt: printf format string
 *
 *	Format string according to @fmt and append it to port
 *	description.  If port description is not empty, " " is added
 *	in-between.  This function is to be used while initializing
 *	ata_host.  The description is printed on host registration.
 *
 *	LOCKING:
 *	None.
 */
void ata_port_desc(struct ata_port *ap, const char *fmt, ...)
{
	va_list args;

	WARN_ON(!(ap->pflags & ATA_PFLAG_INITIALIZING));

	if (ap->link.eh_info.desc_len)
		__ata_ehi_push_desc(&ap->link.eh_info, " ");

	va_start(args, fmt);
	__ata_ehi_pushv_desc(&ap->link.eh_info, fmt, args);
	va_end(args);
}
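
/* Usage sketch (hypothetical LLDD code, not taken from this file):
 * an error description is built up piecewise under the host lock,
 * e.g.
 *
 *	ata_ehi_push_desc(ehi, "irq_stat 0x%08x", irq_stat);
 *	ata_ehi_push_desc(ehi, "cmd 0x%x", qc->tf.command);
 *
 * which yields "irq_stat 0x..., cmd 0x..." in the EH report.
 * ata_port_desc() serves the same purpose for the one-off port
 * description printed at host registration time.
 */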

#ifdef CONFIG_PCI

/**
 *	ata_port_pbar_desc - append PCI BAR description
 *	@ap: target ATA port
 *	@bar: target PCI BAR
 *	@offset: offset into PCI BAR
 *	@name: name of the area
 *
 *	If @offset is negative, this function formats a string which
 *	contains the name, address, size and type of the BAR and
 *	appends it to the port description.  If @offset is zero or
 *	positive, only name and offset address is appended.
 *
 *	LOCKING:
 *	None.
 */
void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset,
			const char *name)
{
	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
	char *type = "";
	unsigned long long start, len;

	if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM)
		type = "m";
	else if (pci_resource_flags(pdev, bar) & IORESOURCE_IO)
		type = "i";

	start = (unsigned long long)pci_resource_start(pdev, bar);
	len = (unsigned long long)pci_resource_len(pdev, bar);

	if (offset < 0)
		ata_port_desc(ap, "%s %s%llu@0x%llx", name, type, len, start);
	else
		ata_port_desc(ap, "%s 0x%llx", name,
				start + (unsigned long long)offset);
}

#endif /* CONFIG_PCI */

static int ata_lookup_timeout_table(u8 cmd)
{
	int i;

	for (i = 0; i < ATA_EH_CMD_TIMEOUT_TABLE_SIZE; i++) {
		const u8 *cur;

		for (cur = ata_eh_cmd_timeout_table[i].commands; *cur; cur++)
			if (*cur == cmd)
				return i;
	}

	return -1;
}

/**
 *	ata_internal_cmd_timeout - determine timeout for an internal command
 *	@dev: target device
 *	@cmd: internal command to be issued
 *
 *	Determine timeout for internal command @cmd for @dev.
 *
 *	LOCKING:
 *	EH context.
 *
 *	RETURNS:
 *	Determined timeout.
 */
unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;
	int ent = ata_lookup_timeout_table(cmd);
	int idx;

	if (ent < 0)
		return ATA_EH_CMD_DFL_TIMEOUT;

	idx = ehc->cmd_timeout_idx[dev->devno][ent];
	return ata_eh_cmd_timeout_table[ent].timeouts[idx];
}

/**
 *	ata_internal_cmd_timed_out - notification for internal command timeout
 *	@dev: target device
 *	@cmd: internal command which timed out
 *
 *	Notify EH that internal command @cmd for @dev timed out.  This
 *	function should be called only for commands whose timeouts are
 *	determined using ata_internal_cmd_timeout().
 *
 *	LOCKING:
 *	EH context.
 */
void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;
	int ent = ata_lookup_timeout_table(cmd);
	int idx;

	if (ent < 0)
		return;

	idx = ehc->cmd_timeout_idx[dev->devno][ent];
	if (ata_eh_cmd_timeout_table[ent].timeouts[idx + 1] != ULONG_MAX)
		ehc->cmd_timeout_idx[dev->devno][ent]++;
}

static void ata_ering_record(struct ata_ering *ering, unsigned int eflags,
			     unsigned int err_mask)
{
	struct ata_ering_entry *ent;

	WARN_ON(!err_mask);

	ering->cursor++;
	ering->cursor %= ATA_ERING_SIZE;

	ent = &ering->ring[ering->cursor];
	ent->eflags = eflags;
	ent->err_mask = err_mask;
	ent->timestamp = get_jiffies_64();
}

static struct ata_ering_entry *ata_ering_top(struct ata_ering *ering)
{
	struct ata_ering_entry *ent = &ering->ring[ering->cursor];

	if (ent->err_mask)
		return ent;
	return NULL;
}

static void ata_ering_clear(struct ata_ering *ering)
{
	memset(ering, 0, sizeof(*ering));
}

static int ata_ering_map(struct ata_ering *ering,
			 int (*map_fn)(struct ata_ering_entry *, void *),
			 void *arg)
{
	int idx, rc = 0;
	struct ata_ering_entry *ent;

	idx = ering->cursor;
	do {
		ent = &ering->ring[idx];
		if (!ent->err_mask)
			break;
		rc = map_fn(ent, arg);
		if (rc)
			break;
		idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE;
	} while (idx != ering->cursor);

	return rc;
}
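
/* Note on traversal order: ata_ering_map() visits entries newest
 * first, starting at ->cursor and walking backwards, and stops early
 * either at the first unused slot (err_mask == 0) or when map_fn
 * returns non-zero.  For example, speed_down_verdict_cb() further
 * down returns -1 for entries older than its cut-off time, which
 * terminates the walk as soon as the history window is exhausted.
 */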

static unsigned int ata_eh_dev_action(struct ata_device *dev)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;

	return ehc->i.action | ehc->i.dev_action[dev->devno];
}

static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev,
				struct ata_eh_info *ehi, unsigned int action)
{
	struct ata_device *tdev;

	if (!dev) {
		ehi->action &= ~action;
		ata_for_each_dev(tdev, link, ALL)
			ehi->dev_action[tdev->devno] &= ~action;
	} else {
		/* doesn't make sense for port-wide EH actions */
		WARN_ON(!(action & ATA_EH_PERDEV_MASK));

		/* break ehi->action into ehi->dev_action */
		if (ehi->action & action) {
			ata_for_each_dev(tdev, link, ALL)
				ehi->dev_action[tdev->devno] |=
					ehi->action & action;
			ehi->action &= ~action;
		}

		/* turn off the specified per-dev action */
		ehi->dev_action[dev->devno] &= ~action;
	}
}

/**
 *	ata_scsi_timed_out - SCSI layer time out callback
 *	@cmd: timed out SCSI command
 *
 *	Handles SCSI layer timeout.  We race with normal completion of
 *	the qc for @cmd.  If the qc is already gone, we lose and let
 *	the scsi command finish (EH_HANDLED).  Otherwise, the qc has
 *	timed out and EH should be invoked.  Prevent ata_qc_complete()
 *	from finishing it by setting EH_SCHEDULED and return
 *	EH_NOT_HANDLED.
 *
 *	TODO: kill this function once old EH is gone.
 *
 *	LOCKING:
 *	Called from timer context
 *
 *	RETURNS:
 *	EH_HANDLED or EH_NOT_HANDLED
 */
enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
{
	struct Scsi_Host *host = cmd->device->host;
	struct ata_port *ap = ata_shost_to_port(host);
	unsigned long flags;
	struct ata_queued_cmd *qc;
	enum blk_eh_timer_return ret;

	DPRINTK("ENTER\n");

	if (ap->ops->error_handler) {
		ret = BLK_EH_NOT_HANDLED;
		goto out;
	}

	ret = BLK_EH_HANDLED;
	spin_lock_irqsave(ap->lock, flags);
	qc = ata_qc_from_tag(ap, ap->link.active_tag);
	if (qc) {
		WARN_ON(qc->scsicmd != cmd);
		qc->flags |= ATA_QCFLAG_EH_SCHEDULED;
		qc->err_mask |= AC_ERR_TIMEOUT;
		ret = BLK_EH_NOT_HANDLED;
	}
	spin_unlock_irqrestore(ap->lock, flags);

 out:
	DPRINTK("EXIT, ret=%d\n", ret);
	return ret;
}

static void ata_eh_unload(struct ata_port *ap)
{
	struct ata_link *link;
	struct ata_device *dev;
	unsigned long flags;

	/* Restore SControl IPM and SPD for the next driver and
	 * disable attached devices.
	 */
	ata_for_each_link(link, ap, PMP_FIRST) {
		sata_scr_write(link, SCR_CONTROL, link->saved_scontrol & 0xff0);
		ata_for_each_dev(dev, link, ALL)
			ata_dev_disable(dev);
	}

	/* freeze and set UNLOADED */
	spin_lock_irqsave(ap->lock, flags);

	ata_port_freeze(ap);			/* won't be thawed */
	ap->pflags &= ~ATA_PFLAG_EH_PENDING;	/* clear pending from freeze */
	ap->pflags |= ATA_PFLAG_UNLOADED;

	spin_unlock_irqrestore(ap->lock, flags);
}
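
/* For reference, the SControl mask used above keeps only the SPD and
 * IPM fields while clearing DET (per the SATA spec, DET occupies bits
 * 3:0, SPD bits 7:4 and IPM bits 11:8):
 *
 *	saved_scontrol & 0xff0	== SPD | IPM preserved, DET forced to 0
 *
 * so the next driver takes over a link that isn't being held in a
 * DET-initiated reset state.
 */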

/**
 *	ata_scsi_error - SCSI layer error handler callback
 *	@host: SCSI host on which error occurred
 *
 *	Handles SCSI-layer-thrown error events.
 *
 *	LOCKING:
 *	Inherited from SCSI layer (none, can sleep)
 *
 *	RETURNS:
 *	Zero.
 */
void ata_scsi_error(struct Scsi_Host *host)
{
	struct ata_port *ap = ata_shost_to_port(host);
	int i;
	unsigned long flags;

	DPRINTK("ENTER\n");

	/* synchronize with port task */
	ata_port_flush_task(ap);

	/* synchronize with host lock and sort out timeouts */

	/* For new EH, all qcs are finished in one of three ways -
	 * normal completion, error completion, and SCSI timeout.
	 * Both completions can race against SCSI timeout.  When normal
	 * completion wins, the qc never reaches EH.  When error
	 * completion wins, the qc has ATA_QCFLAG_FAILED set.
	 *
	 * When SCSI timeout wins, things are a bit more complex.
	 * Normal or error completion can occur after the timeout but
	 * before this point.  In such cases, both types of
	 * completions are honored.  A scmd is determined to have
	 * timed out iff its associated qc is active and not failed.
	 */
	if (ap->ops->error_handler) {
		struct scsi_cmnd *scmd, *tmp;
		int nr_timedout = 0;

		spin_lock_irqsave(ap->lock, flags);

		/* This must occur under ap->lock as we don't want
		 * a polled recovery to race the real interrupt handler.
		 *
		 * The lost_interrupt handler checks for any completed but
		 * non-notified command and completes much like an IRQ handler.
		 *
		 * We then fall into the error recovery code which will treat
		 * this as if normal completion won the race.
		 */
		if (ap->ops->lost_interrupt)
			ap->ops->lost_interrupt(ap);

		list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) {
			struct ata_queued_cmd *qc;

			for (i = 0; i < ATA_MAX_QUEUE; i++) {
				qc = __ata_qc_from_tag(ap, i);
				if (qc->flags & ATA_QCFLAG_ACTIVE &&
				    qc->scsicmd == scmd)
					break;
			}

			if (i < ATA_MAX_QUEUE) {
				/* the scmd has an associated qc */
				if (!(qc->flags & ATA_QCFLAG_FAILED)) {
					/* which hasn't failed yet, timeout */
					qc->err_mask |= AC_ERR_TIMEOUT;
					qc->flags |= ATA_QCFLAG_FAILED;
					nr_timedout++;
				}
			} else {
				/* Normal completion occurred after
				 * SCSI timeout but before this point.
				 * Successfully complete it.
				 */
				scmd->retries = scmd->allowed;
				scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
			}
		}

		/* If we have timed out qcs, they belong to EH from
		 * this point but the state of the controller is
		 * unknown.  Freeze the port to make sure the IRQ
		 * handler doesn't diddle with those qcs.  This must
		 * be done atomically w.r.t. setting QCFLAG_FAILED.
		 */
		if (nr_timedout)
			__ata_port_freeze(ap);

		spin_unlock_irqrestore(ap->lock, flags);

		/* initialize eh_tries */
		ap->eh_tries = ATA_EH_MAX_TRIES;
	} else
		spin_unlock_wait(ap->lock);

	/* If we raced normal completion and there is nothing to
	 * recover (nr_timedout == 0), why exactly are we doing error
	 * recovery?
	 */

 repeat:
	/* invoke error handler */
	if (ap->ops->error_handler) {
		struct ata_link *link;

		/* kill fast drain timer */
		del_timer_sync(&ap->fastdrain_timer);

		/* process port resume request */
		ata_eh_handle_port_resume(ap);

		/* fetch & clear EH info */
		spin_lock_irqsave(ap->lock, flags);

		ata_for_each_link(link, ap, HOST_FIRST) {
			struct ata_eh_context *ehc = &link->eh_context;
			struct ata_device *dev;

			memset(&link->eh_context, 0, sizeof(link->eh_context));
			link->eh_context.i = link->eh_info;
			memset(&link->eh_info, 0, sizeof(link->eh_info));

			ata_for_each_dev(dev, link, ENABLED) {
				int devno = dev->devno;

				ehc->saved_xfer_mode[devno] = dev->xfer_mode;
				if (ata_ncq_enabled(dev))
					ehc->saved_ncq_enabled |= 1 << devno;
			}
		}

		ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
		ap->pflags &= ~ATA_PFLAG_EH_PENDING;
		ap->excl_link = NULL;	/* don't maintain exclusion over EH */

		spin_unlock_irqrestore(ap->lock, flags);

		/* invoke EH, skip if unloading or suspended */
		if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
			ap->ops->error_handler(ap);
		else {
			/* if unloading, commence suicide */
			if ((ap->pflags & ATA_PFLAG_UNLOADING) &&
			    !(ap->pflags & ATA_PFLAG_UNLOADED))
				ata_eh_unload(ap);
			ata_eh_finish(ap);
		}

		/* process port suspend request */
		ata_eh_handle_port_suspend(ap);

		/* Exception might have happened after ->error_handler
		 * recovered the port but before this point.  Repeat
		 * EH in such case.
		 */
		spin_lock_irqsave(ap->lock, flags);

		if (ap->pflags & ATA_PFLAG_EH_PENDING) {
			if (--ap->eh_tries) {
				spin_unlock_irqrestore(ap->lock, flags);
				goto repeat;
			}
			ata_port_printk(ap, KERN_ERR, "EH pending after %d "
					"tries, giving up\n", ATA_EH_MAX_TRIES);
			ap->pflags &= ~ATA_PFLAG_EH_PENDING;
		}

		/* this run is complete, make sure EH info is clear */
		ata_for_each_link(link, ap, HOST_FIRST)
			memset(&link->eh_info, 0, sizeof(link->eh_info));

		/* Clear host_eh_scheduled while holding ap->lock such
		 * that if exception occurs after this point but
		 * before EH completion, SCSI midlayer will
		 * re-initiate EH.
		 */
		host->host_eh_scheduled = 0;

		spin_unlock_irqrestore(ap->lock, flags);
	} else {
		WARN_ON(ata_qc_from_tag(ap, ap->link.active_tag) == NULL);
		ap->ops->eng_timeout(ap);
	}

	/* finish or retry handled scmd's and clean up */
	WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q));

	scsi_eh_flush_done_q(&ap->eh_done_q);

	/* clean up */
	spin_lock_irqsave(ap->lock, flags);

	if (ap->pflags & ATA_PFLAG_LOADING)
		ap->pflags &= ~ATA_PFLAG_LOADING;
	else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG)
		queue_delayed_work(ata_aux_wq, &ap->hotplug_task, 0);

	if (ap->pflags & ATA_PFLAG_RECOVERED)
		ata_port_printk(ap, KERN_INFO, "EH complete\n");

	ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED);

	/* tell wait_eh that we're done */
	ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS;
	wake_up_all(&ap->eh_wait_q);

	spin_unlock_irqrestore(ap->lock, flags);

	DPRINTK("EXIT\n");
}
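
/* Summary of the completion/timeout race handled above (new EH only):
 *
 *	normal completion first  -> qc never reaches EH
 *	error completion first   -> qc enters EH with ATA_QCFLAG_FAILED
 *	SCSI timeout first       -> qc is still active and not failed;
 *				    it is marked AC_ERR_TIMEOUT + FAILED
 *	completion after timeout -> honored; the scmd is finished as if
 *				    normal completion had won the race
 */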

/**
 *	ata_port_wait_eh - Wait for the currently pending EH to complete
 *	@ap: Port to wait EH for
 *
 *	Wait until the currently pending EH is complete.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
void ata_port_wait_eh(struct ata_port *ap)
{
	unsigned long flags;
	DEFINE_WAIT(wait);

 retry:
	spin_lock_irqsave(ap->lock, flags);

	while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) {
		prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irqrestore(ap->lock, flags);
		schedule();
		spin_lock_irqsave(ap->lock, flags);
	}
	finish_wait(&ap->eh_wait_q, &wait);

	spin_unlock_irqrestore(ap->lock, flags);

	/* make sure SCSI EH is complete */
	if (scsi_host_in_recovery(ap->scsi_host)) {
		msleep(10);
		goto retry;
	}
}
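
/* Typical usage sketch (assumed caller, not from this file): a thread
 * that needs EH to run to completion schedules it and then waits:
 *
 *	spin_lock_irqsave(ap->lock, flags);
 *	ata_port_schedule_eh(ap);
 *	spin_unlock_irqrestore(ap->lock, flags);
 *	ata_port_wait_eh(ap);
 *
 * The retry on scsi_host_in_recovery() keeps the caller from
 * returning while the SCSI midlayer itself is still in recovery.
 */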

static int ata_eh_nr_in_flight(struct ata_port *ap)
{
	unsigned int tag;
	int nr = 0;

	/* count only non-internal commands */
	for (tag = 0; tag < ATA_MAX_QUEUE - 1; tag++)
		if (ata_qc_from_tag(ap, tag))
			nr++;

	return nr;
}

void ata_eh_fastdrain_timerfn(unsigned long arg)
{
	struct ata_port *ap = (void *)arg;
	unsigned long flags;
	int cnt;

	spin_lock_irqsave(ap->lock, flags);

	cnt = ata_eh_nr_in_flight(ap);

	/* are we done? */
	if (!cnt)
		goto out_unlock;

	if (cnt == ap->fastdrain_cnt) {
		unsigned int tag;

		/* No progress during the last interval, tag all
		 * in-flight qcs as timed out and freeze the port.
		 */
		for (tag = 0; tag < ATA_MAX_QUEUE - 1; tag++) {
			struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);
			if (qc)
				qc->err_mask |= AC_ERR_TIMEOUT;
		}

		ata_port_freeze(ap);
	} else {
		/* some qcs have finished, give it another chance */
		ap->fastdrain_cnt = cnt;
		ap->fastdrain_timer.expires =
			ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
		add_timer(&ap->fastdrain_timer);
	}

 out_unlock:
	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_set_pending - set ATA_PFLAG_EH_PENDING and activate fast drain
 *	@ap: target ATA port
 *	@fastdrain: activate fast drain
 *
 *	Set ATA_PFLAG_EH_PENDING and activate fast drain if @fastdrain
 *	is non-zero and EH wasn't pending before.  Fast drain ensures
 *	that EH kicks in in a timely manner.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
static void ata_eh_set_pending(struct ata_port *ap, int fastdrain)
{
	int cnt;

	/* already scheduled? */
	if (ap->pflags & ATA_PFLAG_EH_PENDING)
		return;

	ap->pflags |= ATA_PFLAG_EH_PENDING;

	if (!fastdrain)
		return;

	/* do we have in-flight qcs? */
	cnt = ata_eh_nr_in_flight(ap);
	if (!cnt)
		return;

	/* activate fast drain */
	ap->fastdrain_cnt = cnt;
	ap->fastdrain_timer.expires =
		ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
	add_timer(&ap->fastdrain_timer);
}
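
/* Fast drain timeline, for illustration: suppose 4 qcs are in flight
 * when EH is scheduled.  fastdrain_cnt is set to 4 and the timer armed
 * for ATA_EH_FASTDRAIN_INTERVAL (3s).  When it fires, if fewer than 4
 * remain, the snapshot is updated and the timer re-armed; if still 4,
 * no progress was made, so every in-flight qc is tagged
 * AC_ERR_TIMEOUT and the port is frozen, letting EH take over
 * immediately instead of waiting out each command's own timeout.
 */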

/**
 *	ata_qc_schedule_eh - schedule qc for error handling
 *	@qc: command to schedule error handling for
 *
 *	Schedule error handling for @qc.  EH will kick in as soon as
 *	other commands are drained.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;

	WARN_ON(!ap->ops->error_handler);

	qc->flags |= ATA_QCFLAG_FAILED;
	ata_eh_set_pending(ap, 1);

	/* The following will fail if timeout has already expired.
	 * ata_scsi_error() takes care of such scmds on EH entry.
	 * Note that ATA_QCFLAG_FAILED is unconditionally set after
	 * this function completes.
	 */
	blk_abort_request(qc->scsicmd->request);
}

/**
 *	ata_port_schedule_eh - schedule error handling without a qc
 *	@ap: ATA port to schedule EH for
 *
 *	Schedule error handling for @ap.  EH will kick in as soon as
 *	all commands are drained.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_port_schedule_eh(struct ata_port *ap)
{
	WARN_ON(!ap->ops->error_handler);

	if (ap->pflags & ATA_PFLAG_INITIALIZING)
		return;

	ata_eh_set_pending(ap, 1);
	scsi_schedule_eh(ap->scsi_host);

	DPRINTK("port EH scheduled\n");
}

static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link)
{
	int tag, nr_aborted = 0;

	WARN_ON(!ap->ops->error_handler);

	/* we're gonna abort all commands, no need for fast drain */
	ata_eh_set_pending(ap, 0);

	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);

		if (qc && (!link || qc->dev->link == link)) {
			qc->flags |= ATA_QCFLAG_FAILED;
			ata_qc_complete(qc);
			nr_aborted++;
		}
	}

	if (!nr_aborted)
		ata_port_schedule_eh(ap);

	return nr_aborted;
}

/**
 *	ata_link_abort - abort all qc's on the link
 *	@link: ATA link to abort qc's for
 *
 *	Abort all active qc's on @link and schedule EH.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	Number of aborted qc's.
 */
int ata_link_abort(struct ata_link *link)
{
	return ata_do_link_abort(link->ap, link);
}

/**
 *	ata_port_abort - abort all qc's on the port
 *	@ap: ATA port to abort qc's for
 *
 *	Abort all active qc's of @ap and schedule EH.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	Number of aborted qc's.
 */
int ata_port_abort(struct ata_port *ap)
{
	return ata_do_link_abort(ap, NULL);
}

/**
 *	__ata_port_freeze - freeze port
 *	@ap: ATA port to freeze
 *
 *	This function is called when HSM violation or some other
 *	condition disrupts normal operation of the port.  A frozen port
 *	is not allowed to perform any operation until the port is
 *	thawed, which usually follows a successful reset.
 *
 *	ap->ops->freeze() callback can be used for freezing the port
 *	hardware-wise (e.g. mask interrupt and stop DMA engine).  If a
 *	port cannot be frozen hardware-wise, the interrupt handler
 *	must ack and clear interrupts unconditionally while the port
 *	is frozen.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
static void __ata_port_freeze(struct ata_port *ap)
{
	WARN_ON(!ap->ops->error_handler);

	if (ap->ops->freeze)
		ap->ops->freeze(ap);

	ap->pflags |= ATA_PFLAG_FROZEN;

	DPRINTK("ata%u port frozen\n", ap->print_id);
}

/**
 *	ata_port_freeze - abort & freeze port
 *	@ap: ATA port to freeze
 *
 *	Abort and freeze @ap.  The freeze operation must be called
 *	first, because some hardware requires special operations
 *	before the taskfile registers are accessible.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	Number of aborted commands.
 */
int ata_port_freeze(struct ata_port *ap)
{
	int nr_aborted;

	WARN_ON(!ap->ops->error_handler);

	__ata_port_freeze(ap);
	nr_aborted = ata_port_abort(ap);

	return nr_aborted;
}
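
/* Example LLDD contract (a sketch, not a requirement imposed here):
 * a driver whose controller can't mask interrupts per port may leave
 * ->freeze unimplemented; its interrupt handler must then test
 * ATA_PFLAG_FROZEN and merely ack/clear interrupt status without
 * touching qcs, which is exactly the behaviour the comment above
 * __ata_port_freeze() asks for.
 */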

/**
 *	sata_async_notification - SATA async notification handler
 *	@ap: ATA port where async notification is received
 *
 *	Handler to be called when async notification via SDB FIS is
 *	received.  This function schedules EH if necessary.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	1 if EH is scheduled, 0 otherwise.
 */
int sata_async_notification(struct ata_port *ap)
{
	u32 sntf;
	int rc;

	if (!(ap->flags & ATA_FLAG_AN))
		return 0;

	rc = sata_scr_read(&ap->link, SCR_NOTIFICATION, &sntf);
	if (rc == 0)
		sata_scr_write(&ap->link, SCR_NOTIFICATION, sntf);

	if (!sata_pmp_attached(ap) || rc) {
		/* PMP is not attached or SNTF is not available */
		if (!sata_pmp_attached(ap)) {
			/* PMP is not attached.  Check whether ATAPI
			 * AN is configured.  If so, notify media
			 * change.
			 */
			struct ata_device *dev = ap->link.device;

			if ((dev->class == ATA_DEV_ATAPI) &&
			    (dev->flags & ATA_DFLAG_AN))
				ata_scsi_media_change_notify(dev);
			return 0;
		} else {
			/* PMP is attached but SNTF is not available.
			 * ATAPI async media change notification is
			 * not used.  The PMP must be reporting PHY
			 * status change, schedule EH.
			 */
			ata_port_schedule_eh(ap);
			return 1;
		}
	} else {
		/* PMP is attached and SNTF is available */
		struct ata_link *link;

		/* check and notify ATAPI AN */
		ata_for_each_link(link, ap, EDGE) {
			if (!(sntf & (1 << link->pmp)))
				continue;

			if ((link->device->class == ATA_DEV_ATAPI) &&
			    (link->device->flags & ATA_DFLAG_AN))
				ata_scsi_media_change_notify(link->device);
		}

		/* If PMP is reporting that PHY status of some
		 * downstream ports has changed, schedule EH.
		 */
		if (sntf & (1 << SATA_PMP_CTRL_PORT)) {
			ata_port_schedule_eh(ap);
			return 1;
		}

		return 0;
	}
}
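
/* SNotification layout, for reference: bit n of the SNTF register
 * corresponds to PMP port n, so the EDGE loop above tests
 * (sntf & (1 << link->pmp)) per fan-out link, while bit
 * SATA_PMP_CTRL_PORT (port 15, the PMP control port) signals a
 * downstream PHY status change that needs full EH.
 */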

/**
 *	ata_eh_freeze_port - EH helper to freeze port
 *	@ap: ATA port to freeze
 *
 *	Freeze @ap.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_freeze_port(struct ata_port *ap)
{
	unsigned long flags;

	if (!ap->ops->error_handler)
		return;

	spin_lock_irqsave(ap->lock, flags);
	__ata_port_freeze(ap);
	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_thaw_port - EH helper to thaw port
 *	@ap: ATA port to thaw
 *
 *	Thaw frozen port @ap.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_thaw_port(struct ata_port *ap)
{
	unsigned long flags;

	if (!ap->ops->error_handler)
		return;

	spin_lock_irqsave(ap->lock, flags);

	ap->pflags &= ~ATA_PFLAG_FROZEN;

	if (ap->ops->thaw)
		ap->ops->thaw(ap);

	spin_unlock_irqrestore(ap->lock, flags);

	DPRINTK("ata%u port thawed\n", ap->print_id);
}

static void ata_eh_scsidone(struct scsi_cmnd *scmd)
{
	/* nada */
}

static void __ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	struct scsi_cmnd *scmd = qc->scsicmd;
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);
	qc->scsidone = ata_eh_scsidone;
	__ata_qc_complete(qc);
	WARN_ON(ata_tag_valid(qc->tag));
	spin_unlock_irqrestore(ap->lock, flags);

	scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
}

/**
 *	ata_eh_qc_complete - Complete an active ATA command from EH
 *	@qc: Command to complete
 *
 *	Indicate to the mid and upper layers that an ATA command has
 *	completed.  To be used from EH.
 */
void ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *scmd = qc->scsicmd;
	scmd->retries = scmd->allowed;
	__ata_eh_qc_complete(qc);
}

/**
 *	ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH
 *	@qc: Command to retry
 *
 *	Indicate to the mid and upper layers that an ATA command
 *	should be retried.  To be used from EH.
 *
 *	SCSI midlayer limits the number of retries to scmd->allowed.
 *	scmd->retries is decremented for commands which get retried
 *	due to unrelated failures (qc->err_mask is zero).
 */
void ata_eh_qc_retry(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *scmd = qc->scsicmd;
	if (!qc->err_mask && scmd->retries)
		scmd->retries--;
	__ata_eh_qc_complete(qc);
}
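
/* Retry accounting example: the SCSI midlayer roughly gives up once
 * scmd->retries reaches scmd->allowed.  ata_eh_qc_retry() decrements
 * scmd->retries when qc->err_mask is zero, i.e. when the command
 * itself didn't fail (say, it was aborted because another device on
 * the same link misbehaved), so such innocent-bystander retries don't
 * eat into the command's retry budget.
 */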

/**
 *	ata_dev_disable - disable ATA device
 *	@dev: ATA device to disable
 *
 *	Disable @dev.
 *
 *	LOCKING:
 *	EH context.
 */
void ata_dev_disable(struct ata_device *dev)
{
	if (!ata_dev_enabled(dev))
		return;

	if (ata_msg_drv(dev->link->ap))
		ata_dev_printk(dev, KERN_WARNING, "disabled\n");
	ata_acpi_on_disable(dev);
	ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO0 | ATA_DNXFER_QUIET);
	dev->class++;

	/* From now till the next successful probe, ering is used to
	 * track probe failures.  Clear accumulated device error info.
	 */
	ata_ering_clear(&dev->ering);
}

/**
 *	ata_eh_detach_dev - detach ATA device
 *	@dev: ATA device to detach
 *
 *	Detach @dev.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_detach_dev(struct ata_device *dev)
{
	struct ata_link *link = dev->link;
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	unsigned long flags;

	ata_dev_disable(dev);

	spin_lock_irqsave(ap->lock, flags);

	dev->flags &= ~ATA_DFLAG_DETACH;

	if (ata_scsi_offline_dev(dev)) {
		dev->flags |= ATA_DFLAG_DETACHED;
		ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
	}

	/* clear per-dev EH info */
	ata_eh_clear_action(link, dev, &link->eh_info, ATA_EH_PERDEV_MASK);
	ata_eh_clear_action(link, dev, &link->eh_context.i, ATA_EH_PERDEV_MASK);
	ehc->saved_xfer_mode[dev->devno] = 0;
	ehc->saved_ncq_enabled &= ~(1 << dev->devno);

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_about_to_do - about to perform eh_action
 *	@link: target ATA link
 *	@dev: target ATA dev for per-dev action (can be NULL)
 *	@action: action about to be performed
 *
 *	Called just before performing EH actions to clear related bits
 *	in @link->eh_info such that eh actions are not unnecessarily
 *	repeated.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev,
			unsigned int action)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_info *ehi = &link->eh_info;
	struct ata_eh_context *ehc = &link->eh_context;
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);

	ata_eh_clear_action(link, dev, ehi, action);

	/* About to take EH action, set RECOVERED.  Ignore actions on
	 * slave links as master will do them again.
	 */
	if (!(ehc->i.flags & ATA_EHI_QUIET) && link != ap->slave_link)
		ap->pflags |= ATA_PFLAG_RECOVERED;

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_done - EH action complete
 *	@link: target ATA link
 *	@dev: target ATA dev for per-dev action (can be NULL)
 *	@action: action just completed
 *
 *	Called right after performing EH actions to clear related bits
 *	in @link->eh_context.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_done(struct ata_link *link, struct ata_device *dev,
		 unsigned int action)
{
	struct ata_eh_context *ehc = &link->eh_context;

	ata_eh_clear_action(link, dev, &ehc->i, action);
}
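
/* These two helpers bracket each recovery step.  A sketch of the
 * pattern used by the recovery code elsewhere in this file
 * (do_the_reset is a hypothetical stand-in for the actual step):
 *
 *	ata_eh_about_to_do(link, NULL, ATA_EH_RESET);
 *	rc = do_the_reset(link);
 *	if (rc == 0)
 *		ata_eh_done(link, NULL, ATA_EH_RESET);
 *
 * about_to_do clears the bits in eh_info, so a request arriving while
 * the action runs re-arms it, while done clears them in eh_context
 * once the action has succeeded.
 */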

/**
 *	ata_err_string - convert err_mask to descriptive string
 *	@err_mask: error mask to convert to string
 *
 *	Convert @err_mask to descriptive string.  Errors are
 *	prioritized according to severity and only the most severe
 *	error is reported.
 *
 *	LOCKING:
 *	None.
 *
 *	RETURNS:
 *	Descriptive string for @err_mask
 */
static const char *ata_err_string(unsigned int err_mask)
{
	if (err_mask & AC_ERR_HOST_BUS)
		return "host bus error";
	if (err_mask & AC_ERR_ATA_BUS)
		return "ATA bus error";
	if (err_mask & AC_ERR_TIMEOUT)
		return "timeout";
	if (err_mask & AC_ERR_HSM)
		return "HSM violation";
	if (err_mask & AC_ERR_SYSTEM)
		return "internal error";
	if (err_mask & AC_ERR_MEDIA)
		return "media error";
	if (err_mask & AC_ERR_INVALID)
		return "invalid argument";
	if (err_mask & AC_ERR_DEV)
		return "device error";
	return "unknown error";
}

/**
 *	ata_read_log_page - read a specific log page
 *	@dev: target device
 *	@page: page to read
 *	@buf: buffer to store read page
 *	@sectors: number of sectors to read
 *
 *	Read log page using READ_LOG_EXT command.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, AC_ERR_* mask otherwise.
 */
static unsigned int ata_read_log_page(struct ata_device *dev,
				      u8 page, void *buf, unsigned int sectors)
{
	struct ata_taskfile tf;
	unsigned int err_mask;

	DPRINTK("read log page - page %d\n", page);

	ata_tf_init(dev, &tf);
	tf.command = ATA_CMD_READ_LOG_EXT;
	tf.lbal = page;
	tf.nsect = sectors;
	tf.hob_nsect = sectors >> 8;
	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_LBA48 | ATA_TFLAG_DEVICE;
	tf.protocol = ATA_PROT_PIO;

	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE,
				     buf, sectors * ATA_SECT_SIZE, 0);

	DPRINTK("EXIT, err_mask=%x\n", err_mask);
	return err_mask;
}

/**
 *	ata_eh_read_log_10h - Read log page 10h for NCQ error details
 *	@dev: Device to read log page 10h from
 *	@tag: Resulting tag of the failed command
 *	@tf: Resulting taskfile registers of the failed command
 *
 *	Read log page 10h to obtain NCQ error details and clear error
 *	condition.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, -errno otherwise.
 */
static int ata_eh_read_log_10h(struct ata_device *dev,
			       int *tag, struct ata_taskfile *tf)
{
	u8 *buf = dev->link->ap->sector_buf;
	unsigned int err_mask;
	u8 csum;
	int i;

	err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, buf, 1);
	if (err_mask)
		return -EIO;

	csum = 0;
	for (i = 0; i < ATA_SECT_SIZE; i++)
		csum += buf[i];
	if (csum)
		ata_dev_printk(dev, KERN_WARNING,
			       "invalid checksum 0x%x on log page 10h\n", csum);

	if (buf[0] & 0x80)
		return -ENOENT;

	*tag = buf[0] & 0x1f;

	tf->command = buf[2];
	tf->feature = buf[3];
	tf->lbal = buf[4];
	tf->lbam = buf[5];
	tf->lbah = buf[6];
	tf->device = buf[7];
	tf->hob_lbal = buf[8];
	tf->hob_lbam = buf[9];
	tf->hob_lbah = buf[10];
	tf->nsect = buf[12];
	tf->hob_nsect = buf[13];

	return 0;
}
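
/* Layout of the log page 10h fields consumed above (one 512-byte
 * sector whose bytes must sum to zero):
 *
 *	byte 0, bit 7	NQ - set if the error was non-queued (-ENOENT)
 *	byte 0, 4:0	tag of the failing NCQ command
 *	bytes 2..13	shadow taskfile of the failed command
 *			(status, error, LBA, device, sector counts)
 */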

/**
 *	atapi_eh_tur - perform ATAPI TEST_UNIT_READY
 *	@dev: target ATAPI device
 *	@r_sense_key: out parameter for sense_key
 *
 *	Perform ATAPI TEST_UNIT_READY.
 *
 *	LOCKING:
 *	EH context (may sleep).
 *
 *	RETURNS:
 *	0 on success, AC_ERR_* mask on failure.
 */
static unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key)
{
	u8 cdb[ATAPI_CDB_LEN] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 };
	struct ata_taskfile tf;
	unsigned int err_mask;

	ata_tf_init(dev, &tf);

	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.command = ATA_CMD_PACKET;
	tf.protocol = ATAPI_PROT_NODATA;

	err_mask = ata_exec_internal(dev, &tf, cdb, DMA_NONE, NULL, 0, 0);
	if (err_mask == AC_ERR_DEV)
		*r_sense_key = tf.feature >> 4;
	return err_mask;
}

/**
 *	atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
 *	@dev: device to perform REQUEST_SENSE to
 *	@sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
 *	@dfl_sense_key: default sense key to use
 *
 *	Perform ATAPI REQUEST_SENSE after the device reported CHECK
 *	SENSE.  This function is an EH helper.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, AC_ERR_* mask on failure
 */
static unsigned int atapi_eh_request_sense(struct ata_device *dev,
					   u8 *sense_buf, u8 dfl_sense_key)
{
	u8 cdb[ATAPI_CDB_LEN] =
		{ REQUEST_SENSE, 0, 0, 0, SCSI_SENSE_BUFFERSIZE, 0 };
	struct ata_port *ap = dev->link->ap;
	struct ata_taskfile tf;

	DPRINTK("ATAPI request sense\n");

	/* FIXME: is this needed? */
	memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);

	/* initialize sense_buf with the error register,
	 * for the case where they are -not- overwritten
	 */
	sense_buf[0] = 0x70;
	sense_buf[2] = dfl_sense_key;

	/* some devices time out if garbage left in tf */
	ata_tf_init(dev, &tf);

	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.command = ATA_CMD_PACKET;

	/* is it pointless to prefer PIO for "safety reasons"? */
	if (ap->flags & ATA_FLAG_PIO_DMA) {
		tf.protocol = ATAPI_PROT_DMA;
		tf.feature |= ATAPI_PKT_DMA;
	} else {
		tf.protocol = ATAPI_PROT_PIO;
		tf.lbam = SCSI_SENSE_BUFFERSIZE;
		tf.lbah = 0;
	}

	return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
				 sense_buf, SCSI_SENSE_BUFFERSIZE, 0);
}

/**
 *	ata_eh_analyze_serror - analyze SError for a failed port
 *	@link: ATA link to analyze SError for
 *
 *	Analyze SError if available and further determine cause of
 *	failure.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_analyze_serror(struct ata_link *link)
{
	struct ata_eh_context *ehc = &link->eh_context;
	u32 serror = ehc->i.serror;
	unsigned int err_mask = 0, action = 0;
	u32 hotplug_mask;

	if (serror & (SERR_PERSISTENT | SERR_DATA)) {
		err_mask |= AC_ERR_ATA_BUS;
		action |= ATA_EH_RESET;
	}
	if (serror & SERR_PROTOCOL) {
		err_mask |= AC_ERR_HSM;
		action |= ATA_EH_RESET;
	}
	if (serror & SERR_INTERNAL) {
		err_mask |= AC_ERR_SYSTEM;
		action |= ATA_EH_RESET;
	}

	/* Determine whether a hotplug event has occurred.  Both
	 * SError.N/X are considered hotplug events for enabled or
	 * host links.  For disabled PMP links, only N bit is
	 * considered as X bit is left at 1 for link plugging.
	 */
	if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link))
		hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG;
	else
		hotplug_mask = SERR_PHYRDY_CHG;

	if (serror & hotplug_mask)
		ata_ehi_hotplugged(&ehc->i);

	ehc->i.err_mask |= err_mask;
	ehc->i.action |= action;
}

/**
 *	ata_eh_analyze_ncq_error - analyze NCQ error
 *	@link: ATA link to analyze NCQ error for
 *
 *	Read log page 10h, determine the offending qc and acquire
 *	error status TF.  For NCQ device errors, all LLDDs have to do
 *	is set AC_ERR_DEV in ehi->err_mask.  This function takes
 *	care of the rest.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
void ata_eh_analyze_ncq_error(struct ata_link *link)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_device *dev = link->device;
	struct ata_queued_cmd *qc;
	struct ata_taskfile tf;
	int tag, rc;

	/* if frozen, we can't do much */
	if (ap->pflags & ATA_PFLAG_FROZEN)
		return;

	/* is it NCQ device error? */
	if (!link->sactive || !(ehc->i.err_mask & AC_ERR_DEV))
		return;

	/* has LLDD analyzed already? */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		if (qc->err_mask)
			return;
	}

	/* okay, this error is ours */
	rc = ata_eh_read_log_10h(dev, &tag, &tf);
	if (rc) {
		ata_link_printk(link, KERN_ERR, "failed to read log page 10h "
				"(errno=%d)\n", rc);
		return;
	}

	if (!(link->sactive & (1 << tag))) {
		ata_link_printk(link, KERN_ERR, "log page 10h reported "
				"inactive tag %d\n", tag);
		return;
	}

	/* we've got the perpetrator, condemn it */
	qc = __ata_qc_from_tag(ap, tag);
	memcpy(&qc->result_tf, &tf, sizeof(tf));
	qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48;
	qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ;
	ehc->i.err_mask &= ~AC_ERR_DEV;
}

/**
 *	ata_eh_analyze_tf - analyze taskfile of a failed qc
 *	@qc: qc to analyze
 *	@tf: Taskfile registers to analyze
 *
 *	Analyze taskfile of @qc and further determine cause of
 *	failure.  This function also requests ATAPI sense data if
 *	available.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	Determined recovery action
 */
static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
				      const struct ata_taskfile *tf)
{
	unsigned int tmp, action = 0;
	u8 stat = tf->command, err = tf->feature;

	if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) {
		qc->err_mask |= AC_ERR_HSM;
		return ATA_EH_RESET;
	}

	if (stat & (ATA_ERR | ATA_DF))
		qc->err_mask |= AC_ERR_DEV;
	else
		return 0;

	switch (qc->dev->class) {
	case ATA_DEV_ATA:
		if (err & ATA_ICRC)
			qc->err_mask |= AC_ERR_ATA_BUS;
		if (err & ATA_UNC)
			qc->err_mask |= AC_ERR_MEDIA;
		if (err & ATA_IDNF)
			qc->err_mask |= AC_ERR_INVALID;
		break;

	case ATA_DEV_ATAPI:
		if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) {
			tmp = atapi_eh_request_sense(qc->dev,
						qc->scsicmd->sense_buffer,
						qc->result_tf.feature >> 4);
			if (!tmp) {
				/* ATA_QCFLAG_SENSE_VALID is used to
				 * tell atapi_qc_complete() that sense
				 * data is already valid.
				 *
				 * TODO: interpret sense data and set
				 * appropriate err_mask.
				 */
				qc->flags |= ATA_QCFLAG_SENSE_VALID;
			} else
				qc->err_mask |= tmp;
		}
	}

	if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS))
		action |= ATA_EH_RESET;

	return action;
}

static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask,
				   int *xfer_ok)
{
	int base = 0;

	if (!(eflags & ATA_EFLAG_DUBIOUS_XFER))
		*xfer_ok = 1;

	if (!*xfer_ok)
		base = ATA_ECAT_DUBIOUS_NONE;

	if (err_mask & AC_ERR_ATA_BUS)
		return base + ATA_ECAT_ATA_BUS;

	if (err_mask & AC_ERR_TIMEOUT)
		return base + ATA_ECAT_TOUT_HSM;

	if (eflags & ATA_EFLAG_IS_IO) {
		if (err_mask & AC_ERR_HSM)
			return base + ATA_ECAT_TOUT_HSM;
		if ((err_mask &
		     (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
			return base + ATA_ECAT_UNK_DEV;
	}

	return 0;
}

struct speed_down_verdict_arg {
	u64 since;
	int xfer_ok;
	int nr_errors[ATA_ECAT_NR];
};

static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
{
	struct speed_down_verdict_arg *arg = void_arg;
	int cat;

	if (ent->timestamp < arg->since)
		return -1;

	cat = ata_eh_categorize_error(ent->eflags, ent->err_mask,
				      &arg->xfer_ok);
	arg->nr_errors[cat]++;

	return 0;
}

/**
 *	ata_eh_speed_down_verdict - Determine speed down verdict
 *	@dev: Device of interest
 *
 *	This function examines error ring of @dev and determines
 *	whether NCQ needs to be turned off, transfer speed should be
 *	stepped down, or falling back to PIO is necessary.
 *
 *	ECAT_ATA_BUS	: ATA_BUS error for any command
 *
 *	ECAT_TOUT_HSM	: TIMEOUT for any command or HSM violation for
 *			  IO commands
 *
 *	ECAT_UNK_DEV	: Unknown DEV error for IO commands
 *
 *	ECAT_DUBIOUS_*	: Identical to above three but occurred while
 *			  data transfer hasn't been verified.
 *
 *	Verdicts are
 *
 *	NCQ_OFF		: Turn off NCQ.
 *
 *	SPEED_DOWN	: Speed down transfer speed but don't fall back
 *			  to PIO.
 *
 *	FALLBACK_TO_PIO	: Fall back to PIO.
 *
 *	Even if multiple verdicts are returned, only one action is
 *	taken per error.  An action triggered by non-DUBIOUS errors
 *	clears ering, while one triggered by DUBIOUS_* errors doesn't.
 *	This is to expedite speed down decisions right after device is
 *	initially configured.
 *
 *	The following are the speed down rules.  #1 and #2 deal with
 *	DUBIOUS errors.
 *
 *	1. If more than one DUBIOUS_ATA_BUS or DUBIOUS_TOUT_HSM errors
 *	   occurred during last 5 mins, SPEED_DOWN and FALLBACK_TO_PIO.
 *
 *	2. If more than one DUBIOUS_TOUT_HSM or DUBIOUS_UNK_DEV errors
 *	   occurred during last 5 mins, NCQ_OFF.
 *
 *	3. If more than 6 ATA_BUS, TOUT_HSM or UNK_DEV errors
 *	   occurred during last 5 mins, FALLBACK_TO_PIO.
 *
 *	4. If more than 3 TOUT_HSM or UNK_DEV errors occurred
 *	   during last 10 mins, NCQ_OFF.
 *
 *	5. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 6
 *	   UNK_DEV errors occurred during last 10 mins, SPEED_DOWN.
 *
 *	LOCKING:
 *	Inherited from caller.
 *
 *	RETURNS:
 *	OR of ATA_EH_SPDN_* flags.
 */
static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev)
{
	const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ;
	u64 j64 = get_jiffies_64();
	struct speed_down_verdict_arg arg;
	unsigned int verdict = 0;

	/* scan past 5 mins of error history */
	memset(&arg, 0, sizeof(arg));
	arg.since = j64 - min(j64, j5mins);
	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);

	if (arg.nr_errors[ATA_ECAT_DUBIOUS_ATA_BUS] +
	    arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] > 1)
		verdict |= ATA_EH_SPDN_SPEED_DOWN |
			ATA_EH_SPDN_FALLBACK_TO_PIO | ATA_EH_SPDN_KEEP_ERRORS;

	if (arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] +
	    arg.nr_errors[ATA_ECAT_DUBIOUS_UNK_DEV] > 1)
		verdict |= ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_KEEP_ERRORS;

	if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
	    arg.nr_errors[ATA_ECAT_TOUT_HSM] +
	    arg.nr_errors[ATA_ECAT_UNK_DEV] > 6)
		verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO;

	/* scan past 10 mins of error history */
	memset(&arg, 0, sizeof(arg));
	arg.since = j64 - min(j64, j10mins);
	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);

	if (arg.nr_errors[ATA_ECAT_TOUT_HSM] +
	    arg.nr_errors[ATA_ECAT_UNK_DEV] > 3)
		verdict |= ATA_EH_SPDN_NCQ_OFF;

	if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
	    arg.nr_errors[ATA_ECAT_TOUT_HSM] > 3 ||
	    arg.nr_errors[ATA_ECAT_UNK_DEV] > 6)
		verdict |= ATA_EH_SPDN_SPEED_DOWN;

	return verdict;
}
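
/* Worked example of the rules above: four TOUT_HSM errors within the
 * last ten minutes trip both rule #4 (NCQ_OFF) and rule #5
 * (SPEED_DOWN).  Per the "only one action is taken per error" policy,
 * ata_eh_speed_down() below acts on NCQ_OFF first and only steps the
 * transfer mode down on a subsequent error.
 */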

/**
 *	ata_eh_speed_down - record error and speed down if necessary
 *	@dev: Failed device
 *	@eflags: mask of ATA_EFLAG_* flags
 *	@err_mask: err_mask of the error
 *
 *	Record error and examine error history to determine whether
 *	adjusting transmission speed is necessary.  It also sets
 *	transmission limits appropriately if such adjustment is
 *	necessary.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	Determined recovery action.
 */
static unsigned int ata_eh_speed_down(struct ata_device *dev,
				unsigned int eflags, unsigned int err_mask)
{
	struct ata_link *link = ata_dev_phys_link(dev);
	int xfer_ok = 0;
	unsigned int verdict;
	unsigned int action = 0;

	/* don't bother if Cat-0 error */
	if (ata_eh_categorize_error(eflags, err_mask, &xfer_ok) == 0)
		return 0;

	/* record error and determine whether speed down is necessary */
	ata_ering_record(&dev->ering, eflags, err_mask);
	verdict = ata_eh_speed_down_verdict(dev);

	/* turn off NCQ? */
	if ((verdict & ATA_EH_SPDN_NCQ_OFF) &&
	    (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ |
			   ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) {
		dev->flags |= ATA_DFLAG_NCQ_OFF;
		ata_dev_printk(dev, KERN_WARNING,
			       "NCQ disabled due to excessive errors\n");
		goto done;
	}

	/* speed down? */
	if (verdict & ATA_EH_SPDN_SPEED_DOWN) {
		/* speed down SATA link speed if possible */
		if (sata_down_spd_limit(link, 0) == 0) {
			action |= ATA_EH_RESET;
			goto done;
		}

		/* lower transfer mode */
		if (dev->spdn_cnt < 2) {
			static const int dma_dnxfer_sel[] =
				{ ATA_DNXFER_DMA, ATA_DNXFER_40C };
			static const int pio_dnxfer_sel[] =
				{ ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 };
			int sel;

			if (dev->xfer_shift != ATA_SHIFT_PIO)
				sel = dma_dnxfer_sel[dev->spdn_cnt];
			else
				sel = pio_dnxfer_sel[dev->spdn_cnt];

			dev->spdn_cnt++;

			if (ata_down_xfermask_limit(dev, sel) == 0) {
				action |= ATA_EH_RESET;
				goto done;
			}
		}
	}

	/* Fall back to PIO?  Slowing down to PIO is meaningless for
	 * SATA ATA devices.  Consider it only for PATA and SATAPI.
	 */
	if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) &&
	    (link->ap->cbl != ATA_CBL_SATA || dev->class == ATA_DEV_ATAPI) &&
	    (dev->xfer_shift != ATA_SHIFT_PIO)) {
		if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) {
			dev->spdn_cnt = 0;
			action |= ATA_EH_RESET;
			goto done;
		}
	}

	return 0;
 done:
	/* device has been slowed down, blow error history */
	if (!(verdict & ATA_EH_SPDN_KEEP_ERRORS))
		ata_ering_clear(&dev->ering);
	return action;
}

/**
 *	ata_eh_link_autopsy - analyze error and determine recovery action
 *	@link: host link to perform autopsy on
 *
 *	Analyze why @link failed and determine which recovery actions
 *	are needed.  This function also sets more detailed AC_ERR_*
 *	values and fills sense data for ATAPI CHECK SENSE.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
static void ata_eh_link_autopsy(struct ata_link *link)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_device *dev;
	unsigned int all_err_mask = 0, eflags = 0;
	int tag;
	u32 serror;
	int rc;

	DPRINTK("ENTER\n");

	if (ehc->i.flags & ATA_EHI_NO_AUTOPSY)
		return;

	/* obtain and analyze SError */
	rc = sata_scr_read(link, SCR_ERROR, &serror);
	if (rc == 0) {
		ehc->i.serror |= serror;
		ata_eh_analyze_serror(link);
	} else if (rc != -EOPNOTSUPP) {
		/* SError read failed, force reset and probing */
		ehc->i.probe_mask |= ATA_ALL_DEVICES;
		ehc->i.action |= ATA_EH_RESET;
		ehc->i.err_mask |= AC_ERR_OTHER;
	}

	/* analyze NCQ failure */
	ata_eh_analyze_ncq_error(link);

	/* any real error trumps AC_ERR_OTHER */
	if (ehc->i.err_mask & ~AC_ERR_OTHER)
		ehc->i.err_mask &= ~AC_ERR_OTHER;

	all_err_mask |= ehc->i.err_mask;

	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED) ||
		    ata_dev_phys_link(qc->dev) != link)
			continue;

		/* inherit upper level err_mask */
		qc->err_mask |= ehc->i.err_mask;

		/* analyze TF */
		ehc->i.action |= ata_eh_analyze_tf(qc, &qc->result_tf);

		/* DEV errors are probably spurious in case of ATA_BUS error */
		if (qc->err_mask & AC_ERR_ATA_BUS)
			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA |
					  AC_ERR_INVALID);

		/* any real error trumps unknown error */
		if (qc->err_mask & ~AC_ERR_OTHER)
			qc->err_mask &= ~AC_ERR_OTHER;

		/* SENSE_VALID trumps dev/unknown error and revalidation */
		if (qc->flags & ATA_QCFLAG_SENSE_VALID)
			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER);

		/* determine whether the command is worth retrying */
		if (!(qc->err_mask & AC_ERR_INVALID) &&
		    ((qc->flags & ATA_QCFLAG_IO) || qc->err_mask != AC_ERR_DEV))
			qc->flags |= ATA_QCFLAG_RETRY;

		/* accumulate error info */
		ehc->i.dev = qc->dev;
		all_err_mask |= qc->err_mask;
		if (qc->flags & ATA_QCFLAG_IO)
			eflags |= ATA_EFLAG_IS_IO;
	}

	/* enforce default EH actions */
	if (ap->pflags & ATA_PFLAG_FROZEN ||
	    all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT))
		ehc->i.action |= ATA_EH_RESET;
	else if (((eflags & ATA_EFLAG_IS_IO) && all_err_mask) ||
		 (!(eflags & ATA_EFLAG_IS_IO) && (all_err_mask & ~AC_ERR_DEV)))
		ehc->i.action |= ATA_EH_REVALIDATE;

	/* If we have offending qcs and the associated failed device,
	 * perform per-dev EH action only on the offending device.
	 */
	if (ehc->i.dev) {
		ehc->i.dev_action[ehc->i.dev->devno] |=
			ehc->i.action & ATA_EH_PERDEV_MASK;
		ehc->i.action &= ~ATA_EH_PERDEV_MASK;
	}

	/* propagate timeout to host link */
	if ((all_err_mask & AC_ERR_TIMEOUT) && !ata_is_host_link(link))
		ap->link.eh_context.i.err_mask |= AC_ERR_TIMEOUT;

	/* record error and consider speeding down */
	dev = ehc->i.dev;
	if (!dev && ((ata_link_max_devices(link) == 1 &&
		      ata_dev_enabled(link->device))))
		dev = link->device;

	if (dev) {
		if (dev->flags & ATA_DFLAG_DUBIOUS_XFER)
			eflags |= ATA_EFLAG_DUBIOUS_XFER;
		ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask);
	}

	DPRINTK("EXIT\n");
}
	 */
	if (ehc->i.dev) {
		ehc->i.dev_action[ehc->i.dev->devno] |=
			ehc->i.action & ATA_EH_PERDEV_MASK;
		ehc->i.action &= ~ATA_EH_PERDEV_MASK;
	}

	/* propagate timeout to host link */
	if ((all_err_mask & AC_ERR_TIMEOUT) && !ata_is_host_link(link))
		ap->link.eh_context.i.err_mask |= AC_ERR_TIMEOUT;

	/* record error and consider speeding down */
	dev = ehc->i.dev;
	if (!dev && ((ata_link_max_devices(link) == 1 &&
		      ata_dev_enabled(link->device))))
		dev = link->device;

	if (dev) {
		if (dev->flags & ATA_DFLAG_DUBIOUS_XFER)
			eflags |= ATA_EFLAG_DUBIOUS_XFER;
		ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask);
	}

	DPRINTK("EXIT\n");
}

/**
 * ata_eh_autopsy - analyze error and determine recovery action
 * @ap: host port to perform autopsy on
 *
 * Analyze all links of @ap and determine why they failed and
 * which recovery actions are needed.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
void ata_eh_autopsy(struct ata_port *ap)
{
	struct ata_link *link;

	ata_for_each_link(link, ap, EDGE)
		ata_eh_link_autopsy(link);

	/* Handle the frigging slave link.  Autopsy is done similarly
	 * but actions and flags are transferred over to the master
	 * link and handled from there.
	 */
	if (ap->slave_link) {
		struct ata_eh_context *mehc = &ap->link.eh_context;
		struct ata_eh_context *sehc = &ap->slave_link->eh_context;

		/* transfer control flags from master to slave */
		sehc->i.flags |= mehc->i.flags & ATA_EHI_TO_SLAVE_MASK;

		/* perform autopsy on the slave link */
		ata_eh_link_autopsy(ap->slave_link);

		/* transfer actions from slave to master and clear slave */
		ata_eh_about_to_do(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS);
		mehc->i.action |= sehc->i.action;
		mehc->i.dev_action[1] |= sehc->i.dev_action[1];
		mehc->i.flags |= sehc->i.flags;
		ata_eh_done(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS);
	}

	/* Autopsy of fanout ports can affect host link autopsy.
	 * Perform host link autopsy last.
	 */
	if (sata_pmp_attached(ap))
		ata_eh_link_autopsy(&ap->link);
}
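/*
 * Autopsy is the first stage of the EH flow.  A custom
 * ->error_handler built on these helpers is typically shaped like
 * ata_do_eh() further below (sketch):
 *
 *	ata_eh_autopsy(ap);
 *	ata_eh_report(ap);
 *	ata_eh_recover(ap, prereset, softreset, hardreset, postreset, NULL);
 *	ata_eh_finish(ap);
 */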
/**
 * ata_get_cmd_descript - get description for ATA command
 * @command: ATA command code to get description for
 *
 * Return a textual description of the given command, or NULL if the
 * command is not known.
 *
 * LOCKING:
 * None
 */
const char *ata_get_cmd_descript(u8 command)
{
#ifdef CONFIG_ATA_VERBOSE_ERROR
	static const struct
	{
		u8 command;
		const char *text;
	} cmd_descr[] = {
		{ ATA_CMD_DEV_RESET,		"DEVICE RESET" },
		{ ATA_CMD_CHK_POWER,		"CHECK POWER MODE" },
		{ ATA_CMD_STANDBY,		"STANDBY" },
		{ ATA_CMD_IDLE,			"IDLE" },
		{ ATA_CMD_EDD,			"EXECUTE DEVICE DIAGNOSTIC" },
		{ ATA_CMD_DOWNLOAD_MICRO,	"DOWNLOAD MICROCODE" },
		{ ATA_CMD_NOP,			"NOP" },
		{ ATA_CMD_FLUSH,		"FLUSH CACHE" },
		{ ATA_CMD_FLUSH_EXT,		"FLUSH CACHE EXT" },
		{ ATA_CMD_ID_ATA,		"IDENTIFY DEVICE" },
		{ ATA_CMD_ID_ATAPI,		"IDENTIFY PACKET DEVICE" },
		{ ATA_CMD_SERVICE,		"SERVICE" },
		{ ATA_CMD_READ,			"READ DMA" },
		{ ATA_CMD_READ_EXT,		"READ DMA EXT" },
		{ ATA_CMD_READ_QUEUED,		"READ DMA QUEUED" },
		{ ATA_CMD_READ_STREAM_EXT,	"READ STREAM EXT" },
		{ ATA_CMD_READ_STREAM_DMA_EXT,	"READ STREAM DMA EXT" },
		{ ATA_CMD_WRITE,		"WRITE DMA" },
		{ ATA_CMD_WRITE_EXT,		"WRITE DMA EXT" },
		{ ATA_CMD_WRITE_QUEUED,		"WRITE DMA QUEUED EXT" },
		{ ATA_CMD_WRITE_STREAM_EXT,	"WRITE STREAM EXT" },
		{ ATA_CMD_WRITE_STREAM_DMA_EXT,	"WRITE STREAM DMA EXT" },
		{ ATA_CMD_WRITE_FUA_EXT,	"WRITE DMA FUA EXT" },
		{ ATA_CMD_WRITE_QUEUED_FUA_EXT,	"WRITE DMA QUEUED FUA EXT" },
		{ ATA_CMD_FPDMA_READ,		"READ FPDMA QUEUED" },
		{ ATA_CMD_FPDMA_WRITE,		"WRITE FPDMA QUEUED" },
		{ ATA_CMD_PIO_READ,		"READ SECTOR(S)" },
		{ ATA_CMD_PIO_READ_EXT,		"READ SECTOR(S) EXT" },
		{ ATA_CMD_PIO_WRITE,		"WRITE SECTOR(S)" },
		{ ATA_CMD_PIO_WRITE_EXT,	"WRITE SECTOR(S) EXT" },
		{ ATA_CMD_READ_MULTI,		"READ MULTIPLE" },
		{ ATA_CMD_READ_MULTI_EXT,	"READ MULTIPLE EXT" },
		{ ATA_CMD_WRITE_MULTI,		"WRITE MULTIPLE" },
		{ ATA_CMD_WRITE_MULTI_EXT,	"WRITE MULTIPLE EXT" },
		{ ATA_CMD_WRITE_MULTI_FUA_EXT,	"WRITE MULTIPLE FUA EXT" },
		{ ATA_CMD_SET_FEATURES,		"SET FEATURES" },
		{ ATA_CMD_SET_MULTI,		"SET MULTIPLE MODE" },
		{ ATA_CMD_VERIFY,		"READ VERIFY SECTOR(S)" },
		{ ATA_CMD_VERIFY_EXT,		"READ VERIFY SECTOR(S) EXT" },
		{ ATA_CMD_WRITE_UNCORR_EXT,	"WRITE UNCORRECTABLE EXT" },
		{ ATA_CMD_STANDBYNOW1,		"STANDBY IMMEDIATE" },
		{ ATA_CMD_IDLEIMMEDIATE,	"IDLE IMMEDIATE" },
		{ ATA_CMD_SLEEP,		"SLEEP" },
		{ ATA_CMD_INIT_DEV_PARAMS,	"INITIALIZE DEVICE PARAMETERS" },
		{ ATA_CMD_READ_NATIVE_MAX,	"READ NATIVE MAX ADDRESS" },
		{ ATA_CMD_READ_NATIVE_MAX_EXT,	"READ NATIVE MAX ADDRESS EXT" },
		{ ATA_CMD_SET_MAX,		"SET MAX ADDRESS" },
		{ ATA_CMD_SET_MAX_EXT,		"SET MAX ADDRESS EXT" },
		{ ATA_CMD_READ_LOG_EXT,		"READ LOG EXT" },
		{ ATA_CMD_WRITE_LOG_EXT,	"WRITE LOG EXT" },
		{ ATA_CMD_READ_LOG_DMA_EXT,	"READ LOG DMA EXT" },
		{ ATA_CMD_WRITE_LOG_DMA_EXT,	"WRITE LOG DMA EXT" },
		{ ATA_CMD_TRUSTED_RCV,		"TRUSTED RECEIVE" },
		{ ATA_CMD_TRUSTED_RCV_DMA,	"TRUSTED RECEIVE DMA" },
		{ ATA_CMD_TRUSTED_SND,		"TRUSTED SEND" },
		{ ATA_CMD_TRUSTED_SND_DMA,	"TRUSTED SEND DMA" },
		{ ATA_CMD_PMP_READ,		"READ BUFFER" },
		{ ATA_CMD_PMP_WRITE,		"WRITE BUFFER" },
		{ ATA_CMD_CONF_OVERLAY,		"DEVICE CONFIGURATION OVERLAY" },
		{ ATA_CMD_SEC_SET_PASS,		"SECURITY SET PASSWORD" },
		{ ATA_CMD_SEC_UNLOCK,		"SECURITY UNLOCK" },
		{ ATA_CMD_SEC_ERASE_PREP,	"SECURITY ERASE PREPARE" },
		{ ATA_CMD_SEC_ERASE_UNIT,	"SECURITY ERASE UNIT" },
		{ ATA_CMD_SEC_FREEZE_LOCK,	"SECURITY FREEZE LOCK" },
		{ ATA_CMD_SEC_DISABLE_PASS,	"SECURITY DISABLE PASSWORD" },
		{ ATA_CMD_CONFIG_STREAM,	"CONFIGURE STREAM" },
		{ ATA_CMD_SMART,		"SMART" },
		{ ATA_CMD_MEDIA_LOCK,		"DOOR LOCK" },
		{ ATA_CMD_MEDIA_UNLOCK,		"DOOR UNLOCK" },
		{ ATA_CMD_CHK_MED_CRD_TYP,	"CHECK MEDIA CARD TYPE" },
		{ ATA_CMD_CFA_REQ_EXT_ERR,	"CFA REQUEST EXTENDED ERROR" },
		{ ATA_CMD_CFA_WRITE_NE,		"CFA WRITE SECTORS WITHOUT ERASE" },
		{ ATA_CMD_CFA_TRANS_SECT,	"CFA TRANSLATE SECTOR" },
		{ ATA_CMD_CFA_ERASE,		"CFA ERASE SECTORS" },
		{ ATA_CMD_CFA_WRITE_MULT_NE,	"CFA WRITE MULTIPLE WITHOUT ERASE" },
		{ ATA_CMD_READ_LONG,		"READ LONG (with retries)" },
		{ ATA_CMD_READ_LONG_ONCE,	"READ LONG (without retries)" },
		{ ATA_CMD_WRITE_LONG,		"WRITE LONG (with retries)" },
		{ ATA_CMD_WRITE_LONG_ONCE,	"WRITE LONG (without retries)" },
		{ ATA_CMD_RESTORE,		"RECALIBRATE" },
		{ 0,				NULL } /* terminate list */
	};

	unsigned int i;
	for (i = 0; cmd_descr[i].text; i++)
		if (cmd_descr[i].command == command)
			return cmd_descr[i].text;
#endif

	return NULL;
}
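/*
 * Example: ata_get_cmd_descript(ATA_CMD_FLUSH) returns "FLUSH CACHE".
 * NULL comes back for unknown opcodes, and for every opcode when
 * CONFIG_ATA_VERBOSE_ERROR is not set, so callers must check the
 * result before printing (as ata_eh_link_report() does below).
 */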
"PHYInt " : "", 2303 ehc->i.serror & SERR_COMM_WAKE ? "CommWake " : "", 2304 ehc->i.serror & SERR_10B_8B_ERR ? "10B8B " : "", 2305 ehc->i.serror & SERR_DISPARITY ? "Dispar " : "", 2306 ehc->i.serror & SERR_CRC ? "BadCRC " : "", 2307 ehc->i.serror & SERR_HANDSHAKE ? "Handshk " : "", 2308 ehc->i.serror & SERR_LINK_SEQ_ERR ? "LinkSeq " : "", 2309 ehc->i.serror & SERR_TRANS_ST_ERROR ? "TrStaTrns " : "", 2310 ehc->i.serror & SERR_UNRECOG_FIS ? "UnrecFIS " : "", 2311 ehc->i.serror & SERR_DEV_XCHG ? "DevExch " : ""); 2312 #endif 2313 2314 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 2315 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 2316 struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf; 2317 const u8 *cdb = qc->cdb; 2318 char data_buf[20] = ""; 2319 char cdb_buf[70] = ""; 2320 2321 if (!(qc->flags & ATA_QCFLAG_FAILED) || 2322 ata_dev_phys_link(qc->dev) != link || !qc->err_mask) 2323 continue; 2324 2325 if (qc->dma_dir != DMA_NONE) { 2326 static const char *dma_str[] = { 2327 [DMA_BIDIRECTIONAL] = "bidi", 2328 [DMA_TO_DEVICE] = "out", 2329 [DMA_FROM_DEVICE] = "in", 2330 }; 2331 static const char *prot_str[] = { 2332 [ATA_PROT_PIO] = "pio", 2333 [ATA_PROT_DMA] = "dma", 2334 [ATA_PROT_NCQ] = "ncq", 2335 [ATAPI_PROT_PIO] = "pio", 2336 [ATAPI_PROT_DMA] = "dma", 2337 }; 2338 2339 snprintf(data_buf, sizeof(data_buf), " %s %u %s", 2340 prot_str[qc->tf.protocol], qc->nbytes, 2341 dma_str[qc->dma_dir]); 2342 } 2343 2344 if (ata_is_atapi(qc->tf.protocol)) { 2345 if (qc->scsicmd) 2346 scsi_print_command(qc->scsicmd); 2347 else 2348 snprintf(cdb_buf, sizeof(cdb_buf), 2349 "cdb %02x %02x %02x %02x %02x %02x %02x %02x " 2350 "%02x %02x %02x %02x %02x %02x %02x %02x\n ", 2351 cdb[0], cdb[1], cdb[2], cdb[3], 2352 cdb[4], cdb[5], cdb[6], cdb[7], 2353 cdb[8], cdb[9], cdb[10], cdb[11], 2354 cdb[12], cdb[13], cdb[14], cdb[15]); 2355 } else { 2356 const char *descr = ata_get_cmd_descript(cmd->command); 2357 if (descr) 2358 ata_dev_printk(qc->dev, KERN_ERR, 2359 "failed command: %s\n", descr); 2360 } 2361 2362 ata_dev_printk(qc->dev, KERN_ERR, 2363 "cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 2364 "tag %d%s\n %s" 2365 "res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 2366 "Emask 0x%x (%s)%s\n", 2367 cmd->command, cmd->feature, cmd->nsect, 2368 cmd->lbal, cmd->lbam, cmd->lbah, 2369 cmd->hob_feature, cmd->hob_nsect, 2370 cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah, 2371 cmd->device, qc->tag, data_buf, cdb_buf, 2372 res->command, res->feature, res->nsect, 2373 res->lbal, res->lbam, res->lbah, 2374 res->hob_feature, res->hob_nsect, 2375 res->hob_lbal, res->hob_lbam, res->hob_lbah, 2376 res->device, qc->err_mask, ata_err_string(qc->err_mask), 2377 qc->err_mask & AC_ERR_NCQ ? " <F>" : ""); 2378 2379 #ifdef CONFIG_ATA_VERBOSE_ERROR 2380 if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | 2381 ATA_ERR)) { 2382 if (res->command & ATA_BUSY) 2383 ata_dev_printk(qc->dev, KERN_ERR, 2384 "status: { Busy }\n"); 2385 else 2386 ata_dev_printk(qc->dev, KERN_ERR, 2387 "status: { %s%s%s%s}\n", 2388 res->command & ATA_DRDY ? "DRDY " : "", 2389 res->command & ATA_DF ? "DF " : "", 2390 res->command & ATA_DRQ ? "DRQ " : "", 2391 res->command & ATA_ERR ? "ERR " : ""); 2392 } 2393 2394 if (cmd->command != ATA_CMD_PACKET && 2395 (res->feature & (ATA_ICRC | ATA_UNC | ATA_IDNF | 2396 ATA_ABORTED))) 2397 ata_dev_printk(qc->dev, KERN_ERR, 2398 "error: { %s%s%s%s}\n", 2399 res->feature & ATA_ICRC ? "ICRC " : "", 2400 res->feature & ATA_UNC ? "UNC " : "", 2401 res->feature & ATA_IDNF ? 
"IDNF " : "", 2402 res->feature & ATA_ABORTED ? "ABRT " : ""); 2403 #endif 2404 } 2405 } 2406 2407 /** 2408 * ata_eh_report - report error handling to user 2409 * @ap: ATA port to report EH about 2410 * 2411 * Report EH to user. 2412 * 2413 * LOCKING: 2414 * None. 2415 */ 2416 void ata_eh_report(struct ata_port *ap) 2417 { 2418 struct ata_link *link; 2419 2420 ata_for_each_link(link, ap, HOST_FIRST) 2421 ata_eh_link_report(link); 2422 } 2423 2424 static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset, 2425 unsigned int *classes, unsigned long deadline, 2426 bool clear_classes) 2427 { 2428 struct ata_device *dev; 2429 2430 if (clear_classes) 2431 ata_for_each_dev(dev, link, ALL) 2432 classes[dev->devno] = ATA_DEV_UNKNOWN; 2433 2434 return reset(link, classes, deadline); 2435 } 2436 2437 static int ata_eh_followup_srst_needed(struct ata_link *link, 2438 int rc, const unsigned int *classes) 2439 { 2440 if ((link->flags & ATA_LFLAG_NO_SRST) || ata_link_offline(link)) 2441 return 0; 2442 if (rc == -EAGAIN) 2443 return 1; 2444 if (sata_pmp_supported(link->ap) && ata_is_host_link(link)) 2445 return 1; 2446 return 0; 2447 } 2448 2449 int ata_eh_reset(struct ata_link *link, int classify, 2450 ata_prereset_fn_t prereset, ata_reset_fn_t softreset, 2451 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) 2452 { 2453 struct ata_port *ap = link->ap; 2454 struct ata_link *slave = ap->slave_link; 2455 struct ata_eh_context *ehc = &link->eh_context; 2456 struct ata_eh_context *sehc = slave ? &slave->eh_context : NULL; 2457 unsigned int *classes = ehc->classes; 2458 unsigned int lflags = link->flags; 2459 int verbose = !(ehc->i.flags & ATA_EHI_QUIET); 2460 int max_tries = 0, try = 0; 2461 struct ata_link *failed_link; 2462 struct ata_device *dev; 2463 unsigned long deadline, now; 2464 ata_reset_fn_t reset; 2465 unsigned long flags; 2466 u32 sstatus; 2467 int nr_unknown, rc; 2468 2469 /* 2470 * Prepare to reset 2471 */ 2472 while (ata_eh_reset_timeouts[max_tries] != ULONG_MAX) 2473 max_tries++; 2474 if (link->flags & ATA_LFLAG_NO_HRST) 2475 hardreset = NULL; 2476 if (link->flags & ATA_LFLAG_NO_SRST) 2477 softreset = NULL; 2478 2479 /* make sure each reset attemp is at least COOL_DOWN apart */ 2480 if (ehc->i.flags & ATA_EHI_DID_RESET) { 2481 now = jiffies; 2482 WARN_ON(time_after(ehc->last_reset, now)); 2483 deadline = ata_deadline(ehc->last_reset, 2484 ATA_EH_RESET_COOL_DOWN); 2485 if (time_before(now, deadline)) 2486 schedule_timeout_uninterruptible(deadline - now); 2487 } 2488 2489 spin_lock_irqsave(ap->lock, flags); 2490 ap->pflags |= ATA_PFLAG_RESETTING; 2491 spin_unlock_irqrestore(ap->lock, flags); 2492 2493 ata_eh_about_to_do(link, NULL, ATA_EH_RESET); 2494 2495 ata_for_each_dev(dev, link, ALL) { 2496 /* If we issue an SRST then an ATA drive (not ATAPI) 2497 * may change configuration and be in PIO0 timing. If 2498 * we do a hard reset (or are coming from power on) 2499 * this is true for ATA or ATAPI. Until we've set a 2500 * suitable controller mode we should not touch the 2501 * bus as we may be talking too fast. 2502 */ 2503 dev->pio_mode = XFER_PIO_0; 2504 2505 /* If the controller has a pio mode setup function 2506 * then use it to set the chipset to rights. Don't 2507 * touch the DMA setup as that will be dealt with when 2508 * configuring devices. 
		 */
		if (ap->ops->set_piomode)
			ap->ops->set_piomode(ap, dev);
	}

	/* prefer hardreset */
	reset = NULL;
	ehc->i.action &= ~ATA_EH_RESET;
	if (hardreset) {
		reset = hardreset;
		ehc->i.action |= ATA_EH_HARDRESET;
	} else if (softreset) {
		reset = softreset;
		ehc->i.action |= ATA_EH_SOFTRESET;
	}

	if (prereset) {
		unsigned long deadline = ata_deadline(jiffies,
						      ATA_EH_PRERESET_TIMEOUT);

		if (slave) {
			sehc->i.action &= ~ATA_EH_RESET;
			sehc->i.action |= ehc->i.action;
		}

		rc = prereset(link, deadline);

		/* If present, do prereset on slave link too.  Reset
		 * is skipped iff both master and slave links report
		 * -ENOENT or clear ATA_EH_RESET.
		 */
		if (slave && (rc == 0 || rc == -ENOENT)) {
			int tmp;

			tmp = prereset(slave, deadline);
			if (tmp != -ENOENT)
				rc = tmp;

			ehc->i.action |= sehc->i.action;
		}

		if (rc) {
			if (rc == -ENOENT) {
				ata_link_printk(link, KERN_DEBUG,
						"port disabled. ignoring.\n");
				ehc->i.action &= ~ATA_EH_RESET;

				ata_for_each_dev(dev, link, ALL)
					classes[dev->devno] = ATA_DEV_NONE;

				rc = 0;
			} else
				ata_link_printk(link, KERN_ERR,
					"prereset failed (errno=%d)\n", rc);
			goto out;
		}

		/* prereset() might have cleared ATA_EH_RESET.  If so,
		 * bang classes, thaw and return.
		 */
		if (reset && !(ehc->i.action & ATA_EH_RESET)) {
			ata_for_each_dev(dev, link, ALL)
				classes[dev->devno] = ATA_DEV_NONE;
			if ((ap->pflags & ATA_PFLAG_FROZEN) &&
			    ata_is_host_link(link))
				ata_eh_thaw_port(ap);
			rc = 0;
			goto out;
		}
	}

 retry:
	/*
	 * Perform reset
	 */
	if (ata_is_host_link(link))
		ata_eh_freeze_port(ap);

	deadline = ata_deadline(jiffies, ata_eh_reset_timeouts[try++]);

	if (reset) {
		if (verbose)
			ata_link_printk(link, KERN_INFO, "%s resetting link\n",
					reset == softreset ?
"soft" : "hard"); 2593 2594 /* mark that this EH session started with reset */ 2595 ehc->last_reset = jiffies; 2596 if (reset == hardreset) 2597 ehc->i.flags |= ATA_EHI_DID_HARDRESET; 2598 else 2599 ehc->i.flags |= ATA_EHI_DID_SOFTRESET; 2600 2601 rc = ata_do_reset(link, reset, classes, deadline, true); 2602 if (rc && rc != -EAGAIN) { 2603 failed_link = link; 2604 goto fail; 2605 } 2606 2607 /* hardreset slave link if existent */ 2608 if (slave && reset == hardreset) { 2609 int tmp; 2610 2611 if (verbose) 2612 ata_link_printk(slave, KERN_INFO, 2613 "hard resetting link\n"); 2614 2615 ata_eh_about_to_do(slave, NULL, ATA_EH_RESET); 2616 tmp = ata_do_reset(slave, reset, classes, deadline, 2617 false); 2618 switch (tmp) { 2619 case -EAGAIN: 2620 rc = -EAGAIN; 2621 case 0: 2622 break; 2623 default: 2624 failed_link = slave; 2625 rc = tmp; 2626 goto fail; 2627 } 2628 } 2629 2630 /* perform follow-up SRST if necessary */ 2631 if (reset == hardreset && 2632 ata_eh_followup_srst_needed(link, rc, classes)) { 2633 reset = softreset; 2634 2635 if (!reset) { 2636 ata_link_printk(link, KERN_ERR, 2637 "follow-up softreset required " 2638 "but no softreset avaliable\n"); 2639 failed_link = link; 2640 rc = -EINVAL; 2641 goto fail; 2642 } 2643 2644 ata_eh_about_to_do(link, NULL, ATA_EH_RESET); 2645 rc = ata_do_reset(link, reset, classes, deadline, true); 2646 if (rc) { 2647 failed_link = link; 2648 goto fail; 2649 } 2650 } 2651 } else { 2652 if (verbose) 2653 ata_link_printk(link, KERN_INFO, "no reset method " 2654 "available, skipping reset\n"); 2655 if (!(lflags & ATA_LFLAG_ASSUME_CLASS)) 2656 lflags |= ATA_LFLAG_ASSUME_ATA; 2657 } 2658 2659 /* 2660 * Post-reset processing 2661 */ 2662 ata_for_each_dev(dev, link, ALL) { 2663 /* After the reset, the device state is PIO 0 and the 2664 * controller state is undefined. Reset also wakes up 2665 * drives from sleeping mode. 2666 */ 2667 dev->pio_mode = XFER_PIO_0; 2668 dev->flags &= ~ATA_DFLAG_SLEEPING; 2669 2670 if (ata_phys_link_offline(ata_dev_phys_link(dev))) 2671 continue; 2672 2673 /* apply class override */ 2674 if (lflags & ATA_LFLAG_ASSUME_ATA) 2675 classes[dev->devno] = ATA_DEV_ATA; 2676 else if (lflags & ATA_LFLAG_ASSUME_SEMB) 2677 classes[dev->devno] = ATA_DEV_SEMB_UNSUP; 2678 } 2679 2680 /* record current link speed */ 2681 if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0) 2682 link->sata_spd = (sstatus >> 4) & 0xf; 2683 if (slave && sata_scr_read(slave, SCR_STATUS, &sstatus) == 0) 2684 slave->sata_spd = (sstatus >> 4) & 0xf; 2685 2686 /* thaw the port */ 2687 if (ata_is_host_link(link)) 2688 ata_eh_thaw_port(ap); 2689 2690 /* postreset() should clear hardware SError. Although SError 2691 * is cleared during link resume, clearing SError here is 2692 * necessary as some PHYs raise hotplug events after SRST. 2693 * This introduces race condition where hotplug occurs between 2694 * reset and here. This race is mediated by cross checking 2695 * link onlineness and classification result later. 2696 */ 2697 if (postreset) { 2698 postreset(link, classes); 2699 if (slave) 2700 postreset(slave, classes); 2701 } 2702 2703 /* 2704 * Some controllers can't be frozen very well and may set 2705 * spuruious error conditions during reset. Clear accumulated 2706 * error information. As reset is the final recovery action, 2707 * nothing is lost by doing this. 
	 */
	spin_lock_irqsave(link->ap->lock, flags);
	memset(&link->eh_info, 0, sizeof(link->eh_info));
	if (slave)
		memset(&slave->eh_info, 0, sizeof(slave->eh_info));
	ap->pflags &= ~ATA_PFLAG_EH_PENDING;
	spin_unlock_irqrestore(link->ap->lock, flags);

	/*
	 * Make sure onlineness and classification result correspond.
	 * Hotplug could have happened during reset and some
	 * controllers fail to wait while a drive is spinning up after
	 * being hotplugged causing misdetection.  By cross checking
	 * link on/offlineness and classification result, those
	 * conditions can be reliably detected and retried.
	 */
	nr_unknown = 0;
	ata_for_each_dev(dev, link, ALL) {
		if (ata_phys_link_online(ata_dev_phys_link(dev))) {
			if (classes[dev->devno] == ATA_DEV_UNKNOWN) {
				ata_dev_printk(dev, KERN_DEBUG, "link online "
					       "but device misclassified\n");
				classes[dev->devno] = ATA_DEV_NONE;
				nr_unknown++;
			}
		} else if (ata_phys_link_offline(ata_dev_phys_link(dev))) {
			if (ata_class_enabled(classes[dev->devno]))
				ata_dev_printk(dev, KERN_DEBUG, "link offline, "
					       "clearing class %d to NONE\n",
					       classes[dev->devno]);
			classes[dev->devno] = ATA_DEV_NONE;
		} else if (classes[dev->devno] == ATA_DEV_UNKNOWN) {
			ata_dev_printk(dev, KERN_DEBUG, "link status unknown, "
				       "clearing UNKNOWN to NONE\n");
			classes[dev->devno] = ATA_DEV_NONE;
		}
	}

	if (classify && nr_unknown) {
		if (try < max_tries) {
			ata_link_printk(link, KERN_WARNING, "link online but "
					"%d devices misclassified, retrying\n",
					nr_unknown);
			failed_link = link;
			rc = -EAGAIN;
			goto fail;
		}
		ata_link_printk(link, KERN_WARNING,
				"link online but %d devices misclassified, "
				"device detection might fail\n", nr_unknown);
	}

	/* reset successful, schedule revalidation */
	ata_eh_done(link, NULL, ATA_EH_RESET);
	if (slave)
		ata_eh_done(slave, NULL, ATA_EH_RESET);
	ehc->last_reset = jiffies;	/* update to completion time */
	ehc->i.action |= ATA_EH_REVALIDATE;

	rc = 0;
 out:
	/* clear hotplug flag */
	ehc->i.flags &= ~ATA_EHI_HOTPLUGGED;
	if (slave)
		sehc->i.flags &= ~ATA_EHI_HOTPLUGGED;

	spin_lock_irqsave(ap->lock, flags);
	ap->pflags &= ~ATA_PFLAG_RESETTING;
	spin_unlock_irqrestore(ap->lock, flags);

	return rc;

 fail:
	/* if SCR isn't accessible on a fan-out port, PMP needs to be reset */
	if (!ata_is_host_link(link) &&
	    sata_scr_read(link, SCR_STATUS, &sstatus))
		rc = -ERESTART;

	if (rc == -ERESTART || try >= max_tries)
		goto out;

	now = jiffies;
	if (time_before(now, deadline)) {
		unsigned long delta = deadline - now;

		ata_link_printk(failed_link, KERN_WARNING,
			"reset failed (errno=%d), retrying in %u secs\n",
			rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000));

		while (delta)
			delta = schedule_timeout_uninterruptible(delta);
	}

	if (try == max_tries - 1) {
		sata_down_spd_limit(link, 0);
		if (slave)
			sata_down_spd_limit(slave, 0);
	} else if (rc == -EPIPE)
		sata_down_spd_limit(failed_link, 0);

	if (hardreset)
		reset = hardreset;
	goto retry;
}
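/*
 * Combined with ata_eh_reset_timeouts[], the retry logic above gives
 * the following cadence (sketch): tries #1 and #2 get 10s each, #3
 * gets 35s and the last one 5s, with at least ATA_EH_RESET_COOL_DOWN
 * (5s) between attempts and a link speed-down before the final try.
 * A link that still can't be reset after all tries -- roughly a
 * minute -- is given up on and the last error is returned.
 */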
static inline void ata_eh_pull_park_action(struct ata_port *ap)
{
	struct ata_link *link;
	struct ata_device *dev;
	unsigned long flags;

	/*
	 * This function can be thought of as an extended version of
	 * ata_eh_about_to_do() specially crafted to accommodate the
	 * requirements of ATA_EH_PARK handling. Since the EH thread
	 * does not leave the do {} while () loop in ata_eh_recover as
	 * long as the timeout for a park request to *one* device on
	 * the port has not expired, and since we still want to pick
	 * up park requests to other devices on the same port or
	 * timeout updates for the same device, we have to pull
	 * ATA_EH_PARK actions from eh_info into eh_context.i
	 * ourselves at the beginning of each pass over the loop.
	 *
	 * Additionally, all write accesses to &ap->park_req_pending
	 * through INIT_COMPLETION() (see below) or complete_all()
	 * (see ata_scsi_park_store()) are protected by the host lock.
	 * As a result we have that park_req_pending.done is zero on
	 * exit from this function, i.e. when ATA_EH_PARK actions for
	 * *all* devices on port ap have been pulled into the
	 * respective eh_context structs. If, and only if,
	 * park_req_pending.done is non-zero by the time we reach
	 * wait_for_completion_timeout(), another ATA_EH_PARK action
	 * has been scheduled for at least one of the devices on port
	 * ap and we have to cycle over the do {} while () loop in
	 * ata_eh_recover() again.
	 */

	spin_lock_irqsave(ap->lock, flags);
	INIT_COMPLETION(ap->park_req_pending);
	ata_for_each_link(link, ap, EDGE) {
		ata_for_each_dev(dev, link, ALL) {
			struct ata_eh_info *ehi = &link->eh_info;

			link->eh_context.i.dev_action[dev->devno] |=
				ehi->dev_action[dev->devno] & ATA_EH_PARK;
			ata_eh_clear_action(link, dev, ehi, ATA_EH_PARK);
		}
	}
	spin_unlock_irqrestore(ap->lock, flags);
}

static void ata_eh_park_issue_cmd(struct ata_device *dev, int park)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;
	struct ata_taskfile tf;
	unsigned int err_mask;

	ata_tf_init(dev, &tf);
	if (park) {
		ehc->unloaded_mask |= 1 << dev->devno;
		/* IDLE IMMEDIATE with UNLOAD FEATURE: the 0x44
		 * feature and the 0x4c/0x4e/0x55 LBA signature come
		 * from the ATA standard's head-unload protocol.
		 */
		tf.command = ATA_CMD_IDLEIMMEDIATE;
		tf.feature = 0x44;
		tf.lbal = 0x4c;
		tf.lbam = 0x4e;
		tf.lbah = 0x55;
	} else {
		ehc->unloaded_mask &= ~(1 << dev->devno);
		tf.command = ATA_CMD_CHK_POWER;
	}

	tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR;
	tf.protocol |= ATA_PROT_NODATA;
	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
	/* on a successful unload the device reports 0xc4 in LBA low */
	if (park && (err_mask || tf.lbal != 0xc4)) {
		ata_dev_printk(dev, KERN_ERR, "head unload failed!\n");
		ehc->unloaded_mask &= ~(1 << dev->devno);
	}
}

static int ata_eh_revalidate_and_attach(struct ata_link *link,
					struct ata_device **r_failed_dev)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_device *dev;
	unsigned int new_mask = 0;
	unsigned long flags;
	int rc = 0;

	DPRINTK("ENTER\n");

	/* For PATA drive side cable detection to work, IDENTIFY must
	 * be done backwards such that PDIAG- is released by the slave
	 * device before the master device is identified.
	 */
	ata_for_each_dev(dev, link, ALL_REVERSE) {
		unsigned int action = ata_eh_dev_action(dev);
		unsigned int readid_flags = 0;

		if (ehc->i.flags & ATA_EHI_DID_RESET)
			readid_flags |= ATA_READID_POSTRESET;

		if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) {
			WARN_ON(dev->class == ATA_DEV_PMP);

			if (ata_phys_link_offline(ata_dev_phys_link(dev))) {
				rc = -EIO;
				goto err;
			}

			ata_eh_about_to_do(link, dev, ATA_EH_REVALIDATE);
			rc = ata_dev_revalidate(dev, ehc->classes[dev->devno],
						readid_flags);
			if (rc)
				goto err;

			ata_eh_done(link, dev, ATA_EH_REVALIDATE);

			/* Configuration may have changed, reconfigure
			 * transfer mode.
			 */
			ehc->i.flags |= ATA_EHI_SETMODE;

			/* schedule the scsi_rescan_device() here */
			queue_work(ata_aux_wq, &(ap->scsi_rescan_task));
		} else if (dev->class == ATA_DEV_UNKNOWN &&
			   ehc->tries[dev->devno] &&
			   ata_class_enabled(ehc->classes[dev->devno])) {
			/* Temporarily set dev->class, it will be
			 * permanently set once all configurations are
			 * complete.  This is necessary because new
			 * device configuration is done in two
			 * separate loops.
			 */
			dev->class = ehc->classes[dev->devno];

			if (dev->class == ATA_DEV_PMP)
				rc = sata_pmp_attach(dev);
			else
				rc = ata_dev_read_id(dev, &dev->class,
						     readid_flags, dev->id);

			/* read_id might have changed class, store and reset */
			ehc->classes[dev->devno] = dev->class;
			dev->class = ATA_DEV_UNKNOWN;

			switch (rc) {
			case 0:
				/* clear error info accumulated during probe */
				ata_ering_clear(&dev->ering);
				new_mask |= 1 << dev->devno;
				break;
			case -ENOENT:
				/* IDENTIFY was issued to non-existent
				 * device.  No need to reset.  Just
				 * thaw and ignore the device.
				 */
				ata_eh_thaw_port(ap);
				break;
			default:
				goto err;
			}
		}
	}

	/* PDIAG- should have been released, ask cable type if post-reset */
	if ((ehc->i.flags & ATA_EHI_DID_RESET) && ata_is_host_link(link)) {
		if (ap->ops->cable_detect)
			ap->cbl = ap->ops->cable_detect(ap);
		ata_force_cbl(ap);
	}

	/* Configure new devices forward such that user doesn't see
	 * device detection messages backwards.
	 */
	ata_for_each_dev(dev, link, ALL) {
		if (!(new_mask & (1 << dev->devno)))
			continue;

		dev->class = ehc->classes[dev->devno];

		if (dev->class == ATA_DEV_PMP)
			continue;

		ehc->i.flags |= ATA_EHI_PRINTINFO;
		rc = ata_dev_configure(dev);
		ehc->i.flags &= ~ATA_EHI_PRINTINFO;
		if (rc) {
			dev->class = ATA_DEV_UNKNOWN;
			goto err;
		}

		spin_lock_irqsave(ap->lock, flags);
		ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
		spin_unlock_irqrestore(ap->lock, flags);

		/* new device discovered, configure xfermode */
		ehc->i.flags |= ATA_EHI_SETMODE;
	}

	return 0;

 err:
	*r_failed_dev = dev;
	DPRINTK("EXIT rc=%d\n", rc);
	return rc;
}
/**
 * ata_set_mode - Program timings and issue SET FEATURES - XFER
 * @link: link on which timings will be programmed
 * @r_failed_dev: out parameter for failed device
 *
 * Set ATA device disk transfer mode (PIO3, UDMA6, etc.).  If
 * ata_set_mode() fails, pointer to the failing device is
 * returned in @r_failed_dev.
 *
 * LOCKING:
 * PCI/etc. bus probe sem.
 *
 * RETURNS:
 * 0 on success, negative errno otherwise
 */
int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev)
{
	struct ata_port *ap = link->ap;
	struct ata_device *dev;
	int rc;

	/* if data transfer is verified, clear DUBIOUS_XFER on ering top */
	ata_for_each_dev(dev, link, ENABLED) {
		if (!(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) {
			struct ata_ering_entry *ent;

			ent = ata_ering_top(&dev->ering);
			if (ent)
				ent->eflags &= ~ATA_EFLAG_DUBIOUS_XFER;
		}
	}

	/* has private set_mode? */
	if (ap->ops->set_mode)
		rc = ap->ops->set_mode(link, r_failed_dev);
	else
		rc = ata_do_set_mode(link, r_failed_dev);

	/* if transfer mode has changed, set DUBIOUS_XFER on device */
	ata_for_each_dev(dev, link, ENABLED) {
		struct ata_eh_context *ehc = &link->eh_context;
		u8 saved_xfer_mode = ehc->saved_xfer_mode[dev->devno];
		u8 saved_ncq = !!(ehc->saved_ncq_enabled & (1 << dev->devno));

		if (dev->xfer_mode != saved_xfer_mode ||
		    ata_ncq_enabled(dev) != saved_ncq)
			dev->flags |= ATA_DFLAG_DUBIOUS_XFER;
	}

	return rc;
}

/**
 * atapi_eh_clear_ua - Clear ATAPI UNIT ATTENTION after reset
 * @dev: ATAPI device to clear UA for
 *
 * Resets and other operations can make an ATAPI device raise
 * UNIT ATTENTION which causes the next operation to fail.  This
 * function clears UA.
 *
 * LOCKING:
 * EH context (may sleep).
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
static int atapi_eh_clear_ua(struct ata_device *dev)
{
	int i;

	for (i = 0; i < ATA_EH_UA_TRIES; i++) {
		u8 *sense_buffer = dev->link->ap->sector_buf;
		u8 sense_key = 0;
		unsigned int err_mask;

		err_mask = atapi_eh_tur(dev, &sense_key);
		if (err_mask != 0 && err_mask != AC_ERR_DEV) {
			ata_dev_printk(dev, KERN_WARNING, "TEST_UNIT_READY "
				"failed (err_mask=0x%x)\n", err_mask);
			return -EIO;
		}

		if (!err_mask || sense_key != UNIT_ATTENTION)
			return 0;

		err_mask = atapi_eh_request_sense(dev, sense_buffer, sense_key);
		if (err_mask) {
			ata_dev_printk(dev, KERN_WARNING, "failed to clear "
				"UNIT ATTENTION (err_mask=0x%x)\n", err_mask);
			return -EIO;
		}
	}

	ata_dev_printk(dev, KERN_WARNING,
		"UNIT ATTENTION persists after %d tries\n", ATA_EH_UA_TRIES);

	return 0;
}
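/*
 * atapi_eh_clear_ua() is called from ata_eh_recover() below for each
 * ATAPI device after a reset.  Note that a UNIT ATTENTION which
 * survives all ATA_EH_UA_TRIES rounds is only warned about and
 * deliberately treated as non-fatal: the function still returns 0.
 */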
static int ata_link_nr_enabled(struct ata_link *link)
{
	struct ata_device *dev;
	int cnt = 0;

	ata_for_each_dev(dev, link, ENABLED)
		cnt++;
	return cnt;
}

static int ata_link_nr_vacant(struct ata_link *link)
{
	struct ata_device *dev;
	int cnt = 0;

	ata_for_each_dev(dev, link, ALL)
		if (dev->class == ATA_DEV_UNKNOWN)
			cnt++;
	return cnt;
}

static int ata_eh_skip_recovery(struct ata_link *link)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_device *dev;

	/* skip disabled links */
	if (link->flags & ATA_LFLAG_DISABLED)
		return 1;

	/* thaw frozen port and recover failed devices */
	if ((ap->pflags & ATA_PFLAG_FROZEN) || ata_link_nr_enabled(link))
		return 0;

	/* reset at least once if reset is requested */
	if ((ehc->i.action & ATA_EH_RESET) &&
	    !(ehc->i.flags & ATA_EHI_DID_RESET))
		return 0;

	/* skip if class codes for all vacant slots are ATA_DEV_NONE */
	ata_for_each_dev(dev, link, ALL) {
		if (dev->class == ATA_DEV_UNKNOWN &&
		    ehc->classes[dev->devno] != ATA_DEV_NONE)
			return 0;
	}

	return 1;
}

static int ata_count_probe_trials_cb(struct ata_ering_entry *ent, void *void_arg)
{
	u64 interval = msecs_to_jiffies(ATA_EH_PROBE_TRIAL_INTERVAL);
	u64 now = get_jiffies_64();
	int *trials = void_arg;

	if (ent->timestamp < now - min(now, interval))
		return -1;

	(*trials)++;
	return 0;
}

static int ata_eh_schedule_probe(struct ata_device *dev)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;
	struct ata_link *link = ata_dev_phys_link(dev);
	int trials = 0;

	if (!(ehc->i.probe_mask & (1 << dev->devno)) ||
	    (ehc->did_probe_mask & (1 << dev->devno)))
		return 0;

	ata_eh_detach_dev(dev);
	ata_dev_init(dev);
	ehc->did_probe_mask |= (1 << dev->devno);
	ehc->i.action |= ATA_EH_RESET;
	ehc->saved_xfer_mode[dev->devno] = 0;
	ehc->saved_ncq_enabled &= ~(1 << dev->devno);

	/* Record and count probe trials on the ering.  The specific
	 * error mask used is irrelevant.  Because a successful device
	 * detection clears the ering, this count accumulates only if
	 * there are consecutive failed probes.
	 *
	 * If the count is equal to or higher than ATA_EH_PROBE_TRIALS
	 * in the last ATA_EH_PROBE_TRIAL_INTERVAL, link speed is
	 * forced to 1.5Gbps.
	 *
	 * This is to work around cases where failed link speed
	 * negotiation results in device misdetection leading to
	 * infinite DEVXCHG or PHYRDY CHG events.
	 */
	ata_ering_record(&dev->ering, 0, AC_ERR_OTHER);
	ata_ering_map(&dev->ering, ata_count_probe_trials_cb, &trials);

	if (trials > ATA_EH_PROBE_TRIALS)
		sata_down_spd_limit(link, 1);

	return 1;
}
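/*
 * Worked example with the default parameters: each failed probe
 * records one ering entry, and entries older than
 * ATA_EH_PROBE_TRIAL_INTERVAL (60s) are ignored by the callback
 * above.  Once more than ATA_EH_PROBE_TRIALS (2) recent entries
 * accumulate -- i.e. from the third consecutive failed probe within
 * a minute -- sata_down_spd_limit(link, 1) clamps the link to
 * 1.5Gbps.
 */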
static int ata_eh_handle_dev_fail(struct ata_device *dev, int err)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;

	/* -EAGAIN from EH routine indicates retry without prejudice.
	 * The requester is responsible for ensuring forward progress.
	 */
	if (err != -EAGAIN)
		ehc->tries[dev->devno]--;

	switch (err) {
	case -ENODEV:
		/* device missing or wrong IDENTIFY data, schedule probing */
		ehc->i.probe_mask |= (1 << dev->devno);
		/* fall through */
	case -EINVAL:
		/* give it just one more chance */
		ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1);
		/* fall through */
	case -EIO:
		if (ehc->tries[dev->devno] == 1) {
			/* This is the last chance, better to slow
			 * down than lose it.
			 */
			sata_down_spd_limit(ata_dev_phys_link(dev), 0);
			if (dev->pio_mode > XFER_PIO_0)
				ata_down_xfermask_limit(dev, ATA_DNXFER_PIO);
		}
	}

	if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) {
		/* disable device if it has used up all its chances */
		ata_dev_disable(dev);

		/* detach if offline */
		if (ata_phys_link_offline(ata_dev_phys_link(dev)))
			ata_eh_detach_dev(dev);

		/* schedule probe if necessary */
		if (ata_eh_schedule_probe(dev)) {
			ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;
			memset(ehc->cmd_timeout_idx[dev->devno], 0,
			       sizeof(ehc->cmd_timeout_idx[dev->devno]));
		}

		return 1;
	} else {
		ehc->i.action |= ATA_EH_RESET;
		return 0;
	}
}
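/*
 * The fall-throughs above are deliberate and give the following
 * policy (sketch): -ENODEV schedules probing and, like -EINVAL,
 * leaves at most one more try; all three error codes share the
 * last-chance speed-down.  A device that exhausts its tries is
 * disabled, and if probing is rescheduled it starts over with a
 * full ATA_EH_DEV_TRIES budget and fresh command timeout indices.
 */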
/**
 * ata_eh_recover - recover host port after error
 * @ap: host port to recover
 * @prereset: prereset method (can be NULL)
 * @softreset: softreset method (can be NULL)
 * @hardreset: hardreset method (can be NULL)
 * @postreset: postreset method (can be NULL)
 * @r_failed_link: out parameter for failed link
 *
 * This is the alpha and omega, yin and yang, heart and soul of
 * libata exception handling.  On entry, actions required to
 * recover each link and hotplug requests are recorded in the
 * link's eh_context.  This function executes all the operations
 * with appropriate retries and fallbacks to resurrect failed
 * devices, detach goners and greet newcomers.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
		   ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
		   ata_postreset_fn_t postreset,
		   struct ata_link **r_failed_link)
{
	struct ata_link *link;
	struct ata_device *dev;
	int nr_failed_devs;
	int rc;
	unsigned long flags, deadline;

	DPRINTK("ENTER\n");

	/* prep for recovery */
	ata_for_each_link(link, ap, EDGE) {
		struct ata_eh_context *ehc = &link->eh_context;

		/* re-enable link? */
		if (ehc->i.action & ATA_EH_ENABLE_LINK) {
			ata_eh_about_to_do(link, NULL, ATA_EH_ENABLE_LINK);
			spin_lock_irqsave(ap->lock, flags);
			link->flags &= ~ATA_LFLAG_DISABLED;
			spin_unlock_irqrestore(ap->lock, flags);
			ata_eh_done(link, NULL, ATA_EH_ENABLE_LINK);
		}

		ata_for_each_dev(dev, link, ALL) {
			if (link->flags & ATA_LFLAG_NO_RETRY)
				ehc->tries[dev->devno] = 1;
			else
				ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;

			/* collect port action mask recorded in dev actions */
			ehc->i.action |= ehc->i.dev_action[dev->devno] &
					 ~ATA_EH_PERDEV_MASK;
			ehc->i.dev_action[dev->devno] &= ATA_EH_PERDEV_MASK;

			/* process hotplug request */
			if (dev->flags & ATA_DFLAG_DETACH)
				ata_eh_detach_dev(dev);

			/* schedule probe if necessary */
			if (!ata_dev_enabled(dev))
				ata_eh_schedule_probe(dev);
		}
	}

 retry:
	rc = 0;
	nr_failed_devs = 0;

	/* if UNLOADING, finish immediately */
	if (ap->pflags & ATA_PFLAG_UNLOADING)
		goto out;

	/* prep for EH */
	ata_for_each_link(link, ap, EDGE) {
		struct ata_eh_context *ehc = &link->eh_context;

		/* skip EH if possible. */
		if (ata_eh_skip_recovery(link))
			ehc->i.action = 0;

		ata_for_each_dev(dev, link, ALL)
			ehc->classes[dev->devno] = ATA_DEV_UNKNOWN;
	}

	/* reset */
	ata_for_each_link(link, ap, EDGE) {
		struct ata_eh_context *ehc = &link->eh_context;

		if (!(ehc->i.action & ATA_EH_RESET))
			continue;

		rc = ata_eh_reset(link, ata_link_nr_vacant(link),
				  prereset, softreset, hardreset, postreset);
		if (rc) {
			ata_link_printk(link, KERN_ERR,
					"reset failed, giving up\n");
			goto out;
		}
	}

	do {
		unsigned long now;

		/*
		 * clears ATA_EH_PARK in eh_info and resets
		 * ap->park_req_pending
		 */
		ata_eh_pull_park_action(ap);

		deadline = jiffies;
		ata_for_each_link(link, ap, EDGE) {
			ata_for_each_dev(dev, link, ALL) {
				struct ata_eh_context *ehc = &link->eh_context;
				unsigned long tmp;

				if (dev->class != ATA_DEV_ATA)
					continue;
				if (!(ehc->i.dev_action[dev->devno] &
				      ATA_EH_PARK))
					continue;
				tmp = dev->unpark_deadline;
				if (time_before(deadline, tmp))
					deadline = tmp;
				else if (time_before_eq(tmp, jiffies))
					continue;
				if (ehc->unloaded_mask & (1 << dev->devno))
					continue;

				ata_eh_park_issue_cmd(dev, 1);
			}
		}

		now = jiffies;
		if (time_before_eq(deadline, now))
			break;

		deadline = wait_for_completion_timeout(&ap->park_req_pending,
						       deadline - now);
	} while (deadline);
	ata_for_each_link(link, ap, EDGE) {
		ata_for_each_dev(dev, link, ALL) {
			if (!(link->eh_context.unloaded_mask &
			      (1 << dev->devno)))
				continue;

			ata_eh_park_issue_cmd(dev, 0);
			ata_eh_done(link, dev, ATA_EH_PARK);
		}
	}

	/* the rest */
	ata_for_each_link(link, ap, EDGE) {
		struct ata_eh_context *ehc = &link->eh_context;

		/* revalidate existing devices and attach new ones */
		rc = ata_eh_revalidate_and_attach(link, &dev);
		if (rc)
			goto dev_fail;

		/* if PMP got attached, return, pmp EH will take care of it */
		if (link->device->class == ATA_DEV_PMP) {
			ehc->i.action = 0;
			return 0;
		}

		/* configure transfer mode if necessary */
		if (ehc->i.flags & ATA_EHI_SETMODE) {
			rc = ata_set_mode(link, &dev);
			if (rc)
				goto dev_fail;
			ehc->i.flags &= ~ATA_EHI_SETMODE;
		}

		/* If reset has been issued, clear UA to avoid
		 * disrupting the current users of the device.
		 */
		if (ehc->i.flags & ATA_EHI_DID_RESET) {
			ata_for_each_dev(dev, link, ALL) {
				if (dev->class != ATA_DEV_ATAPI)
					continue;
				rc = atapi_eh_clear_ua(dev);
				if (rc)
					goto dev_fail;
			}
		}

		/* configure link power saving */
		if (ehc->i.action & ATA_EH_LPM)
			ata_for_each_dev(dev, link, ALL)
				ata_dev_enable_pm(dev, ap->pm_policy);

		/* this link is okay now */
		ehc->i.flags = 0;
		continue;

 dev_fail:
		nr_failed_devs++;
		ata_eh_handle_dev_fail(dev, rc);

		if (ap->pflags & ATA_PFLAG_FROZEN) {
			/* PMP reset requires working host port.
			 * Can't retry if it's frozen.
			 */
			if (sata_pmp_attached(ap))
				goto out;
			break;
		}
	}

	if (nr_failed_devs)
		goto retry;

 out:
	if (rc && r_failed_link)
		*r_failed_link = link;

	DPRINTK("EXIT, rc=%d\n", rc);
	return rc;
}

/**
 * ata_eh_finish - finish up EH
 * @ap: host port to finish EH for
 *
 * Recovery is complete.  Clean up EH states and retry or finish
 * failed qcs.
 *
 * LOCKING:
 * None.
 */
void ata_eh_finish(struct ata_port *ap)
{
	int tag;

	/* retry or finish qcs */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		if (qc->err_mask) {
			/* FIXME: Once EH migration is complete,
			 * generate sense data in this function,
			 * considering both err_mask and tf.
			 */
			if (qc->flags & ATA_QCFLAG_RETRY)
				ata_eh_qc_retry(qc);
			else
				ata_eh_qc_complete(qc);
		} else {
			if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
				ata_eh_qc_complete(qc);
			} else {
				/* feed zero TF to sense generation */
				memset(&qc->result_tf, 0, sizeof(qc->result_tf));
				ata_eh_qc_retry(qc);
			}
		}
	}

	/* make sure nr_active_links is zero after EH */
	WARN_ON(ap->nr_active_links);
	ap->nr_active_links = 0;
}

/**
 * ata_do_eh - do standard error handling
 * @ap: host port to handle error for
 * @prereset: prereset method (can be NULL)
 * @softreset: softreset method (can be NULL)
 * @hardreset: hardreset method (can be NULL)
 * @postreset: postreset method (can be NULL)
 *
 * Perform standard error handling sequence.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset,
	       ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
	       ata_postreset_fn_t postreset)
{
	struct ata_device *dev;
	int rc;

	ata_eh_autopsy(ap);
	ata_eh_report(ap);

	rc = ata_eh_recover(ap, prereset, softreset, hardreset, postreset,
			    NULL);
	if (rc) {
		ata_for_each_dev(dev, &ap->link, ALL)
			ata_dev_disable(dev);
	}

	ata_eh_finish(ap);
}

/**
 * ata_std_error_handler - standard error handler
 * @ap: host port to handle error for
 *
 * Standard error handler
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
void ata_std_error_handler(struct ata_port *ap)
{
	struct ata_port_operations *ops = ap->ops;
	ata_reset_fn_t hardreset = ops->hardreset;

	/* ignore built-in hardreset if SCR access is not available */
	if (ata_is_builtin_hardreset(hardreset) && !sata_scr_valid(&ap->link))
		hardreset = NULL;

	ata_do_eh(ap, ops->prereset, ops->softreset, hardreset, ops->postreset);
}
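/*
 * Drivers normally reach this function by inheriting from
 * ata_base_port_ops, which points ->error_handler here.  Sketch
 * (the "foo" driver and its hardreset are hypothetical):
 *
 *	static struct ata_port_operations foo_port_ops = {
 *		.inherits	= &ata_base_port_ops,
 *		.hardreset	= foo_hardreset,
 *	};
 *
 * The built-in hardreset is dropped above whenever the port has no
 * usable SCR access.
 */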
#ifdef CONFIG_PM
/**
 * ata_eh_handle_port_suspend - perform port suspend operation
 * @ap: port to suspend
 *
 * Suspend @ap.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
static void ata_eh_handle_port_suspend(struct ata_port *ap)
{
	unsigned long flags;
	int rc = 0;

	/* are we suspending? */
	spin_lock_irqsave(ap->lock, flags);
	if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
	    ap->pm_mesg.event == PM_EVENT_ON) {
		spin_unlock_irqrestore(ap->lock, flags);
		return;
	}
	spin_unlock_irqrestore(ap->lock, flags);

	WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED);

	/* tell ACPI we're suspending */
	rc = ata_acpi_on_suspend(ap);
	if (rc)
		goto out;

	/* suspend */
	ata_eh_freeze_port(ap);

	if (ap->ops->port_suspend)
		rc = ap->ops->port_suspend(ap, ap->pm_mesg);

	ata_acpi_set_state(ap, PMSG_SUSPEND);
 out:
	/* report result */
	spin_lock_irqsave(ap->lock, flags);

	ap->pflags &= ~ATA_PFLAG_PM_PENDING;
	if (rc == 0)
		ap->pflags |= ATA_PFLAG_SUSPENDED;
	else if (ap->pflags & ATA_PFLAG_FROZEN)
		ata_port_schedule_eh(ap);

	if (ap->pm_result) {
		*ap->pm_result = rc;
		ap->pm_result = NULL;
	}

	spin_unlock_irqrestore(ap->lock, flags);

	return;
}

/**
 * ata_eh_handle_port_resume - perform port resume operation
 * @ap: port to resume
 *
 * Resume @ap.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
static void ata_eh_handle_port_resume(struct ata_port *ap)
{
	struct ata_link *link;
	struct ata_device *dev;
	unsigned long flags;
	int rc = 0;

	/* are we resuming? */
	spin_lock_irqsave(ap->lock, flags);
	if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
	    ap->pm_mesg.event != PM_EVENT_ON) {
		spin_unlock_irqrestore(ap->lock, flags);
		return;
	}
	spin_unlock_irqrestore(ap->lock, flags);

	WARN_ON(!(ap->pflags & ATA_PFLAG_SUSPENDED));

	/*
	 * Error timestamps are in jiffies which doesn't run while
	 * suspended, and PHY events during resume aren't too uncommon.
	 * When the two are combined, it can lead to unnecessary speed
	 * downs if the machine is suspended and resumed repeatedly.
	 * Clear error history.
	 */
	ata_for_each_link(link, ap, HOST_FIRST)
		ata_for_each_dev(dev, link, ALL)
			ata_ering_clear(&dev->ering);

	ata_acpi_set_state(ap, PMSG_ON);

	if (ap->ops->port_resume)
		rc = ap->ops->port_resume(ap);

	/* tell ACPI that we're resuming */
	ata_acpi_on_resume(ap);

	/* report result */
	spin_lock_irqsave(ap->lock, flags);
	ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED);
	if (ap->pm_result) {
		*ap->pm_result = rc;
		ap->pm_result = NULL;
	}
	spin_unlock_irqrestore(ap->lock, flags);
}
#endif /* CONFIG_PM */