/*
 *  libata-eh.c - libata error handling
 *
 *  Maintained by:  Jeff Garzik <jgarzik@pobox.com>
 *		    Please ALWAYS copy linux-ide@vger.kernel.org
 *		    on emails.
 *
 *  Copyright 2006 Tejun Heo <htejun@gmail.com>
 *
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License as
 *  published by the Free Software Foundation; either version 2, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
 *  USA.
 *
 *
 *  libata documentation is available via 'make {ps|pdf}docs',
 *  as Documentation/DocBook/libata.*
 *
 *  Hardware documentation available from http://www.t13.org/ and
 *  http://www.sata-io.org/
 *
 */

#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/pci.h>
#include <scsi/scsi.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_cmnd.h>
#include "../scsi/scsi_transport_api.h"

#include <linux/libata.h>

#include "libata.h"

enum {
	/* speed down verdicts */
	ATA_EH_SPDN_NCQ_OFF		= (1 << 0),
	ATA_EH_SPDN_SPEED_DOWN		= (1 << 1),
	ATA_EH_SPDN_FALLBACK_TO_PIO	= (1 << 2),
	ATA_EH_SPDN_KEEP_ERRORS		= (1 << 3),

	/* error flags */
	ATA_EFLAG_IS_IO			= (1 << 0),
	ATA_EFLAG_DUBIOUS_XFER		= (1 << 1),

	/* error categories */
	ATA_ECAT_NONE			= 0,
	ATA_ECAT_ATA_BUS		= 1,
	ATA_ECAT_TOUT_HSM		= 2,
	ATA_ECAT_UNK_DEV		= 3,
	ATA_ECAT_DUBIOUS_NONE		= 4,
	ATA_ECAT_DUBIOUS_ATA_BUS	= 5,
	ATA_ECAT_DUBIOUS_TOUT_HSM	= 6,
	ATA_ECAT_DUBIOUS_UNK_DEV	= 7,
	ATA_ECAT_NR			= 8,

	ATA_EH_CMD_DFL_TIMEOUT		= 5000,

	/* always put at least this amount of time between resets */
	ATA_EH_RESET_COOL_DOWN		= 5000,

	/* Waiting in ->prereset can never be reliable.  It's
	 * sometimes nice to wait there but it can't be depended upon;
	 * otherwise, we wouldn't be resetting.  Just give it enough
	 * time for most drives to spin up.
	 */
	ATA_EH_PRERESET_TIMEOUT		= 10000,
	ATA_EH_FASTDRAIN_INTERVAL	= 3000,

	ATA_EH_UA_TRIES			= 5,

	/* probe speed down parameters, see ata_eh_schedule_probe() */
	ATA_EH_PROBE_TRIAL_INTERVAL	= 60000,	/* 1 min */
	ATA_EH_PROBE_TRIALS		= 2,
};

/* The following table determines how we sequence resets.  Each entry
 * represents timeout for that try.  The first try can be soft or
 * hardreset.  All others are hardreset if available.  In most cases
 * the first reset w/ 10sec timeout should succeed.  Following entries
 * are mostly for error handling, hotplug and retarded devices.
 */
static const unsigned long ata_eh_reset_timeouts[] = {
	10000,	/* most drives spin up by 10sec */
	10000,	/* > 99% working drives spin up before 20sec */
	35000,	/* give > 30 secs of idleness for retarded devices */
	 5000,	/* and sweet one last chance */
	ULONG_MAX, /* > 1 min has elapsed, give up */
};

static const unsigned long ata_eh_identify_timeouts[] = {
	 5000,	/* covers > 99% of successes and not too boring on failures */
	10000,	/* combined time till here is enough even for media access */
	30000,	/* for true idiots */
	ULONG_MAX,
};

static const unsigned long ata_eh_other_timeouts[] = {
	 5000,	/* same rationale as identify timeout */
	10000,	/* ditto */
	/* but no merciful 30sec for other commands, it just isn't worth it */
	ULONG_MAX,
};

struct ata_eh_cmd_timeout_ent {
	const u8		*commands;
	const unsigned long	*timeouts;
};

/* The following table determines timeouts to use for EH internal
 * commands.  Each table entry is a command class and matches the
 * commands the entry applies to and the timeout table to use.
 *
 * On the retry after a command timed out, the next timeout value from
 * the table is used.  If the table doesn't contain further entries,
 * the last value is used.
 *
 * ehc->cmd_timeout_idx keeps track of which timeout to use per
 * command class, so if SET_FEATURES times out on the first try, the
 * next try will use the second timeout value only for that class.
 */
#define CMDS(cmds...)	(const u8 []){ cmds, 0 }
static const struct ata_eh_cmd_timeout_ent
ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = {
	{ .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI),
	  .timeouts = ata_eh_identify_timeouts, },
	{ .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_SET_FEATURES),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS),
	  .timeouts = ata_eh_other_timeouts, },
};
#undef CMDS

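/* Illustrative example (not part of the driver): with the table above,
 * the first EH-internal IDENTIFY for a device runs with a 5s timeout.
 * If it times out, ata_internal_cmd_timed_out() below bumps the
 * per-class index, so the retry issued by EH runs with 10s, then 30s:
 *
 *	timeout = ata_internal_cmd_timeout(dev, ATA_CMD_ID_ATA);
 *	(5000; the command times out)
 *	ata_internal_cmd_timed_out(dev, ATA_CMD_ID_ATA);
 *	timeout = ata_internal_cmd_timeout(dev, ATA_CMD_ID_ATA);
 *	(now 10000)
 *
 * The index sticks at the last entry before ULONG_MAX, so further
 * retries keep using the longest timeout for that command class.
 */
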
static void __ata_port_freeze(struct ata_port *ap);
#ifdef CONFIG_PM
static void ata_eh_handle_port_suspend(struct ata_port *ap);
static void ata_eh_handle_port_resume(struct ata_port *ap);
#else /* CONFIG_PM */
static void ata_eh_handle_port_suspend(struct ata_port *ap)
{ }

static void ata_eh_handle_port_resume(struct ata_port *ap)
{ }
#endif /* CONFIG_PM */

static void __ata_ehi_pushv_desc(struct ata_eh_info *ehi, const char *fmt,
				 va_list args)
{
	ehi->desc_len += vscnprintf(ehi->desc + ehi->desc_len,
				    ATA_EH_DESC_LEN - ehi->desc_len,
				    fmt, args);
}

/**
 *	__ata_ehi_push_desc - push error description without adding separator
 *	@ehi: target EHI
 *	@fmt: printf format string
 *
 *	Format string according to @fmt and append it to @ehi->desc.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	__ata_ehi_pushv_desc(ehi, fmt, args);
	va_end(args);
}

/**
 *	ata_ehi_push_desc - push error description with separator
 *	@ehi: target EHI
 *	@fmt: printf format string
 *
 *	Format string according to @fmt and append it to @ehi->desc.
 *	If @ehi->desc is not empty, ", " is added in-between.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
{
	va_list args;

	if (ehi->desc_len)
		__ata_ehi_push_desc(ehi, ", ");

	va_start(args, fmt);
	__ata_ehi_pushv_desc(ehi, fmt, args);
	va_end(args);
}

/**
 *	ata_ehi_clear_desc - clean error description
 *	@ehi: target EHI
 *
 *	Clear @ehi->desc.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_ehi_clear_desc(struct ata_eh_info *ehi)
{
	ehi->desc[0] = '\0';
	ehi->desc_len = 0;
}

/**
 *	ata_port_desc - append port description
 *	@ap: target ATA port
 *	@fmt: printf format string
 *
 *	Format string according to @fmt and append it to port
 *	description.  If port description is not empty, " " is added
 *	in-between.  This function is to be used while initializing
 *	ata_host.  The description is printed on host registration.
 *
 *	LOCKING:
 *	None.
 */
void ata_port_desc(struct ata_port *ap, const char *fmt, ...)
{
	va_list args;

	WARN_ON(!(ap->pflags & ATA_PFLAG_INITIALIZING));

	if (ap->link.eh_info.desc_len)
		__ata_ehi_push_desc(&ap->link.eh_info, " ");

	va_start(args, fmt);
	__ata_ehi_pushv_desc(&ap->link.eh_info, fmt, args);
	va_end(args);
}

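/* Illustrative example (hypothetical LLDD code): while holding the
 * host lock, an interrupt handler can describe what it saw before
 * scheduling EH, and the fragments come out comma-separated in the
 * EH report:
 *
 *	struct ata_eh_info *ehi = &ap->link.eh_info;
 *
 *	ata_ehi_clear_desc(ehi);
 *	ata_ehi_push_desc(ehi, "irq_stat 0x%08x", irq_stat);
 *	ata_ehi_push_desc(ehi, "cmd boundary");
 *
 * yields "irq_stat 0x12345678, cmd boundary" in @ehi->desc (the
 * irq_stat variable and format are made up for the example).
 */
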
#ifdef CONFIG_PCI

/**
 *	ata_port_pbar_desc - append PCI BAR description
 *	@ap: target ATA port
 *	@bar: target PCI BAR
 *	@offset: offset into PCI BAR
 *	@name: name of the area
 *
 *	If @offset is negative, this function formats a string which
 *	contains the name, address, size and type of the BAR and
 *	appends it to the port description.  If @offset is zero or
 *	positive, only the name and the offset address are appended.
 *
 *	LOCKING:
 *	None.
 */
void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset,
			const char *name)
{
	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
	char *type = "";
	unsigned long long start, len;

	if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM)
		type = "m";
	else if (pci_resource_flags(pdev, bar) & IORESOURCE_IO)
		type = "i";

	start = (unsigned long long)pci_resource_start(pdev, bar);
	len = (unsigned long long)pci_resource_len(pdev, bar);

	if (offset < 0)
		ata_port_desc(ap, "%s %s%llu@0x%llx", name, type, len, start);
	else
		ata_port_desc(ap, "%s 0x%llx", name,
				start + (unsigned long long)offset);
}

#endif /* CONFIG_PCI */

static int ata_lookup_timeout_table(u8 cmd)
{
	int i;

	for (i = 0; i < ATA_EH_CMD_TIMEOUT_TABLE_SIZE; i++) {
		const u8 *cur;

		for (cur = ata_eh_cmd_timeout_table[i].commands; *cur; cur++)
			if (*cur == cmd)
				return i;
	}

	return -1;
}

/**
 *	ata_internal_cmd_timeout - determine timeout for an internal command
 *	@dev: target device
 *	@cmd: internal command to be issued
 *
 *	Determine timeout for internal command @cmd for @dev.
 *
 *	LOCKING:
 *	EH context.
 *
 *	RETURNS:
 *	Determined timeout.
 */
unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;
	int ent = ata_lookup_timeout_table(cmd);
	int idx;

	if (ent < 0)
		return ATA_EH_CMD_DFL_TIMEOUT;

	idx = ehc->cmd_timeout_idx[dev->devno][ent];
	return ata_eh_cmd_timeout_table[ent].timeouts[idx];
}

/**
 *	ata_internal_cmd_timed_out - notification for internal command timeout
 *	@dev: target device
 *	@cmd: internal command which timed out
 *
 *	Notify EH that internal command @cmd for @dev timed out.  This
 *	function should be called only for commands whose timeouts are
 *	determined using ata_internal_cmd_timeout().
 *
 *	LOCKING:
 *	EH context.
 */
void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;
	int ent = ata_lookup_timeout_table(cmd);
	int idx;

	if (ent < 0)
		return;

	idx = ehc->cmd_timeout_idx[dev->devno][ent];
	if (ata_eh_cmd_timeout_table[ent].timeouts[idx + 1] != ULONG_MAX)
		ehc->cmd_timeout_idx[dev->devno][ent]++;
}

static void ata_ering_record(struct ata_ering *ering, unsigned int eflags,
			     unsigned int err_mask)
{
	struct ata_ering_entry *ent;

	WARN_ON(!err_mask);

	ering->cursor++;
	ering->cursor %= ATA_ERING_SIZE;

	ent = &ering->ring[ering->cursor];
	ent->eflags = eflags;
	ent->err_mask = err_mask;
	ent->timestamp = get_jiffies_64();
}

static struct ata_ering_entry *ata_ering_top(struct ata_ering *ering)
{
	struct ata_ering_entry *ent = &ering->ring[ering->cursor];

	if (ent->err_mask)
		return ent;
	return NULL;
}

static void ata_ering_clear(struct ata_ering *ering)
{
	memset(ering, 0, sizeof(*ering));
}

static int ata_ering_map(struct ata_ering *ering,
			 int (*map_fn)(struct ata_ering_entry *, void *),
			 void *arg)
{
	int idx, rc = 0;
	struct ata_ering_entry *ent;

	idx = ering->cursor;
	do {
		ent = &ering->ring[idx];
		if (!ent->err_mask)
			break;
		rc = map_fn(ent, arg);
		if (rc)
			break;
		idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE;
	} while (idx != ering->cursor);

	return rc;
}

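/* Illustrative example (not part of the driver): ata_ering_map()
 * visits entries newest first, starting at the cursor, and stops
 * early when @map_fn returns non-zero.  A callback counting timeouts
 * recorded since a given timestamp could look like this, where
 * struct my_arg is a hypothetical helper carrying "since" and a
 * counter:
 *
 *	static int count_timeouts_cb(struct ata_ering_entry *ent, void *arg)
 *	{
 *		struct my_arg *ma = arg;
 *
 *		if (ent->timestamp < ma->since)
 *			return -1;	(older than the window, stop)
 *		if (ent->err_mask & AC_ERR_TIMEOUT)
 *			ma->nr_timeouts++;
 *		return 0;		(keep walking)
 *	}
 *
 * speed_down_verdict_cb() below uses exactly this pattern.
 */
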
static unsigned int ata_eh_dev_action(struct ata_device *dev)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;

	return ehc->i.action | ehc->i.dev_action[dev->devno];
}

static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev,
				struct ata_eh_info *ehi, unsigned int action)
{
	struct ata_device *tdev;

	if (!dev) {
		ehi->action &= ~action;
		ata_for_each_dev(tdev, link, ALL)
			ehi->dev_action[tdev->devno] &= ~action;
	} else {
		/* doesn't make sense for port-wide EH actions */
		WARN_ON(!(action & ATA_EH_PERDEV_MASK));

		/* break ehi->action into ehi->dev_action */
		if (ehi->action & action) {
			ata_for_each_dev(tdev, link, ALL)
				ehi->dev_action[tdev->devno] |=
					ehi->action & action;
			ehi->action &= ~action;
		}

		/* turn off the specified per-dev action */
		ehi->dev_action[dev->devno] &= ~action;
	}
}

/**
 *	ata_scsi_timed_out - SCSI layer time out callback
 *	@cmd: timed out SCSI command
 *
 *	Handles SCSI layer timeout.  We race with normal completion of
 *	the qc for @cmd.  If the qc is already gone, we lose and let
 *	the scsi command finish (EH_HANDLED).  Otherwise, the qc has
 *	timed out and EH should be invoked.  Prevent ata_qc_complete()
 *	from finishing it by setting EH_SCHEDULED and return
 *	EH_NOT_HANDLED.
 *
 *	TODO: kill this function once old EH is gone.
 *
 *	LOCKING:
 *	Called from timer context
 *
 *	RETURNS:
 *	EH_HANDLED or EH_NOT_HANDLED
 */
enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
{
	struct Scsi_Host *host = cmd->device->host;
	struct ata_port *ap = ata_shost_to_port(host);
	unsigned long flags;
	struct ata_queued_cmd *qc;
	enum blk_eh_timer_return ret;

	DPRINTK("ENTER\n");

	if (ap->ops->error_handler) {
		ret = BLK_EH_NOT_HANDLED;
		goto out;
	}

	ret = BLK_EH_HANDLED;
	spin_lock_irqsave(ap->lock, flags);
	qc = ata_qc_from_tag(ap, ap->link.active_tag);
	if (qc) {
		WARN_ON(qc->scsicmd != cmd);
		qc->flags |= ATA_QCFLAG_EH_SCHEDULED;
		qc->err_mask |= AC_ERR_TIMEOUT;
		ret = BLK_EH_NOT_HANDLED;
	}
	spin_unlock_irqrestore(ap->lock, flags);

 out:
	DPRINTK("EXIT, ret=%d\n", ret);
	return ret;
}

static void ata_eh_unload(struct ata_port *ap)
{
	struct ata_link *link;
	struct ata_device *dev;
	unsigned long flags;

	/* Restore SControl IPM and SPD for the next driver and
	 * disable attached devices.
	 */
	ata_for_each_link(link, ap, PMP_FIRST) {
		sata_scr_write(link, SCR_CONTROL, link->saved_scontrol & 0xff0);
		ata_for_each_dev(dev, link, ALL)
			ata_dev_disable(dev);
	}

	/* freeze and set UNLOADED */
	spin_lock_irqsave(ap->lock, flags);

	ata_port_freeze(ap);			/* won't be thawed */
	ap->pflags &= ~ATA_PFLAG_EH_PENDING;	/* clear pending from freeze */
	ap->pflags |= ATA_PFLAG_UNLOADED;

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_scsi_error - SCSI layer error handler callback
 *	@host: SCSI host on which error occurred
 *
 *	Handles SCSI-layer-thrown error events.
 *
 *	LOCKING:
 *	Inherited from SCSI layer (none, can sleep)
 *
 *	RETURNS:
 *	Zero.
 */
void ata_scsi_error(struct Scsi_Host *host)
{
	struct ata_port *ap = ata_shost_to_port(host);
	int i;
	unsigned long flags;

	DPRINTK("ENTER\n");

	/* synchronize with port task */
	ata_port_flush_task(ap);

	/* synchronize with host lock and sort out timeouts */

	/* For new EH, all qcs are finished in one of three ways -
	 * normal completion, error completion, and SCSI timeout.
	 * Both completions can race against SCSI timeout.  When normal
	 * completion wins, the qc never reaches EH.  When error
	 * completion wins, the qc has ATA_QCFLAG_FAILED set.
	 *
	 * When SCSI timeout wins, things are a bit more complex.
	 * Normal or error completion can occur after the timeout but
	 * before this point.  In such cases, both types of
	 * completions are honored.  A scmd is determined to have
	 * timed out iff its associated qc is active and not failed.
	 */
	if (ap->ops->error_handler) {
		struct scsi_cmnd *scmd, *tmp;
		int nr_timedout = 0;

		spin_lock_irqsave(ap->lock, flags);

		/* This must occur under the ap->lock as we don't want
		   a polled recovery to race the real interrupt handler

		   The lost_interrupt handler checks for any completed but
		   non-notified command and completes much like an IRQ handler.

		   We then fall into the error recovery code which will treat
		   this as if normal completion won the race */

		if (ap->ops->lost_interrupt)
			ap->ops->lost_interrupt(ap);

		list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) {
			struct ata_queued_cmd *qc;

			for (i = 0; i < ATA_MAX_QUEUE; i++) {
				qc = __ata_qc_from_tag(ap, i);
				if (qc->flags & ATA_QCFLAG_ACTIVE &&
				    qc->scsicmd == scmd)
					break;
			}

			if (i < ATA_MAX_QUEUE) {
				/* the scmd has an associated qc */
				if (!(qc->flags & ATA_QCFLAG_FAILED)) {
					/* which hasn't failed yet, timeout */
					qc->err_mask |= AC_ERR_TIMEOUT;
					qc->flags |= ATA_QCFLAG_FAILED;
					nr_timedout++;
				}
			} else {
				/* Normal completion occurred after
				 * SCSI timeout but before this point.
				 * Successfully complete it.
				 */
				scmd->retries = scmd->allowed;
				scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
			}
		}

		/* If we have timed out qcs, they belong to EH from
		 * this point on but the state of the controller is
		 * unknown.  Freeze the port to make sure the IRQ
		 * handler doesn't diddle with those qcs.  This must
		 * be done atomically w.r.t. setting QCFLAG_FAILED.
		 */
		if (nr_timedout)
			__ata_port_freeze(ap);

		spin_unlock_irqrestore(ap->lock, flags);

		/* initialize eh_tries */
		ap->eh_tries = ATA_EH_MAX_TRIES;
	} else
		spin_unlock_wait(ap->lock);

	/* If we raced normal completion and there is nothing to
	   recover (nr_timedout == 0), why exactly are we doing error
	   recovery here? */

 repeat:
	/* invoke error handler */
	if (ap->ops->error_handler) {
		struct ata_link *link;

		/* kill fast drain timer */
		del_timer_sync(&ap->fastdrain_timer);

		/* process port resume request */
		ata_eh_handle_port_resume(ap);

		/* fetch & clear EH info */
		spin_lock_irqsave(ap->lock, flags);

		ata_for_each_link(link, ap, HOST_FIRST) {
			struct ata_eh_context *ehc = &link->eh_context;
			struct ata_device *dev;

			memset(&link->eh_context, 0, sizeof(link->eh_context));
			link->eh_context.i = link->eh_info;
			memset(&link->eh_info, 0, sizeof(link->eh_info));

			ata_for_each_dev(dev, link, ENABLED) {
				int devno = dev->devno;

				ehc->saved_xfer_mode[devno] = dev->xfer_mode;
				if (ata_ncq_enabled(dev))
					ehc->saved_ncq_enabled |= 1 << devno;
			}
		}

		ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
		ap->pflags &= ~ATA_PFLAG_EH_PENDING;
		ap->excl_link = NULL;	/* don't maintain exclusion over EH */

		spin_unlock_irqrestore(ap->lock, flags);

		/* invoke EH, skip if unloading or suspended */
		if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
			ap->ops->error_handler(ap);
		else {
			/* if unloading, commence suicide */
			if ((ap->pflags & ATA_PFLAG_UNLOADING) &&
			    !(ap->pflags & ATA_PFLAG_UNLOADED))
				ata_eh_unload(ap);
			ata_eh_finish(ap);
		}

		/* process port suspend request */
		ata_eh_handle_port_suspend(ap);

		/* Exception might have happened after ->error_handler
		 * recovered the port but before this point.  Repeat
		 * EH in such case.
		 */
		spin_lock_irqsave(ap->lock, flags);

		if (ap->pflags & ATA_PFLAG_EH_PENDING) {
			if (--ap->eh_tries) {
				spin_unlock_irqrestore(ap->lock, flags);
				goto repeat;
			}
			ata_port_printk(ap, KERN_ERR, "EH pending after %d "
					"tries, giving up\n", ATA_EH_MAX_TRIES);
			ap->pflags &= ~ATA_PFLAG_EH_PENDING;
		}

		/* this run is complete, make sure EH info is clear */
		ata_for_each_link(link, ap, HOST_FIRST)
			memset(&link->eh_info, 0, sizeof(link->eh_info));

		/* Clear host_eh_scheduled while holding ap->lock such
		 * that if exception occurs after this point but
		 * before EH completion, SCSI midlayer will
		 * re-initiate EH.
		 */
		host->host_eh_scheduled = 0;

		spin_unlock_irqrestore(ap->lock, flags);
	} else {
		WARN_ON(ata_qc_from_tag(ap, ap->link.active_tag) == NULL);
		ap->ops->eng_timeout(ap);
	}

	/* finish or retry handled scmd's and clean up */
	WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q));

	scsi_eh_flush_done_q(&ap->eh_done_q);

	/* clean up */
	spin_lock_irqsave(ap->lock, flags);

	if (ap->pflags & ATA_PFLAG_LOADING)
		ap->pflags &= ~ATA_PFLAG_LOADING;
	else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG)
		queue_delayed_work(ata_aux_wq, &ap->hotplug_task, 0);

	if (ap->pflags & ATA_PFLAG_RECOVERED)
		ata_port_printk(ap, KERN_INFO, "EH complete\n");

	ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED);

	/* tell wait_eh that we're done */
	ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS;
	wake_up_all(&ap->eh_wait_q);

	spin_unlock_irqrestore(ap->lock, flags);

	DPRINTK("EXIT\n");
}

/**
 *	ata_port_wait_eh - Wait for the currently pending EH to complete
 *	@ap: Port to wait EH for
 *
 *	Wait until the currently pending EH is complete.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
void ata_port_wait_eh(struct ata_port *ap)
{
	unsigned long flags;
	DEFINE_WAIT(wait);

 retry:
	spin_lock_irqsave(ap->lock, flags);

	while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) {
		prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irqrestore(ap->lock, flags);
		schedule();
		spin_lock_irqsave(ap->lock, flags);
	}
	finish_wait(&ap->eh_wait_q, &wait);

	spin_unlock_irqrestore(ap->lock, flags);

	/* make sure SCSI EH is complete */
	if (scsi_host_in_recovery(ap->scsi_host)) {
		msleep(10);
		goto retry;
	}
}

static int ata_eh_nr_in_flight(struct ata_port *ap)
{
	unsigned int tag;
	int nr = 0;

	/* count only non-internal commands */
	for (tag = 0; tag < ATA_MAX_QUEUE - 1; tag++)
		if (ata_qc_from_tag(ap, tag))
			nr++;

	return nr;
}

void ata_eh_fastdrain_timerfn(unsigned long arg)
{
	struct ata_port *ap = (void *)arg;
	unsigned long flags;
	int cnt;

	spin_lock_irqsave(ap->lock, flags);

	cnt = ata_eh_nr_in_flight(ap);

	/* are we done? */
	if (!cnt)
		goto out_unlock;

	if (cnt == ap->fastdrain_cnt) {
		unsigned int tag;

		/* No progress during the last interval, tag all
		 * in-flight qcs as timed out and freeze the port.
		 */
		for (tag = 0; tag < ATA_MAX_QUEUE - 1; tag++) {
			struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);
			if (qc)
				qc->err_mask |= AC_ERR_TIMEOUT;
		}

		ata_port_freeze(ap);
	} else {
		/* some qcs have finished, give it another chance */
		ap->fastdrain_cnt = cnt;
		ap->fastdrain_timer.expires =
			ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
		add_timer(&ap->fastdrain_timer);
	}

 out_unlock:
	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_set_pending - set ATA_PFLAG_EH_PENDING and activate fast drain
 *	@ap: target ATA port
 *	@fastdrain: activate fast drain
 *
 *	Set ATA_PFLAG_EH_PENDING and activate fast drain if @fastdrain
 *	is non-zero and EH wasn't pending before.  Fast drain ensures
 *	that EH kicks in in a timely manner.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
static void ata_eh_set_pending(struct ata_port *ap, int fastdrain)
{
	int cnt;

	/* already scheduled? */
	if (ap->pflags & ATA_PFLAG_EH_PENDING)
		return;

	ap->pflags |= ATA_PFLAG_EH_PENDING;

	if (!fastdrain)
		return;

	/* do we have in-flight qcs? */
	cnt = ata_eh_nr_in_flight(ap);
	if (!cnt)
		return;

	/* activate fast drain */
	ap->fastdrain_cnt = cnt;
	ap->fastdrain_timer.expires =
		ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
	add_timer(&ap->fastdrain_timer);
}

/**
 *	ata_qc_schedule_eh - schedule qc for error handling
 *	@qc: command to schedule error handling for
 *
 *	Schedule error handling for @qc.  EH will kick in as soon as
 *	other commands are drained.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;

	WARN_ON(!ap->ops->error_handler);

	qc->flags |= ATA_QCFLAG_FAILED;
	ata_eh_set_pending(ap, 1);

	/* The following will fail if timeout has already expired.
	 * ata_scsi_error() takes care of such scmds on EH entry.
	 * Note that ATA_QCFLAG_FAILED is unconditionally set after
	 * this function completes.
	 */
	blk_abort_request(qc->scsicmd->request);
}

/**
 *	ata_port_schedule_eh - schedule error handling without a qc
 *	@ap: ATA port to schedule EH for
 *
 *	Schedule error handling for @ap.  EH will kick in as soon as
 *	all commands are drained.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_port_schedule_eh(struct ata_port *ap)
{
	WARN_ON(!ap->ops->error_handler);

	if (ap->pflags & ATA_PFLAG_INITIALIZING)
		return;

	ata_eh_set_pending(ap, 1);
	scsi_schedule_eh(ap->scsi_host);

	DPRINTK("port EH scheduled\n");
}

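/* Illustrative example (hypothetical LLDD code): on an asynchronous
 * event such as a PHY status change, a driver typically notes the
 * event in the EH info and schedules port-wide EH while holding the
 * host lock:
 *
 *	struct ata_eh_info *ehi = &ap->link.eh_info;
 *
 *	ata_ehi_hotplugged(ehi);
 *	ata_ehi_push_desc(ehi, "PHY status changed");
 *	ata_port_schedule_eh(ap);
 *
 * ata_ehi_hotplugged() requests reset and probing for the link, and
 * the description shows up in the subsequent EH report.
 */
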
static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link)
{
	int tag, nr_aborted = 0;

	WARN_ON(!ap->ops->error_handler);

	/* we're gonna abort all commands, no need for fast drain */
	ata_eh_set_pending(ap, 0);

	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);

		if (qc && (!link || qc->dev->link == link)) {
			qc->flags |= ATA_QCFLAG_FAILED;
			ata_qc_complete(qc);
			nr_aborted++;
		}
	}

	if (!nr_aborted)
		ata_port_schedule_eh(ap);

	return nr_aborted;
}

/**
 *	ata_link_abort - abort all qc's on the link
 *	@link: ATA link to abort qc's for
 *
 *	Abort all active qc's on @link and schedule EH.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	Number of aborted qc's.
 */
int ata_link_abort(struct ata_link *link)
{
	return ata_do_link_abort(link->ap, link);
}

/**
 *	ata_port_abort - abort all qc's on the port
 *	@ap: ATA port to abort qc's for
 *
 *	Abort all active qc's of @ap and schedule EH.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	Number of aborted qc's.
 */
int ata_port_abort(struct ata_port *ap)
{
	return ata_do_link_abort(ap, NULL);
}

/**
 *	__ata_port_freeze - freeze port
 *	@ap: ATA port to freeze
 *
 *	This function is called when HSM violation or some other
 *	condition disrupts normal operation of the port.  A frozen port
 *	is not allowed to perform any operation until the port is
 *	thawed, which usually follows a successful reset.
 *
 *	ap->ops->freeze() callback can be used for freezing the port
 *	hardware-wise (e.g. mask interrupt and stop DMA engine).  If a
 *	port cannot be frozen hardware-wise, the interrupt handler
 *	must ack and clear interrupts unconditionally while the port
 *	is frozen.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
static void __ata_port_freeze(struct ata_port *ap)
{
	WARN_ON(!ap->ops->error_handler);

	if (ap->ops->freeze)
		ap->ops->freeze(ap);

	ap->pflags |= ATA_PFLAG_FROZEN;

	DPRINTK("ata%u port frozen\n", ap->print_id);
}

/**
 *	ata_port_freeze - abort & freeze port
 *	@ap: ATA port to freeze
 *
 *	Abort and freeze @ap.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	Number of aborted commands.
 */
int ata_port_freeze(struct ata_port *ap)
{
	int nr_aborted;

	WARN_ON(!ap->ops->error_handler);

	nr_aborted = ata_port_abort(ap);
	__ata_port_freeze(ap);

	return nr_aborted;
}

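/* Illustrative example (hypothetical LLDD code): an interrupt handler
 * that detects a serious condition such as an HSM violation usually
 * records the error and freezes the port, aborting whatever was in
 * flight; this runs under the host lock:
 *
 *	qc->err_mask |= AC_ERR_HSM;
 *	ata_ehi_push_desc(&ap->link.eh_info, "HSM violation");
 *	ata_port_freeze(ap);
 *
 * The port then stays frozen, with interrupts masked by ->freeze(),
 * until EH recovers the link and thaws it via ata_eh_thaw_port().
 */
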
/**
 *	sata_async_notification - SATA async notification handler
 *	@ap: ATA port where async notification is received
 *
 *	Handler to be called when async notification via SDB FIS is
 *	received.  This function schedules EH if necessary.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	1 if EH is scheduled, 0 otherwise.
 */
int sata_async_notification(struct ata_port *ap)
{
	u32 sntf;
	int rc;

	if (!(ap->flags & ATA_FLAG_AN))
		return 0;

	rc = sata_scr_read(&ap->link, SCR_NOTIFICATION, &sntf);
	if (rc == 0)
		sata_scr_write(&ap->link, SCR_NOTIFICATION, sntf);

	if (!sata_pmp_attached(ap) || rc) {
		/* PMP is not attached or SNTF is not available */
		if (!sata_pmp_attached(ap)) {
			/* PMP is not attached.  Check whether ATAPI
			 * AN is configured.  If so, notify media
			 * change.
			 */
			struct ata_device *dev = ap->link.device;

			if ((dev->class == ATA_DEV_ATAPI) &&
			    (dev->flags & ATA_DFLAG_AN))
				ata_scsi_media_change_notify(dev);
			return 0;
		} else {
			/* PMP is attached but SNTF is not available.
			 * ATAPI async media change notification is
			 * not used.  The PMP must be reporting PHY
			 * status change, schedule EH.
			 */
			ata_port_schedule_eh(ap);
			return 1;
		}
	} else {
		/* PMP is attached and SNTF is available */
		struct ata_link *link;

		/* check and notify ATAPI AN */
		ata_for_each_link(link, ap, EDGE) {
			if (!(sntf & (1 << link->pmp)))
				continue;

			if ((link->device->class == ATA_DEV_ATAPI) &&
			    (link->device->flags & ATA_DFLAG_AN))
				ata_scsi_media_change_notify(link->device);
		}

		/* If PMP is reporting that PHY status of some
		 * downstream ports has changed, schedule EH.
		 */
		if (sntf & (1 << SATA_PMP_CTRL_PORT)) {
			ata_port_schedule_eh(ap);
			return 1;
		}

		return 0;
	}
}

/**
 *	ata_eh_freeze_port - EH helper to freeze port
 *	@ap: ATA port to freeze
 *
 *	Freeze @ap.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_freeze_port(struct ata_port *ap)
{
	unsigned long flags;

	if (!ap->ops->error_handler)
		return;

	spin_lock_irqsave(ap->lock, flags);
	__ata_port_freeze(ap);
	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_thaw_port - EH helper to thaw port
 *	@ap: ATA port to thaw
 *
 *	Thaw frozen port @ap.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_thaw_port(struct ata_port *ap)
{
	unsigned long flags;

	if (!ap->ops->error_handler)
		return;

	spin_lock_irqsave(ap->lock, flags);

	ap->pflags &= ~ATA_PFLAG_FROZEN;

	if (ap->ops->thaw)
		ap->ops->thaw(ap);

	spin_unlock_irqrestore(ap->lock, flags);

	DPRINTK("ata%u port thawed\n", ap->print_id);
}

static void ata_eh_scsidone(struct scsi_cmnd *scmd)
{
	/* nada */
}

static void __ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	struct scsi_cmnd *scmd = qc->scsicmd;
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);
	qc->scsidone = ata_eh_scsidone;
	__ata_qc_complete(qc);
	WARN_ON(ata_tag_valid(qc->tag));
	spin_unlock_irqrestore(ap->lock, flags);

	scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
}

/**
 *	ata_eh_qc_complete - Complete an active ATA command from EH
 *	@qc: Command to complete
 *
 *	Indicate to the mid and upper layers that an ATA command has
 *	completed.  To be used from EH.
 */
void ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *scmd = qc->scsicmd;
	scmd->retries = scmd->allowed;
	__ata_eh_qc_complete(qc);
}

/**
 *	ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH
 *	@qc: Command to retry
 *
 *	Indicate to the mid and upper layers that an ATA command
 *	should be retried.  To be used from EH.
 *
 *	SCSI midlayer limits the number of retries to scmd->allowed.
 *	scmd->retries is decremented for commands which get retried
 *	due to unrelated failures (qc->err_mask is zero).
 */
void ata_eh_qc_retry(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *scmd = qc->scsicmd;
	if (!qc->err_mask && scmd->retries)
		scmd->retries--;
	__ata_eh_qc_complete(qc);
}

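/* Illustrative sketch (not the actual logic of any one function): at
 * the end of recovery, EH walks the failed qcs and finishes each one
 * either way, roughly:
 *
 *	if (qc->flags & ATA_QCFLAG_RETRY)
 *		ata_eh_qc_retry(qc);
 *	else
 *		ata_eh_qc_complete(qc);
 *
 * i.e. commands marked worth retrying during autopsy go back to the
 * midlayer for another attempt while everything else completes with
 * its error status.  ata_eh_finish() elsewhere in this file makes a
 * similar decision, additionally considering err_mask and sense data.
 */
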
/**
 *	ata_dev_disable - disable ATA device
 *	@dev: ATA device to disable
 *
 *	Disable @dev.
 *
 *	LOCKING:
 *	EH context.
 */
void ata_dev_disable(struct ata_device *dev)
{
	if (!ata_dev_enabled(dev))
		return;

	if (ata_msg_drv(dev->link->ap))
		ata_dev_printk(dev, KERN_WARNING, "disabled\n");
	ata_acpi_on_disable(dev);
	ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO0 | ATA_DNXFER_QUIET);
	dev->class++;

	/* From now till the next successful probe, ering is used to
	 * track probe failures.  Clear accumulated device error info.
	 */
	ata_ering_clear(&dev->ering);
}

/**
 *	ata_eh_detach_dev - detach ATA device
 *	@dev: ATA device to detach
 *
 *	Detach @dev.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_detach_dev(struct ata_device *dev)
{
	struct ata_link *link = dev->link;
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	unsigned long flags;

	ata_dev_disable(dev);

	spin_lock_irqsave(ap->lock, flags);

	dev->flags &= ~ATA_DFLAG_DETACH;

	if (ata_scsi_offline_dev(dev)) {
		dev->flags |= ATA_DFLAG_DETACHED;
		ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
	}

	/* clear per-dev EH info */
	ata_eh_clear_action(link, dev, &link->eh_info, ATA_EH_PERDEV_MASK);
	ata_eh_clear_action(link, dev, &link->eh_context.i, ATA_EH_PERDEV_MASK);
	ehc->saved_xfer_mode[dev->devno] = 0;
	ehc->saved_ncq_enabled &= ~(1 << dev->devno);

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_about_to_do - about to perform eh_action
 *	@link: target ATA link
 *	@dev: target ATA dev for per-dev action (can be NULL)
 *	@action: action about to be performed
 *
 *	Called just before performing EH actions to clear related bits
 *	in @link->eh_info such that eh actions are not unnecessarily
 *	repeated.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev,
			unsigned int action)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_info *ehi = &link->eh_info;
	struct ata_eh_context *ehc = &link->eh_context;
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);

	ata_eh_clear_action(link, dev, ehi, action);

	/* About to take EH action, set RECOVERED.  Ignore actions on
	 * slave links as master will do them again.
	 */
	if (!(ehc->i.flags & ATA_EHI_QUIET) && link != ap->slave_link)
		ap->pflags |= ATA_PFLAG_RECOVERED;

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_done - EH action complete
 *	@link: ATA link for which EH actions are complete
 *	@dev: target ATA dev for per-dev action (can be NULL)
 *	@action: action just completed
 *
 *	Called right after performing EH actions to clear related bits
 *	in @link->eh_context.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_done(struct ata_link *link, struct ata_device *dev,
		 unsigned int action)
{
	struct ata_eh_context *ehc = &link->eh_context;

	ata_eh_clear_action(link, dev, &ehc->i, action);
}

/**
 *	ata_err_string - convert err_mask to descriptive string
 *	@err_mask: error mask to convert to string
 *
 *	Convert @err_mask to descriptive string.  Errors are
 *	prioritized according to severity and only the most severe
 *	error is reported.
 *
 *	LOCKING:
 *	None.
 *
 *	RETURNS:
 *	Descriptive string for @err_mask
 */
static const char *ata_err_string(unsigned int err_mask)
{
	if (err_mask & AC_ERR_HOST_BUS)
		return "host bus error";
	if (err_mask & AC_ERR_ATA_BUS)
		return "ATA bus error";
	if (err_mask & AC_ERR_TIMEOUT)
		return "timeout";
	if (err_mask & AC_ERR_HSM)
		return "HSM violation";
	if (err_mask & AC_ERR_SYSTEM)
		return "internal error";
	if (err_mask & AC_ERR_MEDIA)
		return "media error";
	if (err_mask & AC_ERR_INVALID)
		return "invalid argument";
	if (err_mask & AC_ERR_DEV)
		return "device error";
	return "unknown error";
}

/**
 *	ata_read_log_page - read a specific log page
 *	@dev: target device
 *	@page: page to read
 *	@buf: buffer to store read page
 *	@sectors: number of sectors to read
 *
 *	Read log page using READ_LOG_EXT command.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, AC_ERR_* mask otherwise.
 */
static unsigned int ata_read_log_page(struct ata_device *dev,
				      u8 page, void *buf, unsigned int sectors)
{
	struct ata_taskfile tf;
	unsigned int err_mask;

	DPRINTK("read log page - page %d\n", page);

	ata_tf_init(dev, &tf);
	tf.command = ATA_CMD_READ_LOG_EXT;
	tf.lbal = page;
	tf.nsect = sectors;
	tf.hob_nsect = sectors >> 8;
	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_LBA48 | ATA_TFLAG_DEVICE;
	tf.protocol = ATA_PROT_PIO;

	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE,
				     buf, sectors * ATA_SECT_SIZE, 0);

	DPRINTK("EXIT, err_mask=%x\n", err_mask);
	return err_mask;
}

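/* Illustrative example (not part of the driver): ata_eh_read_log_10h()
 * below is the user of this helper; it fetches one sector of the SATA
 * NCQ error log into the port's scratch buffer:
 *
 *	u8 *buf = dev->link->ap->sector_buf;
 *	unsigned int err_mask;
 *
 *	err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, buf, 1);
 *
 * A zero return means @buf now holds the 512-byte log page.
 */
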
/**
 *	ata_eh_read_log_10h - Read log page 10h for NCQ error details
 *	@dev: Device to read log page 10h from
 *	@tag: Resulting tag of the failed command
 *	@tf: Resulting taskfile registers of the failed command
 *
 *	Read log page 10h to obtain NCQ error details and clear error
 *	condition.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, -errno otherwise.
 */
static int ata_eh_read_log_10h(struct ata_device *dev,
			       int *tag, struct ata_taskfile *tf)
{
	u8 *buf = dev->link->ap->sector_buf;
	unsigned int err_mask;
	u8 csum;
	int i;

	err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, buf, 1);
	if (err_mask)
		return -EIO;

	csum = 0;
	for (i = 0; i < ATA_SECT_SIZE; i++)
		csum += buf[i];
	if (csum)
		ata_dev_printk(dev, KERN_WARNING,
			       "invalid checksum 0x%x on log page 10h\n", csum);

	if (buf[0] & 0x80)
		return -ENOENT;

	*tag = buf[0] & 0x1f;

	tf->command = buf[2];
	tf->feature = buf[3];
	tf->lbal = buf[4];
	tf->lbam = buf[5];
	tf->lbah = buf[6];
	tf->device = buf[7];
	tf->hob_lbal = buf[8];
	tf->hob_lbam = buf[9];
	tf->hob_lbah = buf[10];
	tf->nsect = buf[12];
	tf->hob_nsect = buf[13];

	return 0;
}

/**
 *	atapi_eh_tur - perform ATAPI TEST_UNIT_READY
 *	@dev: target ATAPI device
 *	@r_sense_key: out parameter for sense_key
 *
 *	Perform ATAPI TEST_UNIT_READY.
 *
 *	LOCKING:
 *	EH context (may sleep).
 *
 *	RETURNS:
 *	0 on success, AC_ERR_* mask on failure.
 */
static unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key)
{
	u8 cdb[ATAPI_CDB_LEN] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 };
	struct ata_taskfile tf;
	unsigned int err_mask;

	ata_tf_init(dev, &tf);

	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.command = ATA_CMD_PACKET;
	tf.protocol = ATAPI_PROT_NODATA;

	err_mask = ata_exec_internal(dev, &tf, cdb, DMA_NONE, NULL, 0, 0);
	if (err_mask == AC_ERR_DEV)
		*r_sense_key = tf.feature >> 4;
	return err_mask;
}

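/* Illustrative example (not part of the driver): EH can poll an ATAPI
 * device with TEST UNIT READY and branch on the returned sense key,
 * e.g. to wait out a UNIT ATTENTION condition after a reset:
 *
 *	u8 sense_key = 0;
 *	unsigned int err_mask;
 *
 *	err_mask = atapi_eh_tur(dev, &sense_key);
 *	if (err_mask == AC_ERR_DEV && sense_key == UNIT_ATTENTION)
 *		(retry; the condition is usually transient)
 *
 * ATA_EH_UA_TRIES above bounds how often such a retry loop should
 * spin.
 */
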
/**
 *	atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
 *	@dev: device to perform REQUEST_SENSE to
 *	@sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
 *	@dfl_sense_key: default sense key to use
 *
 *	Perform ATAPI REQUEST_SENSE after the device reported CHECK
 *	SENSE.  This function is an EH helper.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, AC_ERR_* mask on failure
 */
static unsigned int atapi_eh_request_sense(struct ata_device *dev,
					   u8 *sense_buf, u8 dfl_sense_key)
{
	u8 cdb[ATAPI_CDB_LEN] =
		{ REQUEST_SENSE, 0, 0, 0, SCSI_SENSE_BUFFERSIZE, 0 };
	struct ata_port *ap = dev->link->ap;
	struct ata_taskfile tf;

	DPRINTK("ATAPI request sense\n");

	/* FIXME: is this needed? */
	memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);

	/* initialize sense_buf with the error register,
	 * for the case where they are -not- overwritten
	 */
	sense_buf[0] = 0x70;
	sense_buf[2] = dfl_sense_key;

	/* some devices time out if garbage left in tf */
	ata_tf_init(dev, &tf);

	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.command = ATA_CMD_PACKET;

	/* is it pointless to prefer PIO for "safety reasons"? */
	if (ap->flags & ATA_FLAG_PIO_DMA) {
		tf.protocol = ATAPI_PROT_DMA;
		tf.feature |= ATAPI_PKT_DMA;
	} else {
		tf.protocol = ATAPI_PROT_PIO;
		tf.lbam = SCSI_SENSE_BUFFERSIZE;
		tf.lbah = 0;
	}

	return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
				 sense_buf, SCSI_SENSE_BUFFERSIZE, 0);
}

/**
 *	ata_eh_analyze_serror - analyze SError for a failed port
 *	@link: ATA link to analyze SError for
 *
 *	Analyze SError if available and further determine cause of
 *	failure.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_analyze_serror(struct ata_link *link)
{
	struct ata_eh_context *ehc = &link->eh_context;
	u32 serror = ehc->i.serror;
	unsigned int err_mask = 0, action = 0;
	u32 hotplug_mask;

	if (serror & (SERR_PERSISTENT | SERR_DATA)) {
		err_mask |= AC_ERR_ATA_BUS;
		action |= ATA_EH_RESET;
	}
	if (serror & SERR_PROTOCOL) {
		err_mask |= AC_ERR_HSM;
		action |= ATA_EH_RESET;
	}
	if (serror & SERR_INTERNAL) {
		err_mask |= AC_ERR_SYSTEM;
		action |= ATA_EH_RESET;
	}

	/* Determine whether a hotplug event has occurred.  Both
	 * SError.N/X are considered hotplug events for enabled or
	 * host links.  For disabled PMP links, only N bit is
	 * considered as X bit is left at 1 for link plugging.
	 */
	hotplug_mask = 0;

	if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link))
		hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG;
	else
		hotplug_mask = SERR_PHYRDY_CHG;

	if (serror & hotplug_mask)
		ata_ehi_hotplugged(&ehc->i);

	ehc->i.err_mask |= err_mask;
	ehc->i.action |= action;
}

/**
 *	ata_eh_analyze_ncq_error - analyze NCQ error
 *	@link: ATA link to analyze NCQ error for
 *
 *	Read log page 10h, determine the offending qc and acquire
 *	error status TF.  For NCQ device errors, all LLDDs have to do
 *	is setting AC_ERR_DEV in ehi->err_mask.  This function takes
 *	care of the rest.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
void ata_eh_analyze_ncq_error(struct ata_link *link)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_device *dev = link->device;
	struct ata_queued_cmd *qc;
	struct ata_taskfile tf;
	int tag, rc;

	/* if frozen, we can't do much */
	if (ap->pflags & ATA_PFLAG_FROZEN)
		return;

	/* is it NCQ device error? */
	if (!link->sactive || !(ehc->i.err_mask & AC_ERR_DEV))
		return;

	/* has LLDD analyzed already? */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		if (qc->err_mask)
			return;
	}

	/* okay, this error is ours */
	rc = ata_eh_read_log_10h(dev, &tag, &tf);
	if (rc) {
		ata_link_printk(link, KERN_ERR, "failed to read log page 10h "
				"(errno=%d)\n", rc);
		return;
	}

	if (!(link->sactive & (1 << tag))) {
		ata_link_printk(link, KERN_ERR, "log page 10h reported "
				"inactive tag %d\n", tag);
		return;
	}

	/* we've got the perpetrator, condemn it */
	qc = __ata_qc_from_tag(ap, tag);
	memcpy(&qc->result_tf, &tf, sizeof(tf));
	qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48;
	qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ;
	ehc->i.err_mask &= ~AC_ERR_DEV;
}

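/* Illustrative example (hypothetical LLDD code): per the comment
 * above, on an NCQ device error a driver only needs to flag the
 * error and kick EH; this function then pinpoints the failed tag
 * from log page 10h during autopsy:
 *
 *	struct ata_eh_info *ehi = &link->eh_info;
 *
 *	ehi->err_mask |= AC_ERR_DEV;
 *	ata_link_abort(link);
 *
 * Everything else (reading the log, condemning the right qc, and
 * clearing the link-level AC_ERR_DEV) happens here.
 */
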
/**
 *	ata_eh_analyze_tf - analyze taskfile of a failed qc
 *	@qc: qc to analyze
 *	@tf: Taskfile registers to analyze
 *
 *	Analyze taskfile of @qc and further determine cause of
 *	failure.  This function also requests ATAPI sense data if
 *	available.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	Determined recovery action
 */
static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
				      const struct ata_taskfile *tf)
{
	unsigned int tmp, action = 0;
	u8 stat = tf->command, err = tf->feature;

	if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) {
		qc->err_mask |= AC_ERR_HSM;
		return ATA_EH_RESET;
	}

	if (stat & (ATA_ERR | ATA_DF))
		qc->err_mask |= AC_ERR_DEV;
	else
		return 0;

	switch (qc->dev->class) {
	case ATA_DEV_ATA:
		if (err & ATA_ICRC)
			qc->err_mask |= AC_ERR_ATA_BUS;
		if (err & ATA_UNC)
			qc->err_mask |= AC_ERR_MEDIA;
		if (err & ATA_IDNF)
			qc->err_mask |= AC_ERR_INVALID;
		break;

	case ATA_DEV_ATAPI:
		if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) {
			tmp = atapi_eh_request_sense(qc->dev,
						qc->scsicmd->sense_buffer,
						qc->result_tf.feature >> 4);
			if (!tmp) {
				/* ATA_QCFLAG_SENSE_VALID is used to
				 * tell atapi_qc_complete() that sense
				 * data is already valid.
				 *
				 * TODO: interpret sense data and set
				 * appropriate err_mask.
				 */
				qc->flags |= ATA_QCFLAG_SENSE_VALID;
			} else
				qc->err_mask |= tmp;
		}
	}

	if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS))
		action |= ATA_EH_RESET;

	return action;
}

static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask,
				   int *xfer_ok)
{
	int base = 0;

	if (!(eflags & ATA_EFLAG_DUBIOUS_XFER))
		*xfer_ok = 1;

	if (!*xfer_ok)
		base = ATA_ECAT_DUBIOUS_NONE;

	if (err_mask & AC_ERR_ATA_BUS)
		return base + ATA_ECAT_ATA_BUS;

	if (err_mask & AC_ERR_TIMEOUT)
		return base + ATA_ECAT_TOUT_HSM;

	if (eflags & ATA_EFLAG_IS_IO) {
		if (err_mask & AC_ERR_HSM)
			return base + ATA_ECAT_TOUT_HSM;
		if ((err_mask &
		     (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
			return base + ATA_ECAT_UNK_DEV;
	}

	return 0;
}

struct speed_down_verdict_arg {
	u64 since;
	int xfer_ok;
	int nr_errors[ATA_ECAT_NR];
};

static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
{
	struct speed_down_verdict_arg *arg = void_arg;
	int cat;

	if (ent->timestamp < arg->since)
		return -1;

	cat = ata_eh_categorize_error(ent->eflags, ent->err_mask,
				      &arg->xfer_ok);
	arg->nr_errors[cat]++;

	return 0;
}

/**
 *	ata_eh_speed_down_verdict - Determine speed down verdict
 *	@dev: Device of interest
 *
 *	This function examines error ring of @dev and determines
 *	whether NCQ needs to be turned off, transfer speed should be
 *	stepped down, or falling back to PIO is necessary.
 *
 *	ECAT_ATA_BUS	: ATA_BUS error for any command
 *
 *	ECAT_TOUT_HSM	: TIMEOUT for any command or HSM violation for
 *			  IO commands
 *
 *	ECAT_UNK_DEV	: Unknown DEV error for IO commands
 *
 *	ECAT_DUBIOUS_*	: Identical to above three but occurred while
 *			  data transfer hasn't been verified.
 *
 *	Verdicts are
 *
 *	NCQ_OFF		: Turn off NCQ.
 *
 *	SPEED_DOWN	: Speed down transfer speed but don't fall back
 *			  to PIO.
 *
 *	FALLBACK_TO_PIO	: Fall back to PIO.
 *
 *	Even if multiple verdicts are returned, only one action is
 *	taken per error.  An action triggered by non-DUBIOUS errors
 *	clears ering, while one triggered by DUBIOUS_* errors doesn't.
 *	This is to expedite speed down decisions right after device is
 *	initially configured.
 *
 *	The following are the speed down rules.  #1 and #2 deal with
 *	DUBIOUS errors.
 *
 *	1. If more than one DUBIOUS_ATA_BUS or DUBIOUS_TOUT_HSM errors
 *	   occurred during last 5 mins, SPEED_DOWN and FALLBACK_TO_PIO.
 *
 *	2. If more than one DUBIOUS_TOUT_HSM or DUBIOUS_UNK_DEV errors
 *	   occurred during last 5 mins, NCQ_OFF.
 *
 *	3. If more than 6 ATA_BUS, TOUT_HSM or UNK_DEV errors
 *	   occurred during last 5 mins, FALLBACK_TO_PIO.
 *
 *	4. If more than 3 TOUT_HSM or UNK_DEV errors occurred
 *	   during last 10 mins, NCQ_OFF.
 *
 *	5. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 6
 *	   UNK_DEV errors occurred during last 10 mins, SPEED_DOWN.
 *
 *	LOCKING:
 *	Inherited from caller.
 *
 *	RETURNS:
 *	OR of ATA_EH_SPDN_* flags.
 */
static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev)
{
	const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ;
	u64 j64 = get_jiffies_64();
	struct speed_down_verdict_arg arg;
	unsigned int verdict = 0;

	/* scan past 5 mins of error history */
	memset(&arg, 0, sizeof(arg));
	arg.since = j64 - min(j64, j5mins);
	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);

	if (arg.nr_errors[ATA_ECAT_DUBIOUS_ATA_BUS] +
	    arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] > 1)
		verdict |= ATA_EH_SPDN_SPEED_DOWN |
			ATA_EH_SPDN_FALLBACK_TO_PIO | ATA_EH_SPDN_KEEP_ERRORS;

	if (arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] +
	    arg.nr_errors[ATA_ECAT_DUBIOUS_UNK_DEV] > 1)
		verdict |= ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_KEEP_ERRORS;

	if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
	    arg.nr_errors[ATA_ECAT_TOUT_HSM] +
	    arg.nr_errors[ATA_ECAT_UNK_DEV] > 6)
		verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO;

	/* scan past 10 mins of error history */
	memset(&arg, 0, sizeof(arg));
	arg.since = j64 - min(j64, j10mins);
	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);

	if (arg.nr_errors[ATA_ECAT_TOUT_HSM] +
	    arg.nr_errors[ATA_ECAT_UNK_DEV] > 3)
		verdict |= ATA_EH_SPDN_NCQ_OFF;

	if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
	    arg.nr_errors[ATA_ECAT_TOUT_HSM] > 3 ||
	    arg.nr_errors[ATA_ECAT_UNK_DEV] > 6)
		verdict |= ATA_EH_SPDN_SPEED_DOWN;

	return verdict;
}

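/* Worked example (illustrative): suppose a freshly configured device
 * logs two DUBIOUS_TOUT_HSM errors within five minutes.  Rule #1
 * yields SPEED_DOWN and FALLBACK_TO_PIO and rule #2 yields NCQ_OFF,
 * all with KEEP_ERRORS, so the error ring is preserved and the next
 * error can escalate further.  Two plain TOUT_HSM errors, by
 * contrast, trip none of the rules until the counts exceed the
 * thresholds above.
 */
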
/**
 *	ata_eh_speed_down - record error and speed down if necessary
 *	@dev: Failed device
 *	@eflags: mask of ATA_EFLAG_* flags
 *	@err_mask: err_mask of the error
 *
 *	Record error and examine error history to determine whether
 *	adjusting transmission speed is necessary.  It also sets
 *	transmission limits appropriately if such adjustment is
 *	necessary.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	Determined recovery action.
 */
static unsigned int ata_eh_speed_down(struct ata_device *dev,
				unsigned int eflags, unsigned int err_mask)
{
	struct ata_link *link = ata_dev_phys_link(dev);
	int xfer_ok = 0;
	unsigned int verdict;
	unsigned int action = 0;

	/* don't bother if Cat-0 error */
	if (ata_eh_categorize_error(eflags, err_mask, &xfer_ok) == 0)
		return 0;

	/* record error and determine whether speed down is necessary */
	ata_ering_record(&dev->ering, eflags, err_mask);
	verdict = ata_eh_speed_down_verdict(dev);

	/* turn off NCQ? */
	if ((verdict & ATA_EH_SPDN_NCQ_OFF) &&
	    (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ |
			   ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) {
		dev->flags |= ATA_DFLAG_NCQ_OFF;
		ata_dev_printk(dev, KERN_WARNING,
			       "NCQ disabled due to excessive errors\n");
		goto done;
	}

	/* speed down? */
	if (verdict & ATA_EH_SPDN_SPEED_DOWN) {
		/* speed down SATA link speed if possible */
		if (sata_down_spd_limit(link, 0) == 0) {
			action |= ATA_EH_RESET;
			goto done;
		}

		/* lower transfer mode */
		if (dev->spdn_cnt < 2) {
			static const int dma_dnxfer_sel[] =
				{ ATA_DNXFER_DMA, ATA_DNXFER_40C };
			static const int pio_dnxfer_sel[] =
				{ ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 };
			int sel;

			if (dev->xfer_shift != ATA_SHIFT_PIO)
				sel = dma_dnxfer_sel[dev->spdn_cnt];
			else
				sel = pio_dnxfer_sel[dev->spdn_cnt];

			dev->spdn_cnt++;

			if (ata_down_xfermask_limit(dev, sel) == 0) {
				action |= ATA_EH_RESET;
				goto done;
			}
		}
	}

	/* Fall back to PIO?  Slowing down to PIO is meaningless for
	 * SATA ATA devices.  Consider it only for PATA and SATAPI.
	 */
	if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) &&
	    (link->ap->cbl != ATA_CBL_SATA || dev->class == ATA_DEV_ATAPI) &&
	    (dev->xfer_shift != ATA_SHIFT_PIO)) {
		if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) {
			dev->spdn_cnt = 0;
			action |= ATA_EH_RESET;
			goto done;
		}
	}

	return 0;
 done:
	/* device has been slowed down, blow error history */
	if (!(verdict & ATA_EH_SPDN_KEEP_ERRORS))
		ata_ering_clear(&dev->ering);
	return action;
}

/**
 *	ata_eh_link_autopsy - analyze error and determine recovery action
 *	@link: host link to perform autopsy on
 *
 *	Analyze why @link failed and determine which recovery actions
 *	are needed.  This function also sets more detailed AC_ERR_*
 *	values and fills sense data for ATAPI CHECK SENSE.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
static void ata_eh_link_autopsy(struct ata_link *link)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_device *dev;
	unsigned int all_err_mask = 0, eflags = 0;
	int tag;
	u32 serror;
	int rc;

	DPRINTK("ENTER\n");

	if (ehc->i.flags & ATA_EHI_NO_AUTOPSY)
		return;

	/* obtain and analyze SError */
	rc = sata_scr_read(link, SCR_ERROR, &serror);
	if (rc == 0) {
		ehc->i.serror |= serror;
		ata_eh_analyze_serror(link);
	} else if (rc != -EOPNOTSUPP) {
		/* SError read failed, force reset and probing */
		ehc->i.probe_mask |= ATA_ALL_DEVICES;
		ehc->i.action |= ATA_EH_RESET;
		ehc->i.err_mask |= AC_ERR_OTHER;
	}

	/* analyze NCQ failure */
	ata_eh_analyze_ncq_error(link);

	/* any real error trumps AC_ERR_OTHER */
	if (ehc->i.err_mask & ~AC_ERR_OTHER)
		ehc->i.err_mask &= ~AC_ERR_OTHER;

	all_err_mask |= ehc->i.err_mask;

	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED) ||
		    ata_dev_phys_link(qc->dev) != link)
			continue;

		/* inherit upper level err_mask */
		qc->err_mask |= ehc->i.err_mask;

		/* analyze TF */
		ehc->i.action |= ata_eh_analyze_tf(qc, &qc->result_tf);

		/* DEV errors are probably spurious in case of ATA_BUS error */
		if (qc->err_mask & AC_ERR_ATA_BUS)
			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA |
					  AC_ERR_INVALID);

		/* any real error trumps unknown error */
		if (qc->err_mask & ~AC_ERR_OTHER)
			qc->err_mask &= ~AC_ERR_OTHER;

		/* SENSE_VALID trumps dev/unknown error and revalidation */
		if (qc->flags & ATA_QCFLAG_SENSE_VALID)
			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER);

		/* determine whether the command is worth retrying */
		if (!(qc->err_mask & AC_ERR_INVALID) &&
		    ((qc->flags & ATA_QCFLAG_IO) || qc->err_mask != AC_ERR_DEV))
			qc->flags |= ATA_QCFLAG_RETRY;

		/* accumulate error info */
		ehc->i.dev = qc->dev;
		all_err_mask |= qc->err_mask;
		if (qc->flags & ATA_QCFLAG_IO)
			eflags |= ATA_EFLAG_IS_IO;
	}

	/* enforce default EH actions */
	if (ap->pflags & ATA_PFLAG_FROZEN ||
	    all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT))
		ehc->i.action |= ATA_EH_RESET;
	else if (((eflags & ATA_EFLAG_IS_IO) && all_err_mask) ||
		 (!(eflags & ATA_EFLAG_IS_IO) && (all_err_mask & ~AC_ERR_DEV)))
		ehc->i.action |= ATA_EH_REVALIDATE;

	/* If we have offending qcs and the associated failed device,
	 * perform per-dev EH action only on the offending device.
	 */
	 */
	if (ehc->i.dev) {
		ehc->i.dev_action[ehc->i.dev->devno] |=
			ehc->i.action & ATA_EH_PERDEV_MASK;
		ehc->i.action &= ~ATA_EH_PERDEV_MASK;
	}

	/* propagate timeout to host link */
	if ((all_err_mask & AC_ERR_TIMEOUT) && !ata_is_host_link(link))
		ap->link.eh_context.i.err_mask |= AC_ERR_TIMEOUT;

	/* record error and consider speeding down */
	dev = ehc->i.dev;
	if (!dev && ((ata_link_max_devices(link) == 1 &&
		      ata_dev_enabled(link->device))))
		dev = link->device;

	if (dev) {
		if (dev->flags & ATA_DFLAG_DUBIOUS_XFER)
			eflags |= ATA_EFLAG_DUBIOUS_XFER;
		ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask);
	}

	DPRINTK("EXIT\n");
}

/**
 * ata_eh_autopsy - analyze error and determine recovery action
 * @ap: host port to perform autopsy on
 *
 * Analyze all links of @ap and determine why they failed and
 * which recovery actions are needed.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
void ata_eh_autopsy(struct ata_port *ap)
{
	struct ata_link *link;

	ata_for_each_link(link, ap, EDGE)
		ata_eh_link_autopsy(link);

	/* Handle the frigging slave link.  Autopsy is done similarly
	 * but actions and flags are transferred over to the master
	 * link and handled from there.
	 */
	if (ap->slave_link) {
		struct ata_eh_context *mehc = &ap->link.eh_context;
		struct ata_eh_context *sehc = &ap->slave_link->eh_context;

		/* transfer control flags from master to slave */
		sehc->i.flags |= mehc->i.flags & ATA_EHI_TO_SLAVE_MASK;

		/* perform autopsy on the slave link */
		ata_eh_link_autopsy(ap->slave_link);

		/* transfer actions from slave to master and clear slave */
		ata_eh_about_to_do(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS);
		mehc->i.action |= sehc->i.action;
		mehc->i.dev_action[1] |= sehc->i.dev_action[1];
		mehc->i.flags |= sehc->i.flags;
		ata_eh_done(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS);
	}

	/* Autopsy of fanout ports can affect host link autopsy.
	 * Perform host link autopsy last.
	 */
	if (sata_pmp_attached(ap))
		ata_eh_link_autopsy(&ap->link);
}

/**
 * ata_eh_link_report - report error handling to user
 * @link: ATA link EH is going on
 *
 * Report EH to user.
 *
 * LOCKING:
 * None.
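 *
 * The output built below typically looks like (field values are
 * illustrative only and depend on the actual failure):
 *
 *   ata1.00: exception Emask 0x10 SAct 0x0 SErr 0x4010000 action 0xe frozen
 *   ata1.00: cmd c8/00:08:00:00:00/00:00:00:00:00/e0 tag 0 dma 4096 in
 *            res 40/00:00:00:00:00/00:00:00:00:00/00 Emask 0x10 (ATA bus error)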
 */
static void ata_eh_link_report(struct ata_link *link)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	const char *frozen, *desc;
	char tries_buf[6];
	int tag, nr_failed = 0;

	if (ehc->i.flags & ATA_EHI_QUIET)
		return;

	desc = NULL;
	if (ehc->i.desc[0] != '\0')
		desc = ehc->i.desc;

	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED) ||
		    ata_dev_phys_link(qc->dev) != link ||
		    ((qc->flags & ATA_QCFLAG_QUIET) &&
		     qc->err_mask == AC_ERR_DEV))
			continue;
		if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask)
			continue;

		nr_failed++;
	}

	if (!nr_failed && !ehc->i.err_mask)
		return;

	frozen = "";
	if (ap->pflags & ATA_PFLAG_FROZEN)
		frozen = " frozen";

	memset(tries_buf, 0, sizeof(tries_buf));
	if (ap->eh_tries < ATA_EH_MAX_TRIES)
		snprintf(tries_buf, sizeof(tries_buf) - 1, " t%d",
			 ap->eh_tries);

	if (ehc->i.dev) {
		ata_dev_printk(ehc->i.dev, KERN_ERR, "exception Emask 0x%x "
			       "SAct 0x%x SErr 0x%x action 0x%x%s%s\n",
			       ehc->i.err_mask, link->sactive, ehc->i.serror,
			       ehc->i.action, frozen, tries_buf);
		if (desc)
			ata_dev_printk(ehc->i.dev, KERN_ERR, "%s\n", desc);
	} else {
		ata_link_printk(link, KERN_ERR, "exception Emask 0x%x "
				"SAct 0x%x SErr 0x%x action 0x%x%s%s\n",
				ehc->i.err_mask, link->sactive, ehc->i.serror,
				ehc->i.action, frozen, tries_buf);
		if (desc)
			ata_link_printk(link, KERN_ERR, "%s\n", desc);
	}

	if (ehc->i.serror)
		ata_link_printk(link, KERN_ERR,
		  "SError: { %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s}\n",
		  ehc->i.serror & SERR_DATA_RECOVERED ? "RecovData " : "",
		  ehc->i.serror & SERR_COMM_RECOVERED ? "RecovComm " : "",
		  ehc->i.serror & SERR_DATA ? "UnrecovData " : "",
		  ehc->i.serror & SERR_PERSISTENT ? "Persist " : "",
		  ehc->i.serror & SERR_PROTOCOL ? "Proto " : "",
		  ehc->i.serror & SERR_INTERNAL ? "HostInt " : "",
		  ehc->i.serror & SERR_PHYRDY_CHG ? "PHYRdyChg " : "",
		  ehc->i.serror & SERR_PHY_INT_ERR ? "PHYInt " : "",
		  ehc->i.serror & SERR_COMM_WAKE ? "CommWake " : "",
		  ehc->i.serror & SERR_10B_8B_ERR ? "10B8B " : "",
		  ehc->i.serror & SERR_DISPARITY ? "Dispar " : "",
		  ehc->i.serror & SERR_CRC ? "BadCRC " : "",
		  ehc->i.serror & SERR_HANDSHAKE ? "Handshk " : "",
		  ehc->i.serror & SERR_LINK_SEQ_ERR ? "LinkSeq " : "",
		  ehc->i.serror & SERR_TRANS_ST_ERROR ? "TrStaTrns " : "",
		  ehc->i.serror & SERR_UNRECOG_FIS ? "UnrecFIS " : "",
		  ehc->i.serror & SERR_DEV_XCHG ?
"DevExch " : ""); 2198 2199 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 2200 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 2201 struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf; 2202 const u8 *cdb = qc->cdb; 2203 char data_buf[20] = ""; 2204 char cdb_buf[70] = ""; 2205 2206 if (!(qc->flags & ATA_QCFLAG_FAILED) || 2207 ata_dev_phys_link(qc->dev) != link || !qc->err_mask) 2208 continue; 2209 2210 if (qc->dma_dir != DMA_NONE) { 2211 static const char *dma_str[] = { 2212 [DMA_BIDIRECTIONAL] = "bidi", 2213 [DMA_TO_DEVICE] = "out", 2214 [DMA_FROM_DEVICE] = "in", 2215 }; 2216 static const char *prot_str[] = { 2217 [ATA_PROT_PIO] = "pio", 2218 [ATA_PROT_DMA] = "dma", 2219 [ATA_PROT_NCQ] = "ncq", 2220 [ATAPI_PROT_PIO] = "pio", 2221 [ATAPI_PROT_DMA] = "dma", 2222 }; 2223 2224 snprintf(data_buf, sizeof(data_buf), " %s %u %s", 2225 prot_str[qc->tf.protocol], qc->nbytes, 2226 dma_str[qc->dma_dir]); 2227 } 2228 2229 if (ata_is_atapi(qc->tf.protocol)) 2230 snprintf(cdb_buf, sizeof(cdb_buf), 2231 "cdb %02x %02x %02x %02x %02x %02x %02x %02x " 2232 "%02x %02x %02x %02x %02x %02x %02x %02x\n ", 2233 cdb[0], cdb[1], cdb[2], cdb[3], 2234 cdb[4], cdb[5], cdb[6], cdb[7], 2235 cdb[8], cdb[9], cdb[10], cdb[11], 2236 cdb[12], cdb[13], cdb[14], cdb[15]); 2237 2238 ata_dev_printk(qc->dev, KERN_ERR, 2239 "cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 2240 "tag %d%s\n %s" 2241 "res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 2242 "Emask 0x%x (%s)%s\n", 2243 cmd->command, cmd->feature, cmd->nsect, 2244 cmd->lbal, cmd->lbam, cmd->lbah, 2245 cmd->hob_feature, cmd->hob_nsect, 2246 cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah, 2247 cmd->device, qc->tag, data_buf, cdb_buf, 2248 res->command, res->feature, res->nsect, 2249 res->lbal, res->lbam, res->lbah, 2250 res->hob_feature, res->hob_nsect, 2251 res->hob_lbal, res->hob_lbam, res->hob_lbah, 2252 res->device, qc->err_mask, ata_err_string(qc->err_mask), 2253 qc->err_mask & AC_ERR_NCQ ? " <F>" : ""); 2254 2255 if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | 2256 ATA_ERR)) { 2257 if (res->command & ATA_BUSY) 2258 ata_dev_printk(qc->dev, KERN_ERR, 2259 "status: { Busy }\n"); 2260 else 2261 ata_dev_printk(qc->dev, KERN_ERR, 2262 "status: { %s%s%s%s}\n", 2263 res->command & ATA_DRDY ? "DRDY " : "", 2264 res->command & ATA_DF ? "DF " : "", 2265 res->command & ATA_DRQ ? "DRQ " : "", 2266 res->command & ATA_ERR ? "ERR " : ""); 2267 } 2268 2269 if (cmd->command != ATA_CMD_PACKET && 2270 (res->feature & (ATA_ICRC | ATA_UNC | ATA_IDNF | 2271 ATA_ABORTED))) 2272 ata_dev_printk(qc->dev, KERN_ERR, 2273 "error: { %s%s%s%s}\n", 2274 res->feature & ATA_ICRC ? "ICRC " : "", 2275 res->feature & ATA_UNC ? "UNC " : "", 2276 res->feature & ATA_IDNF ? "IDNF " : "", 2277 res->feature & ATA_ABORTED ? "ABRT " : ""); 2278 } 2279 } 2280 2281 /** 2282 * ata_eh_report - report error handling to user 2283 * @ap: ATA port to report EH about 2284 * 2285 * Report EH to user. 2286 * 2287 * LOCKING: 2288 * None. 
 */
void ata_eh_report(struct ata_port *ap)
{
	struct ata_link *link;

	ata_for_each_link(link, ap, HOST_FIRST)
		ata_eh_link_report(link);
}

static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset,
			unsigned int *classes, unsigned long deadline,
			bool clear_classes)
{
	struct ata_device *dev;

	if (clear_classes)
		ata_for_each_dev(dev, link, ALL)
			classes[dev->devno] = ATA_DEV_UNKNOWN;

	return reset(link, classes, deadline);
}

static int ata_eh_followup_srst_needed(struct ata_link *link,
				       int rc, const unsigned int *classes)
{
	if ((link->flags & ATA_LFLAG_NO_SRST) || ata_link_offline(link))
		return 0;
	if (rc == -EAGAIN)
		return 1;
	if (sata_pmp_supported(link->ap) && ata_is_host_link(link))
		return 1;
	return 0;
}

int ata_eh_reset(struct ata_link *link, int classify,
		 ata_prereset_fn_t prereset, ata_reset_fn_t softreset,
		 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset)
{
	struct ata_port *ap = link->ap;
	struct ata_link *slave = ap->slave_link;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_eh_context *sehc = &slave->eh_context;
	unsigned int *classes = ehc->classes;
	unsigned int lflags = link->flags;
	int verbose = !(ehc->i.flags & ATA_EHI_QUIET);
	int max_tries = 0, try = 0;
	struct ata_link *failed_link;
	struct ata_device *dev;
	unsigned long deadline, now;
	ata_reset_fn_t reset;
	unsigned long flags;
	u32 sstatus;
	int nr_unknown, rc;

	/*
	 * Prepare to reset
	 */
	while (ata_eh_reset_timeouts[max_tries] != ULONG_MAX)
		max_tries++;
	if (link->flags & ATA_LFLAG_NO_HRST)
		hardreset = NULL;
	if (link->flags & ATA_LFLAG_NO_SRST)
		softreset = NULL;

	/* make sure each reset attempt is at least COOL_DOWN apart */
	if (ehc->i.flags & ATA_EHI_DID_RESET) {
		now = jiffies;
		WARN_ON(time_after(ehc->last_reset, now));
		deadline = ata_deadline(ehc->last_reset,
					ATA_EH_RESET_COOL_DOWN);
		if (time_before(now, deadline))
			schedule_timeout_uninterruptible(deadline - now);
	}

	spin_lock_irqsave(ap->lock, flags);
	ap->pflags |= ATA_PFLAG_RESETTING;
	spin_unlock_irqrestore(ap->lock, flags);

	ata_eh_about_to_do(link, NULL, ATA_EH_RESET);

	ata_for_each_dev(dev, link, ALL) {
		/* If we issue an SRST then an ATA drive (not ATAPI)
		 * may change configuration and be in PIO0 timing.  If
		 * we do a hard reset (or are coming from power on)
		 * this is true for ATA or ATAPI.  Until we've set a
		 * suitable controller mode we should not touch the
		 * bus as we may be talking too fast.
		 */
		dev->pio_mode = XFER_PIO_0;

		/* If the controller has a pio mode setup function
		 * then use it to set the chipset to rights.  Don't
		 * touch the DMA setup as that will be dealt with when
		 * configuring devices.
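		 * (DMA timings are reprogrammed later through
		 * ata_set_mode() once the devices have been
		 * identified; see ata_eh_recover().)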
		 */
		if (ap->ops->set_piomode)
			ap->ops->set_piomode(ap, dev);
	}

	/* prefer hardreset */
	reset = NULL;
	ehc->i.action &= ~ATA_EH_RESET;
	if (hardreset) {
		reset = hardreset;
		ehc->i.action |= ATA_EH_HARDRESET;
	} else if (softreset) {
		reset = softreset;
		ehc->i.action |= ATA_EH_SOFTRESET;
	}

	if (prereset) {
		unsigned long deadline = ata_deadline(jiffies,
						      ATA_EH_PRERESET_TIMEOUT);

		if (slave) {
			sehc->i.action &= ~ATA_EH_RESET;
			sehc->i.action |= ehc->i.action;
		}

		rc = prereset(link, deadline);

		/* If present, do prereset on slave link too.  Reset
		 * is skipped iff both master and slave links report
		 * -ENOENT or clear ATA_EH_RESET.
		 */
		if (slave && (rc == 0 || rc == -ENOENT)) {
			int tmp;

			tmp = prereset(slave, deadline);
			if (tmp != -ENOENT)
				rc = tmp;

			ehc->i.action |= sehc->i.action;
		}

		if (rc) {
			if (rc == -ENOENT) {
				ata_link_printk(link, KERN_DEBUG,
						"port disabled. ignoring.\n");
				ehc->i.action &= ~ATA_EH_RESET;

				ata_for_each_dev(dev, link, ALL)
					classes[dev->devno] = ATA_DEV_NONE;

				rc = 0;
			} else
				ata_link_printk(link, KERN_ERR,
					"prereset failed (errno=%d)\n", rc);
			goto out;
		}

		/* prereset() might have cleared ATA_EH_RESET.  If so,
		 * bang classes, thaw and return.
		 */
		if (reset && !(ehc->i.action & ATA_EH_RESET)) {
			ata_for_each_dev(dev, link, ALL)
				classes[dev->devno] = ATA_DEV_NONE;
			if ((ap->pflags & ATA_PFLAG_FROZEN) &&
			    ata_is_host_link(link))
				ata_eh_thaw_port(ap);
			rc = 0;
			goto out;
		}
	}

 retry:
	/*
	 * Perform reset
	 */
	if (ata_is_host_link(link))
		ata_eh_freeze_port(ap);

	deadline = ata_deadline(jiffies, ata_eh_reset_timeouts[try++]);

	if (reset) {
		if (verbose)
			ata_link_printk(link, KERN_INFO, "%s resetting link\n",
					reset == softreset ?
"soft" : "hard"); 2467 2468 /* mark that this EH session started with reset */ 2469 ehc->last_reset = jiffies; 2470 if (reset == hardreset) 2471 ehc->i.flags |= ATA_EHI_DID_HARDRESET; 2472 else 2473 ehc->i.flags |= ATA_EHI_DID_SOFTRESET; 2474 2475 rc = ata_do_reset(link, reset, classes, deadline, true); 2476 if (rc && rc != -EAGAIN) { 2477 failed_link = link; 2478 goto fail; 2479 } 2480 2481 /* hardreset slave link if existent */ 2482 if (slave && reset == hardreset) { 2483 int tmp; 2484 2485 if (verbose) 2486 ata_link_printk(slave, KERN_INFO, 2487 "hard resetting link\n"); 2488 2489 ata_eh_about_to_do(slave, NULL, ATA_EH_RESET); 2490 tmp = ata_do_reset(slave, reset, classes, deadline, 2491 false); 2492 switch (tmp) { 2493 case -EAGAIN: 2494 rc = -EAGAIN; 2495 case 0: 2496 break; 2497 default: 2498 failed_link = slave; 2499 rc = tmp; 2500 goto fail; 2501 } 2502 } 2503 2504 /* perform follow-up SRST if necessary */ 2505 if (reset == hardreset && 2506 ata_eh_followup_srst_needed(link, rc, classes)) { 2507 reset = softreset; 2508 2509 if (!reset) { 2510 ata_link_printk(link, KERN_ERR, 2511 "follow-up softreset required " 2512 "but no softreset avaliable\n"); 2513 failed_link = link; 2514 rc = -EINVAL; 2515 goto fail; 2516 } 2517 2518 ata_eh_about_to_do(link, NULL, ATA_EH_RESET); 2519 rc = ata_do_reset(link, reset, classes, deadline, true); 2520 if (rc) { 2521 failed_link = link; 2522 goto fail; 2523 } 2524 } 2525 } else { 2526 if (verbose) 2527 ata_link_printk(link, KERN_INFO, "no reset method " 2528 "available, skipping reset\n"); 2529 if (!(lflags & ATA_LFLAG_ASSUME_CLASS)) 2530 lflags |= ATA_LFLAG_ASSUME_ATA; 2531 } 2532 2533 /* 2534 * Post-reset processing 2535 */ 2536 ata_for_each_dev(dev, link, ALL) { 2537 /* After the reset, the device state is PIO 0 and the 2538 * controller state is undefined. Reset also wakes up 2539 * drives from sleeping mode. 2540 */ 2541 dev->pio_mode = XFER_PIO_0; 2542 dev->flags &= ~ATA_DFLAG_SLEEPING; 2543 2544 if (!ata_phys_link_offline(ata_dev_phys_link(dev))) { 2545 /* apply class override */ 2546 if (lflags & ATA_LFLAG_ASSUME_ATA) 2547 classes[dev->devno] = ATA_DEV_ATA; 2548 else if (lflags & ATA_LFLAG_ASSUME_SEMB) 2549 classes[dev->devno] = ATA_DEV_SEMB_UNSUP; 2550 } else 2551 classes[dev->devno] = ATA_DEV_NONE; 2552 } 2553 2554 /* record current link speed */ 2555 if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0) 2556 link->sata_spd = (sstatus >> 4) & 0xf; 2557 if (slave && sata_scr_read(slave, SCR_STATUS, &sstatus) == 0) 2558 slave->sata_spd = (sstatus >> 4) & 0xf; 2559 2560 /* thaw the port */ 2561 if (ata_is_host_link(link)) 2562 ata_eh_thaw_port(ap); 2563 2564 /* postreset() should clear hardware SError. Although SError 2565 * is cleared during link resume, clearing SError here is 2566 * necessary as some PHYs raise hotplug events after SRST. 2567 * This introduces race condition where hotplug occurs between 2568 * reset and here. This race is mediated by cross checking 2569 * link onlineness and classification result later. 2570 */ 2571 if (postreset) { 2572 postreset(link, classes); 2573 if (slave) 2574 postreset(slave, classes); 2575 } 2576 2577 /* clear cached SError */ 2578 spin_lock_irqsave(link->ap->lock, flags); 2579 link->eh_info.serror = 0; 2580 if (slave) 2581 slave->eh_info.serror = 0; 2582 spin_unlock_irqrestore(link->ap->lock, flags); 2583 2584 /* Make sure onlineness and classification result correspond. 
	 * Hotplug could have happened during reset and some
	 * controllers fail to wait while a drive is spinning up after
	 * being hotplugged causing misdetection.  By cross checking
	 * link onlineness and classification result, those conditions
	 * can be reliably detected and retried.
	 */
	nr_unknown = 0;
	ata_for_each_dev(dev, link, ALL) {
		/* convert all ATA_DEV_UNKNOWN to ATA_DEV_NONE */
		if (classes[dev->devno] == ATA_DEV_UNKNOWN) {
			classes[dev->devno] = ATA_DEV_NONE;
			if (ata_phys_link_online(ata_dev_phys_link(dev)))
				nr_unknown++;
		}
	}

	if (classify && nr_unknown) {
		if (try < max_tries) {
			ata_link_printk(link, KERN_WARNING, "link online but "
					"device misclassified, retrying\n");
			failed_link = link;
			rc = -EAGAIN;
			goto fail;
		}
		ata_link_printk(link, KERN_WARNING,
				"link online but device misclassified, "
				"device detection might fail\n");
	}

	/* reset successful, schedule revalidation */
	ata_eh_done(link, NULL, ATA_EH_RESET);
	if (slave)
		ata_eh_done(slave, NULL, ATA_EH_RESET);
	ehc->last_reset = jiffies;	/* update to completion time */
	ehc->i.action |= ATA_EH_REVALIDATE;

	rc = 0;
 out:
	/* clear hotplug flag */
	ehc->i.flags &= ~ATA_EHI_HOTPLUGGED;
	if (slave)
		sehc->i.flags &= ~ATA_EHI_HOTPLUGGED;

	spin_lock_irqsave(ap->lock, flags);
	ap->pflags &= ~ATA_PFLAG_RESETTING;
	spin_unlock_irqrestore(ap->lock, flags);

	return rc;

 fail:
	/* if SCR isn't accessible on a fan-out port, PMP needs to be reset */
	if (!ata_is_host_link(link) &&
	    sata_scr_read(link, SCR_STATUS, &sstatus))
		rc = -ERESTART;

	if (rc == -ERESTART || try >= max_tries)
		goto out;

	now = jiffies;
	if (time_before(now, deadline)) {
		unsigned long delta = deadline - now;

		ata_link_printk(failed_link, KERN_WARNING,
			"reset failed (errno=%d), retrying in %u secs\n",
			rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000));

		while (delta)
			delta = schedule_timeout_uninterruptible(delta);
	}

	if (try == max_tries - 1) {
		sata_down_spd_limit(link, 0);
		if (slave)
			sata_down_spd_limit(slave, 0);
	} else if (rc == -EPIPE)
		sata_down_spd_limit(failed_link, 0);

	if (hardreset)
		reset = hardreset;
	goto retry;
}

static inline void ata_eh_pull_park_action(struct ata_port *ap)
{
	struct ata_link *link;
	struct ata_device *dev;
	unsigned long flags;

	/*
	 * This function can be thought of as an extended version of
	 * ata_eh_about_to_do() specially crafted to accommodate the
	 * requirements of ATA_EH_PARK handling.  Since the EH thread
	 * does not leave the do {} while () loop in ata_eh_recover as
	 * long as the timeout for a park request to *one* device on
	 * the port has not expired, and since we still want to pick
	 * up park requests to other devices on the same port or
	 * timeout updates for the same device, we have to pull
	 * ATA_EH_PARK actions from eh_info into eh_context.i
	 * ourselves at the beginning of each pass over the loop.
	 *
	 * Additionally, all write accesses to &ap->park_req_pending
	 * through INIT_COMPLETION() (see below) or complete_all()
	 * (see ata_scsi_park_store()) are protected by the host lock.
	 * As a result we have that park_req_pending.done is zero on
	 * exit from this function, i.e.
	 * when ATA_EH_PARK actions for *all* devices on port ap have
	 * been pulled into the respective eh_context structs.  If,
	 * and only if, park_req_pending.done is non-zero by the time
	 * we reach wait_for_completion_timeout(), another ATA_EH_PARK
	 * action has been scheduled for at least one of the devices
	 * on port ap and we have to cycle over the do {} while () loop
	 * in ata_eh_recover() again.
	 */

	spin_lock_irqsave(ap->lock, flags);
	INIT_COMPLETION(ap->park_req_pending);
	ata_for_each_link(link, ap, EDGE) {
		ata_for_each_dev(dev, link, ALL) {
			struct ata_eh_info *ehi = &link->eh_info;

			link->eh_context.i.dev_action[dev->devno] |=
				ehi->dev_action[dev->devno] & ATA_EH_PARK;
			ata_eh_clear_action(link, dev, ehi, ATA_EH_PARK);
		}
	}
	spin_unlock_irqrestore(ap->lock, flags);
}

static void ata_eh_park_issue_cmd(struct ata_device *dev, int park)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;
	struct ata_taskfile tf;
	unsigned int err_mask;

	ata_tf_init(dev, &tf);
	if (park) {
		ehc->unloaded_mask |= 1 << dev->devno;
		tf.command = ATA_CMD_IDLEIMMEDIATE;
		tf.feature = 0x44;
		tf.lbal = 0x4c;
		tf.lbam = 0x4e;
		tf.lbah = 0x55;
	} else {
		ehc->unloaded_mask &= ~(1 << dev->devno);
		tf.command = ATA_CMD_CHK_POWER;
	}

	tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR;
	tf.protocol |= ATA_PROT_NODATA;
	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
	if (park && (err_mask || tf.lbal != 0xc4)) {
		ata_dev_printk(dev, KERN_ERR, "head unload failed!\n");
		ehc->unloaded_mask &= ~(1 << dev->devno);
	}
}

static int ata_eh_revalidate_and_attach(struct ata_link *link,
					struct ata_device **r_failed_dev)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_device *dev;
	unsigned int new_mask = 0;
	unsigned long flags;
	int rc = 0;

	DPRINTK("ENTER\n");

	/* For PATA drive side cable detection to work, IDENTIFY must
	 * be done backwards such that PDIAG- is released by the slave
	 * device before the master device is identified.
	 */
	ata_for_each_dev(dev, link, ALL_REVERSE) {
		unsigned int action = ata_eh_dev_action(dev);
		unsigned int readid_flags = 0;

		if (ehc->i.flags & ATA_EHI_DID_RESET)
			readid_flags |= ATA_READID_POSTRESET;

		if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) {
			WARN_ON(dev->class == ATA_DEV_PMP);

			if (ata_phys_link_offline(ata_dev_phys_link(dev))) {
				rc = -EIO;
				goto err;
			}

			ata_eh_about_to_do(link, dev, ATA_EH_REVALIDATE);
			rc = ata_dev_revalidate(dev, ehc->classes[dev->devno],
						readid_flags);
			if (rc)
				goto err;

			ata_eh_done(link, dev, ATA_EH_REVALIDATE);

			/* Configuration may have changed, reconfigure
			 * transfer mode.
			 */
			ehc->i.flags |= ATA_EHI_SETMODE;

			/* schedule the scsi_rescan_device() here */
			queue_work(ata_aux_wq, &(ap->scsi_rescan_task));
		} else if (dev->class == ATA_DEV_UNKNOWN &&
			   ehc->tries[dev->devno] &&
			   ata_class_enabled(ehc->classes[dev->devno])) {
			/* Temporarily set dev->class, it will be
			 * permanently set once all configurations are
			 * complete.  This is necessary because new
			 * device configuration is done in two
			 * separate loops.
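			 * The first loop (here) only reads IDENTIFY
			 * data; the "configure new devices forward"
			 * loop below runs ata_dev_configure() once
			 * dev->class can be set for good.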
			 */
			dev->class = ehc->classes[dev->devno];

			if (dev->class == ATA_DEV_PMP)
				rc = sata_pmp_attach(dev);
			else
				rc = ata_dev_read_id(dev, &dev->class,
						     readid_flags, dev->id);

			/* read_id might have changed class, store and reset */
			ehc->classes[dev->devno] = dev->class;
			dev->class = ATA_DEV_UNKNOWN;

			switch (rc) {
			case 0:
				/* clear error info accumulated during probe */
				ata_ering_clear(&dev->ering);
				new_mask |= 1 << dev->devno;
				break;
			case -ENOENT:
				/* IDENTIFY was issued to non-existent
				 * device.  No need to reset.  Just
				 * thaw and ignore the device.
				 */
				ata_eh_thaw_port(ap);
				break;
			default:
				goto err;
			}
		}
	}

	/* PDIAG- should have been released, ask cable type if post-reset */
	if ((ehc->i.flags & ATA_EHI_DID_RESET) && ata_is_host_link(link)) {
		if (ap->ops->cable_detect)
			ap->cbl = ap->ops->cable_detect(ap);
		ata_force_cbl(ap);
	}

	/* Configure new devices forward such that user doesn't see
	 * device detection messages backwards.
	 */
	ata_for_each_dev(dev, link, ALL) {
		if (!(new_mask & (1 << dev->devno)) ||
		    dev->class == ATA_DEV_PMP)
			continue;

		dev->class = ehc->classes[dev->devno];

		ehc->i.flags |= ATA_EHI_PRINTINFO;
		rc = ata_dev_configure(dev);
		ehc->i.flags &= ~ATA_EHI_PRINTINFO;
		if (rc) {
			dev->class = ATA_DEV_UNKNOWN;
			goto err;
		}

		spin_lock_irqsave(ap->lock, flags);
		ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
		spin_unlock_irqrestore(ap->lock, flags);

		/* new device discovered, configure xfermode */
		ehc->i.flags |= ATA_EHI_SETMODE;
	}

	return 0;

 err:
	*r_failed_dev = dev;
	DPRINTK("EXIT rc=%d\n", rc);
	return rc;
}

/**
 * ata_set_mode - Program timings and issue SET FEATURES - XFER
 * @link: link on which timings will be programmed
 * @r_failed_dev: out parameter for failed device
 *
 * Set ATA device disk transfer mode (PIO3, UDMA6, etc.).  If
 * ata_set_mode() fails, pointer to the failing device is
 * returned in @r_failed_dev.
 *
 * LOCKING:
 * PCI/etc. bus probe sem.
 *
 * RETURNS:
 * 0 on success, negative errno otherwise
 */
int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev)
{
	struct ata_port *ap = link->ap;
	struct ata_device *dev;
	int rc;

	/* if data transfer is verified, clear DUBIOUS_XFER on ering top */
	ata_for_each_dev(dev, link, ENABLED) {
		if (!(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) {
			struct ata_ering_entry *ent;

			ent = ata_ering_top(&dev->ering);
			if (ent)
				ent->eflags &= ~ATA_EFLAG_DUBIOUS_XFER;
		}
	}

	/* has private set_mode?
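	 * An LLD may provide its own ->set_mode when the controller
	 * needs special handling; otherwise the generic
	 * ata_do_set_mode() below is used.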
	 */
	if (ap->ops->set_mode)
		rc = ap->ops->set_mode(link, r_failed_dev);
	else
		rc = ata_do_set_mode(link, r_failed_dev);

	/* if transfer mode has changed, set DUBIOUS_XFER on device */
	ata_for_each_dev(dev, link, ENABLED) {
		struct ata_eh_context *ehc = &link->eh_context;
		u8 saved_xfer_mode = ehc->saved_xfer_mode[dev->devno];
		u8 saved_ncq = !!(ehc->saved_ncq_enabled & (1 << dev->devno));

		if (dev->xfer_mode != saved_xfer_mode ||
		    ata_ncq_enabled(dev) != saved_ncq)
			dev->flags |= ATA_DFLAG_DUBIOUS_XFER;
	}

	return rc;
}

/**
 * atapi_eh_clear_ua - Clear ATAPI UNIT ATTENTION after reset
 * @dev: ATAPI device to clear UA for
 *
 * Resets and other operations can make an ATAPI device raise
 * UNIT ATTENTION which causes the next operation to fail.  This
 * function clears UA.
 *
 * LOCKING:
 * EH context (may sleep).
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
static int atapi_eh_clear_ua(struct ata_device *dev)
{
	int i;

	for (i = 0; i < ATA_EH_UA_TRIES; i++) {
		u8 *sense_buffer = dev->link->ap->sector_buf;
		u8 sense_key = 0;
		unsigned int err_mask;

		err_mask = atapi_eh_tur(dev, &sense_key);
		if (err_mask != 0 && err_mask != AC_ERR_DEV) {
			ata_dev_printk(dev, KERN_WARNING, "TEST_UNIT_READY "
				"failed (err_mask=0x%x)\n", err_mask);
			return -EIO;
		}

		if (!err_mask || sense_key != UNIT_ATTENTION)
			return 0;

		err_mask = atapi_eh_request_sense(dev, sense_buffer, sense_key);
		if (err_mask) {
			ata_dev_printk(dev, KERN_WARNING, "failed to clear "
				"UNIT ATTENTION (err_mask=0x%x)\n", err_mask);
			return -EIO;
		}
	}

	ata_dev_printk(dev, KERN_WARNING,
		"UNIT ATTENTION persists after %d tries\n", ATA_EH_UA_TRIES);

	return 0;
}

static int ata_link_nr_enabled(struct ata_link *link)
{
	struct ata_device *dev;
	int cnt = 0;

	ata_for_each_dev(dev, link, ENABLED)
		cnt++;
	return cnt;
}

static int ata_link_nr_vacant(struct ata_link *link)
{
	struct ata_device *dev;
	int cnt = 0;

	ata_for_each_dev(dev, link, ALL)
		if (dev->class == ATA_DEV_UNKNOWN)
			cnt++;
	return cnt;
}

static int ata_eh_skip_recovery(struct ata_link *link)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_device *dev;

	/* skip disabled links */
	if (link->flags & ATA_LFLAG_DISABLED)
		return 1;

	/* thaw frozen port and recover failed devices */
	if ((ap->pflags & ATA_PFLAG_FROZEN) || ata_link_nr_enabled(link))
		return 0;

	/* reset at least once if reset is requested */
	if ((ehc->i.action & ATA_EH_RESET) &&
	    !(ehc->i.flags & ATA_EHI_DID_RESET))
		return 0;

	/* skip if class codes for all vacant slots are ATA_DEV_NONE */
	ata_for_each_dev(dev, link, ALL) {
		if (dev->class == ATA_DEV_UNKNOWN &&
		    ehc->classes[dev->devno] != ATA_DEV_NONE)
			return 0;
	}

	return 1;
}

static int ata_count_probe_trials_cb(struct ata_ering_entry *ent, void *void_arg)
{
	u64 interval = msecs_to_jiffies(ATA_EH_PROBE_TRIAL_INTERVAL);
	u64 now = get_jiffies_64();
	int *trials = void_arg;

	if (ent->timestamp < now - min(now, interval))
		return -1;

	(*trials)++;
	return 0;
}

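/* Illustrative note on the callback above: ata_ering_map() applies it
 * to ering entries newest-first and stops on the first non-zero
 * return, so returning -1 for entries older than
 * ATA_EH_PROBE_TRIAL_INTERVAL cuts the walk off and *trials ends up
 * counting only the probes recorded within the last interval.
 * ata_eh_schedule_probe() below uses it roughly like this (a sketch
 * of the code that follows, not additional logic):
 *
 *	int trials = 0;
 *
 *	ata_ering_record(&dev->ering, 0, AC_ERR_OTHER);
 *	ata_ering_map(&dev->ering, ata_count_probe_trials_cb, &trials);
 *	if (trials > ATA_EH_PROBE_TRIALS)
 *		sata_down_spd_limit(link, 1);
 */
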
static int ata_eh_schedule_probe(struct ata_device *dev)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;
	struct ata_link *link = ata_dev_phys_link(dev);
	int trials = 0;

	if (!(ehc->i.probe_mask & (1 << dev->devno)) ||
	    (ehc->did_probe_mask & (1 << dev->devno)))
		return 0;

	ata_eh_detach_dev(dev);
	ata_dev_init(dev);
	ehc->did_probe_mask |= (1 << dev->devno);
	ehc->i.action |= ATA_EH_RESET;
	ehc->saved_xfer_mode[dev->devno] = 0;
	ehc->saved_ncq_enabled &= ~(1 << dev->devno);

	/* Record and count probe trials on the ering.  The specific
	 * error mask used is irrelevant.  Because a successful device
	 * detection clears the ering, this count accumulates only if
	 * there are consecutive failed probes.
	 *
	 * If the count is equal to or higher than ATA_EH_PROBE_TRIALS
	 * in the last ATA_EH_PROBE_TRIAL_INTERVAL, link speed is
	 * forced to 1.5Gbps.
	 *
	 * This is to work around cases where failed link speed
	 * negotiation results in device misdetection leading to
	 * infinite DEVXCHG or PHYRDY CHG events.
	 */
	ata_ering_record(&dev->ering, 0, AC_ERR_OTHER);
	ata_ering_map(&dev->ering, ata_count_probe_trials_cb, &trials);

	if (trials > ATA_EH_PROBE_TRIALS)
		sata_down_spd_limit(link, 1);

	return 1;
}

static int ata_eh_handle_dev_fail(struct ata_device *dev, int err)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;

	/* -EAGAIN from EH routine indicates retry without prejudice.
	 * The requester is responsible for ensuring forward progress.
	 */
	if (err != -EAGAIN)
		ehc->tries[dev->devno]--;

	switch (err) {
	case -ENODEV:
		/* device missing or wrong IDENTIFY data, schedule probing */
		ehc->i.probe_mask |= (1 << dev->devno);
		/* fall through */
	case -EINVAL:
		/* give it just one more chance */
		ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1);
		/* fall through */
	case -EIO:
		if (ehc->tries[dev->devno] == 1) {
			/* This is the last chance, better to slow
			 * down than lose it.
			 */
			sata_down_spd_limit(ata_dev_phys_link(dev), 0);
			if (dev->pio_mode > XFER_PIO_0)
				ata_down_xfermask_limit(dev, ATA_DNXFER_PIO);
		}
	}

	if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) {
		/* disable device if it has used up all its chances */
		ata_dev_disable(dev);

		/* detach if offline */
		if (ata_phys_link_offline(ata_dev_phys_link(dev)))
			ata_eh_detach_dev(dev);

		/* schedule probe if necessary */
		if (ata_eh_schedule_probe(dev)) {
			ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;
			memset(ehc->cmd_timeout_idx[dev->devno], 0,
			       sizeof(ehc->cmd_timeout_idx[dev->devno]));
		}

		return 1;
	} else {
		ehc->i.action |= ATA_EH_RESET;
		return 0;
	}
}

/**
 * ata_eh_recover - recover host port after error
 * @ap: host port to recover
 * @prereset: prereset method (can be NULL)
 * @softreset: softreset method (can be NULL)
 * @hardreset: hardreset method (can be NULL)
 * @postreset: postreset method (can be NULL)
 * @r_failed_link: out parameter for failed link
 *
 * This is the alpha and omega, eum and yang, heart and soul of
 * libata exception handling.  On entry, actions required to
 * recover each link and hotplug requests are recorded in the
 * link's eh_context.
 * This function executes all the operations with appropriate
 * retrials and fallbacks to resurrect failed devices, detach
 * goners and greet newcomers.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
		   ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
		   ata_postreset_fn_t postreset,
		   struct ata_link **r_failed_link)
{
	struct ata_link *link;
	struct ata_device *dev;
	int nr_failed_devs;
	int rc;
	unsigned long flags, deadline;

	DPRINTK("ENTER\n");

	/* prep for recovery */
	ata_for_each_link(link, ap, EDGE) {
		struct ata_eh_context *ehc = &link->eh_context;

		/* re-enable link? */
		if (ehc->i.action & ATA_EH_ENABLE_LINK) {
			ata_eh_about_to_do(link, NULL, ATA_EH_ENABLE_LINK);
			spin_lock_irqsave(ap->lock, flags);
			link->flags &= ~ATA_LFLAG_DISABLED;
			spin_unlock_irqrestore(ap->lock, flags);
			ata_eh_done(link, NULL, ATA_EH_ENABLE_LINK);
		}

		ata_for_each_dev(dev, link, ALL) {
			if (link->flags & ATA_LFLAG_NO_RETRY)
				ehc->tries[dev->devno] = 1;
			else
				ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;

			/* collect port action mask recorded in dev actions */
			ehc->i.action |= ehc->i.dev_action[dev->devno] &
					 ~ATA_EH_PERDEV_MASK;
			ehc->i.dev_action[dev->devno] &= ATA_EH_PERDEV_MASK;

			/* process hotplug request */
			if (dev->flags & ATA_DFLAG_DETACH)
				ata_eh_detach_dev(dev);

			/* schedule probe if necessary */
			if (!ata_dev_enabled(dev))
				ata_eh_schedule_probe(dev);
		}
	}

 retry:
	rc = 0;
	nr_failed_devs = 0;

	/* if UNLOADING, finish immediately */
	if (ap->pflags & ATA_PFLAG_UNLOADING)
		goto out;

	/* prep for EH */
	ata_for_each_link(link, ap, EDGE) {
		struct ata_eh_context *ehc = &link->eh_context;

		/* skip EH if possible.
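		 * Roughly: disabled links are skipped, as are links
		 * with nothing enabled, nothing to reset and nothing
		 * to probe; see ata_eh_skip_recovery() above.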
		 */
		if (ata_eh_skip_recovery(link))
			ehc->i.action = 0;

		ata_for_each_dev(dev, link, ALL)
			ehc->classes[dev->devno] = ATA_DEV_UNKNOWN;
	}

	/* reset */
	ata_for_each_link(link, ap, EDGE) {
		struct ata_eh_context *ehc = &link->eh_context;

		if (!(ehc->i.action & ATA_EH_RESET))
			continue;

		rc = ata_eh_reset(link, ata_link_nr_vacant(link),
				  prereset, softreset, hardreset, postreset);
		if (rc) {
			ata_link_printk(link, KERN_ERR,
					"reset failed, giving up\n");
			goto out;
		}
	}

	do {
		unsigned long now;

		/*
		 * clears ATA_EH_PARK in eh_info and resets
		 * ap->park_req_pending
		 */
		ata_eh_pull_park_action(ap);

		deadline = jiffies;
		ata_for_each_link(link, ap, EDGE) {
			ata_for_each_dev(dev, link, ALL) {
				struct ata_eh_context *ehc = &link->eh_context;
				unsigned long tmp;

				if (dev->class != ATA_DEV_ATA)
					continue;
				if (!(ehc->i.dev_action[dev->devno] &
				      ATA_EH_PARK))
					continue;
				tmp = dev->unpark_deadline;
				if (time_before(deadline, tmp))
					deadline = tmp;
				else if (time_before_eq(tmp, jiffies))
					continue;
				if (ehc->unloaded_mask & (1 << dev->devno))
					continue;

				ata_eh_park_issue_cmd(dev, 1);
			}
		}

		now = jiffies;
		if (time_before_eq(deadline, now))
			break;

		deadline = wait_for_completion_timeout(&ap->park_req_pending,
						       deadline - now);
	} while (deadline);
	ata_for_each_link(link, ap, EDGE) {
		ata_for_each_dev(dev, link, ALL) {
			if (!(link->eh_context.unloaded_mask &
			      (1 << dev->devno)))
				continue;

			ata_eh_park_issue_cmd(dev, 0);
			ata_eh_done(link, dev, ATA_EH_PARK);
		}
	}

	/* the rest */
	ata_for_each_link(link, ap, EDGE) {
		struct ata_eh_context *ehc = &link->eh_context;

		/* revalidate existing devices and attach new ones */
		rc = ata_eh_revalidate_and_attach(link, &dev);
		if (rc)
			goto dev_fail;

		/* if PMP got attached, return, pmp EH will take care of it */
		if (link->device->class == ATA_DEV_PMP) {
			ehc->i.action = 0;
			return 0;
		}

		/* configure transfer mode if necessary */
		if (ehc->i.flags & ATA_EHI_SETMODE) {
			rc = ata_set_mode(link, &dev);
			if (rc)
				goto dev_fail;
			ehc->i.flags &= ~ATA_EHI_SETMODE;
		}

		/* If reset has been issued, clear UA to avoid
		 * disrupting the current users of the device.
		 */
		if (ehc->i.flags & ATA_EHI_DID_RESET) {
			ata_for_each_dev(dev, link, ALL) {
				if (dev->class != ATA_DEV_ATAPI)
					continue;
				rc = atapi_eh_clear_ua(dev);
				if (rc)
					goto dev_fail;
			}
		}

		/* configure link power saving */
		if (ehc->i.action & ATA_EH_LPM)
			ata_for_each_dev(dev, link, ALL)
				ata_dev_enable_pm(dev, ap->pm_policy);

		/* this link is okay now */
		ehc->i.flags = 0;
		continue;

 dev_fail:
		nr_failed_devs++;
		ata_eh_handle_dev_fail(dev, rc);

		if (ap->pflags & ATA_PFLAG_FROZEN) {
			/* PMP reset requires working host port.
			 * Can't retry if it's frozen.
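			 * With a PMP attached we give up and return;
			 * otherwise just stop walking the remaining
			 * links on this pass.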
			 */
			if (sata_pmp_attached(ap))
				goto out;
			break;
		}
	}

	if (nr_failed_devs)
		goto retry;

 out:
	if (rc && r_failed_link)
		*r_failed_link = link;

	DPRINTK("EXIT, rc=%d\n", rc);
	return rc;
}

/**
 * ata_eh_finish - finish up EH
 * @ap: host port to finish EH for
 *
 * Recovery is complete.  Clean up EH states and retry or finish
 * failed qcs.
 *
 * LOCKING:
 * None.
 */
void ata_eh_finish(struct ata_port *ap)
{
	int tag;

	/* retry or finish qcs */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		if (qc->err_mask) {
			/* FIXME: Once EH migration is complete,
			 * generate sense data in this function,
			 * considering both err_mask and tf.
			 */
			if (qc->flags & ATA_QCFLAG_RETRY)
				ata_eh_qc_retry(qc);
			else
				ata_eh_qc_complete(qc);
		} else {
			if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
				ata_eh_qc_complete(qc);
			} else {
				/* feed zero TF to sense generation */
				memset(&qc->result_tf, 0, sizeof(qc->result_tf));
				ata_eh_qc_retry(qc);
			}
		}
	}

	/* make sure nr_active_links is zero after EH */
	WARN_ON(ap->nr_active_links);
	ap->nr_active_links = 0;
}

/**
 * ata_do_eh - do standard error handling
 * @ap: host port to handle error for
 * @prereset: prereset method (can be NULL)
 * @softreset: softreset method (can be NULL)
 * @hardreset: hardreset method (can be NULL)
 * @postreset: postreset method (can be NULL)
 *
 * Perform standard error handling sequence.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset,
	       ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
	       ata_postreset_fn_t postreset)
{
	struct ata_device *dev;
	int rc;

	ata_eh_autopsy(ap);
	ata_eh_report(ap);

	rc = ata_eh_recover(ap, prereset, softreset, hardreset, postreset,
			    NULL);
	if (rc) {
		ata_for_each_dev(dev, &ap->link, ALL)
			ata_dev_disable(dev);
	}

	ata_eh_finish(ap);
}

/**
 * ata_std_error_handler - standard error handler
 * @ap: host port to handle error for
 *
 * Standard error handler.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
void ata_std_error_handler(struct ata_port *ap)
{
	struct ata_port_operations *ops = ap->ops;
	ata_reset_fn_t hardreset = ops->hardreset;

	/* ignore built-in hardreset if SCR access is not available */
	if (ata_is_builtin_hardreset(hardreset) && !sata_scr_valid(&ap->link))
		hardreset = NULL;

	ata_do_eh(ap, ops->prereset, ops->softreset, hardreset, ops->postreset);
}

#ifdef CONFIG_PM
/**
 * ata_eh_handle_port_suspend - perform port suspend operation
 * @ap: port to suspend
 *
 * Suspend @ap.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
static void ata_eh_handle_port_suspend(struct ata_port *ap)
{
	unsigned long flags;
	int rc = 0;

	/* are we suspending? */
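	/* A suspend request is in flight iff ATA_PFLAG_PM_PENDING is
	 * set and pm_mesg is not PM_EVENT_ON; the PM_EVENT_ON case is
	 * a resume and is handled by ata_eh_handle_port_resume()
	 * below.
	 */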
	spin_lock_irqsave(ap->lock, flags);
	if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
	    ap->pm_mesg.event == PM_EVENT_ON) {
		spin_unlock_irqrestore(ap->lock, flags);
		return;
	}
	spin_unlock_irqrestore(ap->lock, flags);

	WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED);

	/* tell ACPI we're suspending */
	rc = ata_acpi_on_suspend(ap);
	if (rc)
		goto out;

	/* suspend */
	ata_eh_freeze_port(ap);

	if (ap->ops->port_suspend)
		rc = ap->ops->port_suspend(ap, ap->pm_mesg);

	ata_acpi_set_state(ap, PMSG_SUSPEND);
 out:
	/* report result */
	spin_lock_irqsave(ap->lock, flags);

	ap->pflags &= ~ATA_PFLAG_PM_PENDING;
	if (rc == 0)
		ap->pflags |= ATA_PFLAG_SUSPENDED;
	else if (ap->pflags & ATA_PFLAG_FROZEN)
		ata_port_schedule_eh(ap);

	if (ap->pm_result) {
		*ap->pm_result = rc;
		ap->pm_result = NULL;
	}

	spin_unlock_irqrestore(ap->lock, flags);

	return;
}

/**
 * ata_eh_handle_port_resume - perform port resume operation
 * @ap: port to resume
 *
 * Resume @ap.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
static void ata_eh_handle_port_resume(struct ata_port *ap)
{
	struct ata_link *link;
	struct ata_device *dev;
	unsigned long flags;
	int rc = 0;

	/* are we resuming? */
	spin_lock_irqsave(ap->lock, flags);
	if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
	    ap->pm_mesg.event != PM_EVENT_ON) {
		spin_unlock_irqrestore(ap->lock, flags);
		return;
	}
	spin_unlock_irqrestore(ap->lock, flags);

	WARN_ON(!(ap->pflags & ATA_PFLAG_SUSPENDED));

	/*
	 * Error timestamps are in jiffies which doesn't run while
	 * suspended and PHY events during resume aren't too uncommon.
	 * When the two are combined, it can lead to unnecessary speed
	 * downs if the machine is suspended and resumed repeatedly.
	 * Clear error history.
	 */
	ata_for_each_link(link, ap, HOST_FIRST)
		ata_for_each_dev(dev, link, ALL)
			ata_ering_clear(&dev->ering);

	ata_acpi_set_state(ap, PMSG_ON);

	if (ap->ops->port_resume)
		rc = ap->ops->port_resume(ap);

	/* tell ACPI that we're resuming */
	ata_acpi_on_resume(ap);

	/* report result */
	spin_lock_irqsave(ap->lock, flags);
	ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED);
	if (ap->pm_result) {
		*ap->pm_result = rc;
		ap->pm_result = NULL;
	}
	spin_unlock_irqrestore(ap->lock, flags);
}
#endif /* CONFIG_PM */
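
/*
 * Usage sketch (illustrative only, not part of this file): an LLD
 * without special EH needs typically just points its port operations
 * at the standard handler defined above, e.g.
 *
 *	static struct ata_port_operations foo_port_ops = {
 *		.inherits	= &ata_base_port_ops,
 *		.error_handler	= ata_std_error_handler,
 *	};
 *
 * "foo_port_ops" is a made-up name; ata_std_error_handler() then
 * takes prereset/softreset/hardreset/postreset from the resulting
 * ops table and hands them to ata_do_eh().
 */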