1 /* 2 * libata-eh.c - libata error handling 3 * 4 * Maintained by: Jeff Garzik <jgarzik@pobox.com> 5 * Please ALWAYS copy linux-ide@vger.kernel.org 6 * on emails. 7 * 8 * Copyright 2006 Tejun Heo <htejun@gmail.com> 9 * 10 * 11 * This program is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU General Public License as 13 * published by the Free Software Foundation; either version 2, or 14 * (at your option) any later version. 15 * 16 * This program is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * General Public License for more details. 20 * 21 * You should have received a copy of the GNU General Public License 22 * along with this program; see the file COPYING. If not, write to 23 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, 24 * USA. 25 * 26 * 27 * libata documentation is available via 'make {ps|pdf}docs', 28 * as Documentation/DocBook/libata.* 29 * 30 * Hardware documentation available from http://www.t13.org/ and 31 * http://www.sata-io.org/ 32 * 33 */ 34 35 #include <linux/kernel.h> 36 #include <linux/blkdev.h> 37 #include <linux/pci.h> 38 #include <scsi/scsi.h> 39 #include <scsi/scsi_host.h> 40 #include <scsi/scsi_eh.h> 41 #include <scsi/scsi_device.h> 42 #include <scsi/scsi_cmnd.h> 43 #include "../scsi/scsi_transport_api.h" 44 45 #include <linux/libata.h> 46 47 #include "libata.h" 48 49 enum { 50 /* speed down verdicts */ 51 ATA_EH_SPDN_NCQ_OFF = (1 << 0), 52 ATA_EH_SPDN_SPEED_DOWN = (1 << 1), 53 ATA_EH_SPDN_FALLBACK_TO_PIO = (1 << 2), 54 ATA_EH_SPDN_KEEP_ERRORS = (1 << 3), 55 56 /* error flags */ 57 ATA_EFLAG_IS_IO = (1 << 0), 58 ATA_EFLAG_DUBIOUS_XFER = (1 << 1), 59 60 /* error categories */ 61 ATA_ECAT_NONE = 0, 62 ATA_ECAT_ATA_BUS = 1, 63 ATA_ECAT_TOUT_HSM = 2, 64 ATA_ECAT_UNK_DEV = 3, 65 ATA_ECAT_DUBIOUS_NONE = 4, 66 ATA_ECAT_DUBIOUS_ATA_BUS = 5, 67 
ATA_ECAT_DUBIOUS_TOUT_HSM = 6, 68 ATA_ECAT_DUBIOUS_UNK_DEV = 7, 69 ATA_ECAT_NR = 8, 70 71 ATA_EH_CMD_DFL_TIMEOUT = 5000, 72 73 /* always put at least this amount of time between resets */ 74 ATA_EH_RESET_COOL_DOWN = 5000, 75 76 /* Waiting in ->prereset can never be reliable. It's 77 * sometimes nice to wait there but it can't be depended upon; 78 * otherwise, we wouldn't be resetting. Just give it enough 79 * time for most drives to spin up. 80 */ 81 ATA_EH_PRERESET_TIMEOUT = 10000, 82 ATA_EH_FASTDRAIN_INTERVAL = 3000, 83 84 ATA_EH_UA_TRIES = 5, 85 86 /* probe speed down parameters, see ata_eh_schedule_probe() */ 87 ATA_EH_PROBE_TRIAL_INTERVAL = 60000, /* 1 min */ 88 ATA_EH_PROBE_TRIALS = 2, 89 }; 90 91 /* The following table determines how we sequence resets. Each entry 92 * represents timeout for that try. The first try can be soft or 93 * hardreset. All others are hardreset if available. In most cases 94 * the first reset w/ 10sec timeout should succeed. Following entries 95 * are mostly for error handling, hotplug and retarded devices. 
96 */ 97 static const unsigned long ata_eh_reset_timeouts[] = { 98 10000, /* most drives spin up by 10sec */ 99 10000, /* > 99% working drives spin up before 20sec */ 100 35000, /* give > 30 secs of idleness for retarded devices */ 101 5000, /* and sweet one last chance */ 102 ULONG_MAX, /* > 1 min has elapsed, give up */ 103 }; 104 105 static const unsigned long ata_eh_identify_timeouts[] = { 106 5000, /* covers > 99% of successes and not too boring on failures */ 107 10000, /* combined time till here is enough even for media access */ 108 30000, /* for true idiots */ 109 ULONG_MAX, 110 }; 111 112 static const unsigned long ata_eh_other_timeouts[] = { 113 5000, /* same rationale as identify timeout */ 114 10000, /* ditto */ 115 /* but no merciful 30sec for other commands, it just isn't worth it */ 116 ULONG_MAX, 117 }; 118 119 struct ata_eh_cmd_timeout_ent { 120 const u8 *commands; 121 const unsigned long *timeouts; 122 }; 123 124 /* The following table determines timeouts to use for EH internal 125 * commands. Each table entry is a command class and matches the 126 * commands the entry applies to and the timeout table to use. 127 * 128 * On the retry after a command timed out, the next timeout value from 129 * the table is used. If the table doesn't contain further entries, 130 * the last value is used. 131 * 132 * ehc->cmd_timeout_idx keeps track of which timeout to use per 133 * command class, so if SET_FEATURES times out on the first try, the 134 * next try will use the second timeout value only for that class. 135 */ 136 #define CMDS(cmds...) 
(const u8 []){ cmds, 0 } 137 static const struct ata_eh_cmd_timeout_ent 138 ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = { 139 { .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI), 140 .timeouts = ata_eh_identify_timeouts, }, 141 { .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT), 142 .timeouts = ata_eh_other_timeouts, }, 143 { .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT), 144 .timeouts = ata_eh_other_timeouts, }, 145 { .commands = CMDS(ATA_CMD_SET_FEATURES), 146 .timeouts = ata_eh_other_timeouts, }, 147 { .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS), 148 .timeouts = ata_eh_other_timeouts, }, 149 }; 150 #undef CMDS 151 152 static void __ata_port_freeze(struct ata_port *ap); 153 #ifdef CONFIG_PM 154 static void ata_eh_handle_port_suspend(struct ata_port *ap); 155 static void ata_eh_handle_port_resume(struct ata_port *ap); 156 #else /* CONFIG_PM */ 157 static void ata_eh_handle_port_suspend(struct ata_port *ap) 158 { } 159 160 static void ata_eh_handle_port_resume(struct ata_port *ap) 161 { } 162 #endif /* CONFIG_PM */ 163 164 static void __ata_ehi_pushv_desc(struct ata_eh_info *ehi, const char *fmt, 165 va_list args) 166 { 167 ehi->desc_len += vscnprintf(ehi->desc + ehi->desc_len, 168 ATA_EH_DESC_LEN - ehi->desc_len, 169 fmt, args); 170 } 171 172 /** 173 * __ata_ehi_push_desc - push error description without adding separator 174 * @ehi: target EHI 175 * @fmt: printf format string 176 * 177 * Format string according to @fmt and append it to @ehi->desc. 178 * 179 * LOCKING: 180 * spin_lock_irqsave(host lock) 181 */ 182 void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...) 183 { 184 va_list args; 185 186 va_start(args, fmt); 187 __ata_ehi_pushv_desc(ehi, fmt, args); 188 va_end(args); 189 } 190 191 /** 192 * ata_ehi_push_desc - push error description with separator 193 * @ehi: target EHI 194 * @fmt: printf format string 195 * 196 * Format string according to @fmt and append it to @ehi->desc. 
197 * If @ehi->desc is not empty, ", " is added in-between. 198 * 199 * LOCKING: 200 * spin_lock_irqsave(host lock) 201 */ 202 void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...) 203 { 204 va_list args; 205 206 if (ehi->desc_len) 207 __ata_ehi_push_desc(ehi, ", "); 208 209 va_start(args, fmt); 210 __ata_ehi_pushv_desc(ehi, fmt, args); 211 va_end(args); 212 } 213 214 /** 215 * ata_ehi_clear_desc - clean error description 216 * @ehi: target EHI 217 * 218 * Clear @ehi->desc. 219 * 220 * LOCKING: 221 * spin_lock_irqsave(host lock) 222 */ 223 void ata_ehi_clear_desc(struct ata_eh_info *ehi) 224 { 225 ehi->desc[0] = '\0'; 226 ehi->desc_len = 0; 227 } 228 229 /** 230 * ata_port_desc - append port description 231 * @ap: target ATA port 232 * @fmt: printf format string 233 * 234 * Format string according to @fmt and append it to port 235 * description. If port description is not empty, " " is added 236 * in-between. This function is to be used while initializing 237 * ata_host. The description is printed on host registration. 238 * 239 * LOCKING: 240 * None. 241 */ 242 void ata_port_desc(struct ata_port *ap, const char *fmt, ...) 243 { 244 va_list args; 245 246 WARN_ON(!(ap->pflags & ATA_PFLAG_INITIALIZING)); 247 248 if (ap->link.eh_info.desc_len) 249 __ata_ehi_push_desc(&ap->link.eh_info, " "); 250 251 va_start(args, fmt); 252 __ata_ehi_pushv_desc(&ap->link.eh_info, fmt, args); 253 va_end(args); 254 } 255 256 #ifdef CONFIG_PCI 257 258 /** 259 * ata_port_pbar_desc - append PCI BAR description 260 * @ap: target ATA port 261 * @bar: target PCI BAR 262 * @offset: offset into PCI BAR 263 * @name: name of the area 264 * 265 * If @offset is negative, this function formats a string which 266 * contains the name, address, size and type of the BAR and 267 * appends it to the port description. If @offset is zero or 268 * positive, only name and offsetted address is appended. 269 * 270 * LOCKING: 271 * None. 
272 */ 273 void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset, 274 const char *name) 275 { 276 struct pci_dev *pdev = to_pci_dev(ap->host->dev); 277 char *type = ""; 278 unsigned long long start, len; 279 280 if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) 281 type = "m"; 282 else if (pci_resource_flags(pdev, bar) & IORESOURCE_IO) 283 type = "i"; 284 285 start = (unsigned long long)pci_resource_start(pdev, bar); 286 len = (unsigned long long)pci_resource_len(pdev, bar); 287 288 if (offset < 0) 289 ata_port_desc(ap, "%s %s%llu@0x%llx", name, type, len, start); 290 else 291 ata_port_desc(ap, "%s 0x%llx", name, 292 start + (unsigned long long)offset); 293 } 294 295 #endif /* CONFIG_PCI */ 296 297 static int ata_lookup_timeout_table(u8 cmd) 298 { 299 int i; 300 301 for (i = 0; i < ATA_EH_CMD_TIMEOUT_TABLE_SIZE; i++) { 302 const u8 *cur; 303 304 for (cur = ata_eh_cmd_timeout_table[i].commands; *cur; cur++) 305 if (*cur == cmd) 306 return i; 307 } 308 309 return -1; 310 } 311 312 /** 313 * ata_internal_cmd_timeout - determine timeout for an internal command 314 * @dev: target device 315 * @cmd: internal command to be issued 316 * 317 * Determine timeout for internal command @cmd for @dev. 318 * 319 * LOCKING: 320 * EH context. 321 * 322 * RETURNS: 323 * Determined timeout. 324 */ 325 unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd) 326 { 327 struct ata_eh_context *ehc = &dev->link->eh_context; 328 int ent = ata_lookup_timeout_table(cmd); 329 int idx; 330 331 if (ent < 0) 332 return ATA_EH_CMD_DFL_TIMEOUT; 333 334 idx = ehc->cmd_timeout_idx[dev->devno][ent]; 335 return ata_eh_cmd_timeout_table[ent].timeouts[idx]; 336 } 337 338 /** 339 * ata_internal_cmd_timed_out - notification for internal command timeout 340 * @dev: target device 341 * @cmd: internal command which timed out 342 * 343 * Notify EH that internal command @cmd for @dev timed out. 
This 344 * function should be called only for commands whose timeouts are 345 * determined using ata_internal_cmd_timeout(). 346 * 347 * LOCKING: 348 * EH context. 349 */ 350 void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd) 351 { 352 struct ata_eh_context *ehc = &dev->link->eh_context; 353 int ent = ata_lookup_timeout_table(cmd); 354 int idx; 355 356 if (ent < 0) 357 return; 358 359 idx = ehc->cmd_timeout_idx[dev->devno][ent]; 360 if (ata_eh_cmd_timeout_table[ent].timeouts[idx + 1] != ULONG_MAX) 361 ehc->cmd_timeout_idx[dev->devno][ent]++; 362 } 363 364 static void ata_ering_record(struct ata_ering *ering, unsigned int eflags, 365 unsigned int err_mask) 366 { 367 struct ata_ering_entry *ent; 368 369 WARN_ON(!err_mask); 370 371 ering->cursor++; 372 ering->cursor %= ATA_ERING_SIZE; 373 374 ent = &ering->ring[ering->cursor]; 375 ent->eflags = eflags; 376 ent->err_mask = err_mask; 377 ent->timestamp = get_jiffies_64(); 378 } 379 380 static struct ata_ering_entry *ata_ering_top(struct ata_ering *ering) 381 { 382 struct ata_ering_entry *ent = &ering->ring[ering->cursor]; 383 384 if (ent->err_mask) 385 return ent; 386 return NULL; 387 } 388 389 static void ata_ering_clear(struct ata_ering *ering) 390 { 391 memset(ering, 0, sizeof(*ering)); 392 } 393 394 static int ata_ering_map(struct ata_ering *ering, 395 int (*map_fn)(struct ata_ering_entry *, void *), 396 void *arg) 397 { 398 int idx, rc = 0; 399 struct ata_ering_entry *ent; 400 401 idx = ering->cursor; 402 do { 403 ent = &ering->ring[idx]; 404 if (!ent->err_mask) 405 break; 406 rc = map_fn(ent, arg); 407 if (rc) 408 break; 409 idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE; 410 } while (idx != ering->cursor); 411 412 return rc; 413 } 414 415 static unsigned int ata_eh_dev_action(struct ata_device *dev) 416 { 417 struct ata_eh_context *ehc = &dev->link->eh_context; 418 419 return ehc->i.action | ehc->i.dev_action[dev->devno]; 420 } 421 422 static void ata_eh_clear_action(struct ata_link *link, struct 
ata_device *dev, 423 struct ata_eh_info *ehi, unsigned int action) 424 { 425 struct ata_device *tdev; 426 427 if (!dev) { 428 ehi->action &= ~action; 429 ata_for_each_dev(tdev, link, ALL) 430 ehi->dev_action[tdev->devno] &= ~action; 431 } else { 432 /* doesn't make sense for port-wide EH actions */ 433 WARN_ON(!(action & ATA_EH_PERDEV_MASK)); 434 435 /* break ehi->action into ehi->dev_action */ 436 if (ehi->action & action) { 437 ata_for_each_dev(tdev, link, ALL) 438 ehi->dev_action[tdev->devno] |= 439 ehi->action & action; 440 ehi->action &= ~action; 441 } 442 443 /* turn off the specified per-dev action */ 444 ehi->dev_action[dev->devno] &= ~action; 445 } 446 } 447 448 /** 449 * ata_scsi_timed_out - SCSI layer time out callback 450 * @cmd: timed out SCSI command 451 * 452 * Handles SCSI layer timeout. We race with normal completion of 453 * the qc for @cmd. If the qc is already gone, we lose and let 454 * the scsi command finish (EH_HANDLED). Otherwise, the qc has 455 * timed out and EH should be invoked. Prevent ata_qc_complete() 456 * from finishing it by setting EH_SCHEDULED and return 457 * EH_NOT_HANDLED. 458 * 459 * TODO: kill this function once old EH is gone. 
460 * 461 * LOCKING: 462 * Called from timer context 463 * 464 * RETURNS: 465 * EH_HANDLED or EH_NOT_HANDLED 466 */ 467 enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd) 468 { 469 struct Scsi_Host *host = cmd->device->host; 470 struct ata_port *ap = ata_shost_to_port(host); 471 unsigned long flags; 472 struct ata_queued_cmd *qc; 473 enum blk_eh_timer_return ret; 474 475 DPRINTK("ENTER\n"); 476 477 if (ap->ops->error_handler) { 478 ret = BLK_EH_NOT_HANDLED; 479 goto out; 480 } 481 482 ret = BLK_EH_HANDLED; 483 spin_lock_irqsave(ap->lock, flags); 484 qc = ata_qc_from_tag(ap, ap->link.active_tag); 485 if (qc) { 486 WARN_ON(qc->scsicmd != cmd); 487 qc->flags |= ATA_QCFLAG_EH_SCHEDULED; 488 qc->err_mask |= AC_ERR_TIMEOUT; 489 ret = BLK_EH_NOT_HANDLED; 490 } 491 spin_unlock_irqrestore(ap->lock, flags); 492 493 out: 494 DPRINTK("EXIT, ret=%d\n", ret); 495 return ret; 496 } 497 498 static void ata_eh_unload(struct ata_port *ap) 499 { 500 struct ata_link *link; 501 struct ata_device *dev; 502 unsigned long flags; 503 504 /* Restore SControl IPM and SPD for the next driver and 505 * disable attached devices. 506 */ 507 ata_for_each_link(link, ap, PMP_FIRST) { 508 sata_scr_write(link, SCR_CONTROL, link->saved_scontrol & 0xff0); 509 ata_for_each_dev(dev, link, ALL) 510 ata_dev_disable(dev); 511 } 512 513 /* freeze and set UNLOADED */ 514 spin_lock_irqsave(ap->lock, flags); 515 516 ata_port_freeze(ap); /* won't be thawed */ 517 ap->pflags &= ~ATA_PFLAG_EH_PENDING; /* clear pending from freeze */ 518 ap->pflags |= ATA_PFLAG_UNLOADED; 519 520 spin_unlock_irqrestore(ap->lock, flags); 521 } 522 523 /** 524 * ata_scsi_error - SCSI layer error handler callback 525 * @host: SCSI host on which error occurred 526 * 527 * Handles SCSI-layer-thrown error events. 528 * 529 * LOCKING: 530 * Inherited from SCSI layer (none, can sleep) 531 * 532 * RETURNS: 533 * Zero. 
534 */ 535 void ata_scsi_error(struct Scsi_Host *host) 536 { 537 struct ata_port *ap = ata_shost_to_port(host); 538 int i; 539 unsigned long flags; 540 541 DPRINTK("ENTER\n"); 542 543 /* synchronize with port task */ 544 ata_port_flush_task(ap); 545 546 /* synchronize with host lock and sort out timeouts */ 547 548 /* For new EH, all qcs are finished in one of three ways - 549 * normal completion, error completion, and SCSI timeout. 550 * Both cmpletions can race against SCSI timeout. When normal 551 * completion wins, the qc never reaches EH. When error 552 * completion wins, the qc has ATA_QCFLAG_FAILED set. 553 * 554 * When SCSI timeout wins, things are a bit more complex. 555 * Normal or error completion can occur after the timeout but 556 * before this point. In such cases, both types of 557 * completions are honored. A scmd is determined to have 558 * timed out iff its associated qc is active and not failed. 559 */ 560 if (ap->ops->error_handler) { 561 struct scsi_cmnd *scmd, *tmp; 562 int nr_timedout = 0; 563 564 spin_lock_irqsave(ap->lock, flags); 565 566 list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) { 567 struct ata_queued_cmd *qc; 568 569 for (i = 0; i < ATA_MAX_QUEUE; i++) { 570 qc = __ata_qc_from_tag(ap, i); 571 if (qc->flags & ATA_QCFLAG_ACTIVE && 572 qc->scsicmd == scmd) 573 break; 574 } 575 576 if (i < ATA_MAX_QUEUE) { 577 /* the scmd has an associated qc */ 578 if (!(qc->flags & ATA_QCFLAG_FAILED)) { 579 /* which hasn't failed yet, timeout */ 580 qc->err_mask |= AC_ERR_TIMEOUT; 581 qc->flags |= ATA_QCFLAG_FAILED; 582 nr_timedout++; 583 } 584 } else { 585 /* Normal completion occurred after 586 * SCSI timeout but before this point. 587 * Successfully complete it. 588 */ 589 scmd->retries = scmd->allowed; 590 scsi_eh_finish_cmd(scmd, &ap->eh_done_q); 591 } 592 } 593 594 /* If we have timed out qcs. They belong to EH from 595 * this point but the state of the controller is 596 * unknown. 
Freeze the port to make sure the IRQ 597 * handler doesn't diddle with those qcs. This must 598 * be done atomically w.r.t. setting QCFLAG_FAILED. 599 */ 600 if (nr_timedout) 601 __ata_port_freeze(ap); 602 603 spin_unlock_irqrestore(ap->lock, flags); 604 605 /* initialize eh_tries */ 606 ap->eh_tries = ATA_EH_MAX_TRIES; 607 } else 608 spin_unlock_wait(ap->lock); 609 610 repeat: 611 /* invoke error handler */ 612 if (ap->ops->error_handler) { 613 struct ata_link *link; 614 615 /* kill fast drain timer */ 616 del_timer_sync(&ap->fastdrain_timer); 617 618 /* process port resume request */ 619 ata_eh_handle_port_resume(ap); 620 621 /* fetch & clear EH info */ 622 spin_lock_irqsave(ap->lock, flags); 623 624 ata_for_each_link(link, ap, HOST_FIRST) { 625 struct ata_eh_context *ehc = &link->eh_context; 626 struct ata_device *dev; 627 628 memset(&link->eh_context, 0, sizeof(link->eh_context)); 629 link->eh_context.i = link->eh_info; 630 memset(&link->eh_info, 0, sizeof(link->eh_info)); 631 632 ata_for_each_dev(dev, link, ENABLED) { 633 int devno = dev->devno; 634 635 ehc->saved_xfer_mode[devno] = dev->xfer_mode; 636 if (ata_ncq_enabled(dev)) 637 ehc->saved_ncq_enabled |= 1 << devno; 638 } 639 } 640 641 ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS; 642 ap->pflags &= ~ATA_PFLAG_EH_PENDING; 643 ap->excl_link = NULL; /* don't maintain exclusion over EH */ 644 645 spin_unlock_irqrestore(ap->lock, flags); 646 647 /* invoke EH, skip if unloading or suspended */ 648 if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED))) 649 ap->ops->error_handler(ap); 650 else { 651 /* if unloading, commence suicide */ 652 if ((ap->pflags & ATA_PFLAG_UNLOADING) && 653 !(ap->pflags & ATA_PFLAG_UNLOADED)) 654 ata_eh_unload(ap); 655 ata_eh_finish(ap); 656 } 657 658 /* process port suspend request */ 659 ata_eh_handle_port_suspend(ap); 660 661 /* Exception might have happend after ->error_handler 662 * recovered the port but before this point. Repeat 663 * EH in such case. 
664 */ 665 spin_lock_irqsave(ap->lock, flags); 666 667 if (ap->pflags & ATA_PFLAG_EH_PENDING) { 668 if (--ap->eh_tries) { 669 spin_unlock_irqrestore(ap->lock, flags); 670 goto repeat; 671 } 672 ata_port_printk(ap, KERN_ERR, "EH pending after %d " 673 "tries, giving up\n", ATA_EH_MAX_TRIES); 674 ap->pflags &= ~ATA_PFLAG_EH_PENDING; 675 } 676 677 /* this run is complete, make sure EH info is clear */ 678 ata_for_each_link(link, ap, HOST_FIRST) 679 memset(&link->eh_info, 0, sizeof(link->eh_info)); 680 681 /* Clear host_eh_scheduled while holding ap->lock such 682 * that if exception occurs after this point but 683 * before EH completion, SCSI midlayer will 684 * re-initiate EH. 685 */ 686 host->host_eh_scheduled = 0; 687 688 spin_unlock_irqrestore(ap->lock, flags); 689 } else { 690 WARN_ON(ata_qc_from_tag(ap, ap->link.active_tag) == NULL); 691 ap->ops->eng_timeout(ap); 692 } 693 694 /* finish or retry handled scmd's and clean up */ 695 WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q)); 696 697 scsi_eh_flush_done_q(&ap->eh_done_q); 698 699 /* clean up */ 700 spin_lock_irqsave(ap->lock, flags); 701 702 if (ap->pflags & ATA_PFLAG_LOADING) 703 ap->pflags &= ~ATA_PFLAG_LOADING; 704 else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) 705 queue_delayed_work(ata_aux_wq, &ap->hotplug_task, 0); 706 707 if (ap->pflags & ATA_PFLAG_RECOVERED) 708 ata_port_printk(ap, KERN_INFO, "EH complete\n"); 709 710 ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED); 711 712 /* tell wait_eh that we're done */ 713 ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS; 714 wake_up_all(&ap->eh_wait_q); 715 716 spin_unlock_irqrestore(ap->lock, flags); 717 718 DPRINTK("EXIT\n"); 719 } 720 721 /** 722 * ata_port_wait_eh - Wait for the currently pending EH to complete 723 * @ap: Port to wait EH for 724 * 725 * Wait until the currently pending EH is complete. 726 * 727 * LOCKING: 728 * Kernel thread context (may sleep). 
729 */ 730 void ata_port_wait_eh(struct ata_port *ap) 731 { 732 unsigned long flags; 733 DEFINE_WAIT(wait); 734 735 retry: 736 spin_lock_irqsave(ap->lock, flags); 737 738 while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) { 739 prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE); 740 spin_unlock_irqrestore(ap->lock, flags); 741 schedule(); 742 spin_lock_irqsave(ap->lock, flags); 743 } 744 finish_wait(&ap->eh_wait_q, &wait); 745 746 spin_unlock_irqrestore(ap->lock, flags); 747 748 /* make sure SCSI EH is complete */ 749 if (scsi_host_in_recovery(ap->scsi_host)) { 750 msleep(10); 751 goto retry; 752 } 753 } 754 755 static int ata_eh_nr_in_flight(struct ata_port *ap) 756 { 757 unsigned int tag; 758 int nr = 0; 759 760 /* count only non-internal commands */ 761 for (tag = 0; tag < ATA_MAX_QUEUE - 1; tag++) 762 if (ata_qc_from_tag(ap, tag)) 763 nr++; 764 765 return nr; 766 } 767 768 void ata_eh_fastdrain_timerfn(unsigned long arg) 769 { 770 struct ata_port *ap = (void *)arg; 771 unsigned long flags; 772 int cnt; 773 774 spin_lock_irqsave(ap->lock, flags); 775 776 cnt = ata_eh_nr_in_flight(ap); 777 778 /* are we done? */ 779 if (!cnt) 780 goto out_unlock; 781 782 if (cnt == ap->fastdrain_cnt) { 783 unsigned int tag; 784 785 /* No progress during the last interval, tag all 786 * in-flight qcs as timed out and freeze the port. 
787 */ 788 for (tag = 0; tag < ATA_MAX_QUEUE - 1; tag++) { 789 struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag); 790 if (qc) 791 qc->err_mask |= AC_ERR_TIMEOUT; 792 } 793 794 ata_port_freeze(ap); 795 } else { 796 /* some qcs have finished, give it another chance */ 797 ap->fastdrain_cnt = cnt; 798 ap->fastdrain_timer.expires = 799 ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL); 800 add_timer(&ap->fastdrain_timer); 801 } 802 803 out_unlock: 804 spin_unlock_irqrestore(ap->lock, flags); 805 } 806 807 /** 808 * ata_eh_set_pending - set ATA_PFLAG_EH_PENDING and activate fast drain 809 * @ap: target ATA port 810 * @fastdrain: activate fast drain 811 * 812 * Set ATA_PFLAG_EH_PENDING and activate fast drain if @fastdrain 813 * is non-zero and EH wasn't pending before. Fast drain ensures 814 * that EH kicks in in timely manner. 815 * 816 * LOCKING: 817 * spin_lock_irqsave(host lock) 818 */ 819 static void ata_eh_set_pending(struct ata_port *ap, int fastdrain) 820 { 821 int cnt; 822 823 /* already scheduled? */ 824 if (ap->pflags & ATA_PFLAG_EH_PENDING) 825 return; 826 827 ap->pflags |= ATA_PFLAG_EH_PENDING; 828 829 if (!fastdrain) 830 return; 831 832 /* do we have in-flight qcs? */ 833 cnt = ata_eh_nr_in_flight(ap); 834 if (!cnt) 835 return; 836 837 /* activate fast drain */ 838 ap->fastdrain_cnt = cnt; 839 ap->fastdrain_timer.expires = 840 ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL); 841 add_timer(&ap->fastdrain_timer); 842 } 843 844 /** 845 * ata_qc_schedule_eh - schedule qc for error handling 846 * @qc: command to schedule error handling for 847 * 848 * Schedule error handling for @qc. EH will kick in as soon as 849 * other commands are drained. 
850 * 851 * LOCKING: 852 * spin_lock_irqsave(host lock) 853 */ 854 void ata_qc_schedule_eh(struct ata_queued_cmd *qc) 855 { 856 struct ata_port *ap = qc->ap; 857 858 WARN_ON(!ap->ops->error_handler); 859 860 qc->flags |= ATA_QCFLAG_FAILED; 861 ata_eh_set_pending(ap, 1); 862 863 /* The following will fail if timeout has already expired. 864 * ata_scsi_error() takes care of such scmds on EH entry. 865 * Note that ATA_QCFLAG_FAILED is unconditionally set after 866 * this function completes. 867 */ 868 blk_abort_request(qc->scsicmd->request); 869 } 870 871 /** 872 * ata_port_schedule_eh - schedule error handling without a qc 873 * @ap: ATA port to schedule EH for 874 * 875 * Schedule error handling for @ap. EH will kick in as soon as 876 * all commands are drained. 877 * 878 * LOCKING: 879 * spin_lock_irqsave(host lock) 880 */ 881 void ata_port_schedule_eh(struct ata_port *ap) 882 { 883 WARN_ON(!ap->ops->error_handler); 884 885 if (ap->pflags & ATA_PFLAG_INITIALIZING) 886 return; 887 888 ata_eh_set_pending(ap, 1); 889 scsi_schedule_eh(ap->scsi_host); 890 891 DPRINTK("port EH scheduled\n"); 892 } 893 894 static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link) 895 { 896 int tag, nr_aborted = 0; 897 898 WARN_ON(!ap->ops->error_handler); 899 900 /* we're gonna abort all commands, no need for fast drain */ 901 ata_eh_set_pending(ap, 0); 902 903 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 904 struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag); 905 906 if (qc && (!link || qc->dev->link == link)) { 907 qc->flags |= ATA_QCFLAG_FAILED; 908 ata_qc_complete(qc); 909 nr_aborted++; 910 } 911 } 912 913 if (!nr_aborted) 914 ata_port_schedule_eh(ap); 915 916 return nr_aborted; 917 } 918 919 /** 920 * ata_link_abort - abort all qc's on the link 921 * @link: ATA link to abort qc's for 922 * 923 * Abort all active qc's active on @link and schedule EH. 924 * 925 * LOCKING: 926 * spin_lock_irqsave(host lock) 927 * 928 * RETURNS: 929 * Number of aborted qc's. 
930 */ 931 int ata_link_abort(struct ata_link *link) 932 { 933 return ata_do_link_abort(link->ap, link); 934 } 935 936 /** 937 * ata_port_abort - abort all qc's on the port 938 * @ap: ATA port to abort qc's for 939 * 940 * Abort all active qc's of @ap and schedule EH. 941 * 942 * LOCKING: 943 * spin_lock_irqsave(host_set lock) 944 * 945 * RETURNS: 946 * Number of aborted qc's. 947 */ 948 int ata_port_abort(struct ata_port *ap) 949 { 950 return ata_do_link_abort(ap, NULL); 951 } 952 953 /** 954 * __ata_port_freeze - freeze port 955 * @ap: ATA port to freeze 956 * 957 * This function is called when HSM violation or some other 958 * condition disrupts normal operation of the port. Frozen port 959 * is not allowed to perform any operation until the port is 960 * thawed, which usually follows a successful reset. 961 * 962 * ap->ops->freeze() callback can be used for freezing the port 963 * hardware-wise (e.g. mask interrupt and stop DMA engine). If a 964 * port cannot be frozen hardware-wise, the interrupt handler 965 * must ack and clear interrupts unconditionally while the port 966 * is frozen. 967 * 968 * LOCKING: 969 * spin_lock_irqsave(host lock) 970 */ 971 static void __ata_port_freeze(struct ata_port *ap) 972 { 973 WARN_ON(!ap->ops->error_handler); 974 975 if (ap->ops->freeze) 976 ap->ops->freeze(ap); 977 978 ap->pflags |= ATA_PFLAG_FROZEN; 979 980 DPRINTK("ata%u port frozen\n", ap->print_id); 981 } 982 983 /** 984 * ata_port_freeze - abort & freeze port 985 * @ap: ATA port to freeze 986 * 987 * Abort and freeze @ap. 988 * 989 * LOCKING: 990 * spin_lock_irqsave(host lock) 991 * 992 * RETURNS: 993 * Number of aborted commands. 
994 */ 995 int ata_port_freeze(struct ata_port *ap) 996 { 997 int nr_aborted; 998 999 WARN_ON(!ap->ops->error_handler); 1000 1001 nr_aborted = ata_port_abort(ap); 1002 __ata_port_freeze(ap); 1003 1004 return nr_aborted; 1005 } 1006 1007 /** 1008 * sata_async_notification - SATA async notification handler 1009 * @ap: ATA port where async notification is received 1010 * 1011 * Handler to be called when async notification via SDB FIS is 1012 * received. This function schedules EH if necessary. 1013 * 1014 * LOCKING: 1015 * spin_lock_irqsave(host lock) 1016 * 1017 * RETURNS: 1018 * 1 if EH is scheduled, 0 otherwise. 1019 */ 1020 int sata_async_notification(struct ata_port *ap) 1021 { 1022 u32 sntf; 1023 int rc; 1024 1025 if (!(ap->flags & ATA_FLAG_AN)) 1026 return 0; 1027 1028 rc = sata_scr_read(&ap->link, SCR_NOTIFICATION, &sntf); 1029 if (rc == 0) 1030 sata_scr_write(&ap->link, SCR_NOTIFICATION, sntf); 1031 1032 if (!sata_pmp_attached(ap) || rc) { 1033 /* PMP is not attached or SNTF is not available */ 1034 if (!sata_pmp_attached(ap)) { 1035 /* PMP is not attached. Check whether ATAPI 1036 * AN is configured. If so, notify media 1037 * change. 1038 */ 1039 struct ata_device *dev = ap->link.device; 1040 1041 if ((dev->class == ATA_DEV_ATAPI) && 1042 (dev->flags & ATA_DFLAG_AN)) 1043 ata_scsi_media_change_notify(dev); 1044 return 0; 1045 } else { 1046 /* PMP is attached but SNTF is not available. 1047 * ATAPI async media change notification is 1048 * not used. The PMP must be reporting PHY 1049 * status change, schedule EH. 
1050 */ 1051 ata_port_schedule_eh(ap); 1052 return 1; 1053 } 1054 } else { 1055 /* PMP is attached and SNTF is available */ 1056 struct ata_link *link; 1057 1058 /* check and notify ATAPI AN */ 1059 ata_for_each_link(link, ap, EDGE) { 1060 if (!(sntf & (1 << link->pmp))) 1061 continue; 1062 1063 if ((link->device->class == ATA_DEV_ATAPI) && 1064 (link->device->flags & ATA_DFLAG_AN)) 1065 ata_scsi_media_change_notify(link->device); 1066 } 1067 1068 /* If PMP is reporting that PHY status of some 1069 * downstream ports has changed, schedule EH. 1070 */ 1071 if (sntf & (1 << SATA_PMP_CTRL_PORT)) { 1072 ata_port_schedule_eh(ap); 1073 return 1; 1074 } 1075 1076 return 0; 1077 } 1078 } 1079 1080 /** 1081 * ata_eh_freeze_port - EH helper to freeze port 1082 * @ap: ATA port to freeze 1083 * 1084 * Freeze @ap. 1085 * 1086 * LOCKING: 1087 * None. 1088 */ 1089 void ata_eh_freeze_port(struct ata_port *ap) 1090 { 1091 unsigned long flags; 1092 1093 if (!ap->ops->error_handler) 1094 return; 1095 1096 spin_lock_irqsave(ap->lock, flags); 1097 __ata_port_freeze(ap); 1098 spin_unlock_irqrestore(ap->lock, flags); 1099 } 1100 1101 /** 1102 * ata_port_thaw_port - EH helper to thaw port 1103 * @ap: ATA port to thaw 1104 * 1105 * Thaw frozen port @ap. 1106 * 1107 * LOCKING: 1108 * None. 
1109 */ 1110 void ata_eh_thaw_port(struct ata_port *ap) 1111 { 1112 unsigned long flags; 1113 1114 if (!ap->ops->error_handler) 1115 return; 1116 1117 spin_lock_irqsave(ap->lock, flags); 1118 1119 ap->pflags &= ~ATA_PFLAG_FROZEN; 1120 1121 if (ap->ops->thaw) 1122 ap->ops->thaw(ap); 1123 1124 spin_unlock_irqrestore(ap->lock, flags); 1125 1126 DPRINTK("ata%u port thawed\n", ap->print_id); 1127 } 1128 1129 static void ata_eh_scsidone(struct scsi_cmnd *scmd) 1130 { 1131 /* nada */ 1132 } 1133 1134 static void __ata_eh_qc_complete(struct ata_queued_cmd *qc) 1135 { 1136 struct ata_port *ap = qc->ap; 1137 struct scsi_cmnd *scmd = qc->scsicmd; 1138 unsigned long flags; 1139 1140 spin_lock_irqsave(ap->lock, flags); 1141 qc->scsidone = ata_eh_scsidone; 1142 __ata_qc_complete(qc); 1143 WARN_ON(ata_tag_valid(qc->tag)); 1144 spin_unlock_irqrestore(ap->lock, flags); 1145 1146 scsi_eh_finish_cmd(scmd, &ap->eh_done_q); 1147 } 1148 1149 /** 1150 * ata_eh_qc_complete - Complete an active ATA command from EH 1151 * @qc: Command to complete 1152 * 1153 * Indicate to the mid and upper layers that an ATA command has 1154 * completed. To be used from EH. 1155 */ 1156 void ata_eh_qc_complete(struct ata_queued_cmd *qc) 1157 { 1158 struct scsi_cmnd *scmd = qc->scsicmd; 1159 scmd->retries = scmd->allowed; 1160 __ata_eh_qc_complete(qc); 1161 } 1162 1163 /** 1164 * ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH 1165 * @qc: Command to retry 1166 * 1167 * Indicate to the mid and upper layers that an ATA command 1168 * should be retried. To be used from EH. 1169 * 1170 * SCSI midlayer limits the number of retries to scmd->allowed. 1171 * scmd->retries is decremented for commands which get retried 1172 * due to unrelated failures (qc->err_mask is zero). 
1173 */ 1174 void ata_eh_qc_retry(struct ata_queued_cmd *qc) 1175 { 1176 struct scsi_cmnd *scmd = qc->scsicmd; 1177 if (!qc->err_mask && scmd->retries) 1178 scmd->retries--; 1179 __ata_eh_qc_complete(qc); 1180 } 1181 1182 /** 1183 * ata_dev_disable - disable ATA device 1184 * @dev: ATA device to disable 1185 * 1186 * Disable @dev. 1187 * 1188 * Locking: 1189 * EH context. 1190 */ 1191 void ata_dev_disable(struct ata_device *dev) 1192 { 1193 if (!ata_dev_enabled(dev)) 1194 return; 1195 1196 if (ata_msg_drv(dev->link->ap)) 1197 ata_dev_printk(dev, KERN_WARNING, "disabled\n"); 1198 ata_acpi_on_disable(dev); 1199 ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO0 | ATA_DNXFER_QUIET); 1200 dev->class++; 1201 1202 /* From now till the next successful probe, ering is used to 1203 * track probe failures. Clear accumulated device error info. 1204 */ 1205 ata_ering_clear(&dev->ering); 1206 } 1207 1208 /** 1209 * ata_eh_detach_dev - detach ATA device 1210 * @dev: ATA device to detach 1211 * 1212 * Detach @dev. 1213 * 1214 * LOCKING: 1215 * None. 
1216 */ 1217 void ata_eh_detach_dev(struct ata_device *dev) 1218 { 1219 struct ata_link *link = dev->link; 1220 struct ata_port *ap = link->ap; 1221 struct ata_eh_context *ehc = &link->eh_context; 1222 unsigned long flags; 1223 1224 ata_dev_disable(dev); 1225 1226 spin_lock_irqsave(ap->lock, flags); 1227 1228 dev->flags &= ~ATA_DFLAG_DETACH; 1229 1230 if (ata_scsi_offline_dev(dev)) { 1231 dev->flags |= ATA_DFLAG_DETACHED; 1232 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; 1233 } 1234 1235 /* clear per-dev EH info */ 1236 ata_eh_clear_action(link, dev, &link->eh_info, ATA_EH_PERDEV_MASK); 1237 ata_eh_clear_action(link, dev, &link->eh_context.i, ATA_EH_PERDEV_MASK); 1238 ehc->saved_xfer_mode[dev->devno] = 0; 1239 ehc->saved_ncq_enabled &= ~(1 << dev->devno); 1240 1241 spin_unlock_irqrestore(ap->lock, flags); 1242 } 1243 1244 /** 1245 * ata_eh_about_to_do - about to perform eh_action 1246 * @link: target ATA link 1247 * @dev: target ATA dev for per-dev action (can be NULL) 1248 * @action: action about to be performed 1249 * 1250 * Called just before performing EH actions to clear related bits 1251 * in @link->eh_info such that eh actions are not unnecessarily 1252 * repeated. 1253 * 1254 * LOCKING: 1255 * None. 1256 */ 1257 void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev, 1258 unsigned int action) 1259 { 1260 struct ata_port *ap = link->ap; 1261 struct ata_eh_info *ehi = &link->eh_info; 1262 struct ata_eh_context *ehc = &link->eh_context; 1263 unsigned long flags; 1264 1265 spin_lock_irqsave(ap->lock, flags); 1266 1267 ata_eh_clear_action(link, dev, ehi, action); 1268 1269 /* About to take EH action, set RECOVERED. Ignore actions on 1270 * slave links as master will do them again. 
1271 */ 1272 if (!(ehc->i.flags & ATA_EHI_QUIET) && link != ap->slave_link) 1273 ap->pflags |= ATA_PFLAG_RECOVERED; 1274 1275 spin_unlock_irqrestore(ap->lock, flags); 1276 } 1277 1278 /** 1279 * ata_eh_done - EH action complete 1280 * @ap: target ATA port 1281 * @dev: target ATA dev for per-dev action (can be NULL) 1282 * @action: action just completed 1283 * 1284 * Called right after performing EH actions to clear related bits 1285 * in @link->eh_context. 1286 * 1287 * LOCKING: 1288 * None. 1289 */ 1290 void ata_eh_done(struct ata_link *link, struct ata_device *dev, 1291 unsigned int action) 1292 { 1293 struct ata_eh_context *ehc = &link->eh_context; 1294 1295 ata_eh_clear_action(link, dev, &ehc->i, action); 1296 } 1297 1298 /** 1299 * ata_err_string - convert err_mask to descriptive string 1300 * @err_mask: error mask to convert to string 1301 * 1302 * Convert @err_mask to descriptive string. Errors are 1303 * prioritized according to severity and only the most severe 1304 * error is reported. 1305 * 1306 * LOCKING: 1307 * None. 1308 * 1309 * RETURNS: 1310 * Descriptive string for @err_mask 1311 */ 1312 static const char *ata_err_string(unsigned int err_mask) 1313 { 1314 if (err_mask & AC_ERR_HOST_BUS) 1315 return "host bus error"; 1316 if (err_mask & AC_ERR_ATA_BUS) 1317 return "ATA bus error"; 1318 if (err_mask & AC_ERR_TIMEOUT) 1319 return "timeout"; 1320 if (err_mask & AC_ERR_HSM) 1321 return "HSM violation"; 1322 if (err_mask & AC_ERR_SYSTEM) 1323 return "internal error"; 1324 if (err_mask & AC_ERR_MEDIA) 1325 return "media error"; 1326 if (err_mask & AC_ERR_INVALID) 1327 return "invalid argument"; 1328 if (err_mask & AC_ERR_DEV) 1329 return "device error"; 1330 return "unknown error"; 1331 } 1332 1333 /** 1334 * ata_read_log_page - read a specific log page 1335 * @dev: target device 1336 * @page: page to read 1337 * @buf: buffer to store read page 1338 * @sectors: number of sectors to read 1339 * 1340 * Read log page using READ_LOG_EXT command. 
1341 * 1342 * LOCKING: 1343 * Kernel thread context (may sleep). 1344 * 1345 * RETURNS: 1346 * 0 on success, AC_ERR_* mask otherwise. 1347 */ 1348 static unsigned int ata_read_log_page(struct ata_device *dev, 1349 u8 page, void *buf, unsigned int sectors) 1350 { 1351 struct ata_taskfile tf; 1352 unsigned int err_mask; 1353 1354 DPRINTK("read log page - page %d\n", page); 1355 1356 ata_tf_init(dev, &tf); 1357 tf.command = ATA_CMD_READ_LOG_EXT; 1358 tf.lbal = page; 1359 tf.nsect = sectors; 1360 tf.hob_nsect = sectors >> 8; 1361 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_LBA48 | ATA_TFLAG_DEVICE; 1362 tf.protocol = ATA_PROT_PIO; 1363 1364 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE, 1365 buf, sectors * ATA_SECT_SIZE, 0); 1366 1367 DPRINTK("EXIT, err_mask=%x\n", err_mask); 1368 return err_mask; 1369 } 1370 1371 /** 1372 * ata_eh_read_log_10h - Read log page 10h for NCQ error details 1373 * @dev: Device to read log page 10h from 1374 * @tag: Resulting tag of the failed command 1375 * @tf: Resulting taskfile registers of the failed command 1376 * 1377 * Read log page 10h to obtain NCQ error details and clear error 1378 * condition. 1379 * 1380 * LOCKING: 1381 * Kernel thread context (may sleep). 1382 * 1383 * RETURNS: 1384 * 0 on success, -errno otherwise. 
1385 */ 1386 static int ata_eh_read_log_10h(struct ata_device *dev, 1387 int *tag, struct ata_taskfile *tf) 1388 { 1389 u8 *buf = dev->link->ap->sector_buf; 1390 unsigned int err_mask; 1391 u8 csum; 1392 int i; 1393 1394 err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, buf, 1); 1395 if (err_mask) 1396 return -EIO; 1397 1398 csum = 0; 1399 for (i = 0; i < ATA_SECT_SIZE; i++) 1400 csum += buf[i]; 1401 if (csum) 1402 ata_dev_printk(dev, KERN_WARNING, 1403 "invalid checksum 0x%x on log page 10h\n", csum); 1404 1405 if (buf[0] & 0x80) 1406 return -ENOENT; 1407 1408 *tag = buf[0] & 0x1f; 1409 1410 tf->command = buf[2]; 1411 tf->feature = buf[3]; 1412 tf->lbal = buf[4]; 1413 tf->lbam = buf[5]; 1414 tf->lbah = buf[6]; 1415 tf->device = buf[7]; 1416 tf->hob_lbal = buf[8]; 1417 tf->hob_lbam = buf[9]; 1418 tf->hob_lbah = buf[10]; 1419 tf->nsect = buf[12]; 1420 tf->hob_nsect = buf[13]; 1421 1422 return 0; 1423 } 1424 1425 /** 1426 * atapi_eh_tur - perform ATAPI TEST_UNIT_READY 1427 * @dev: target ATAPI device 1428 * @r_sense_key: out parameter for sense_key 1429 * 1430 * Perform ATAPI TEST_UNIT_READY. 1431 * 1432 * LOCKING: 1433 * EH context (may sleep). 1434 * 1435 * RETURNS: 1436 * 0 on success, AC_ERR_* mask on failure. 
1437 */ 1438 static unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key) 1439 { 1440 u8 cdb[ATAPI_CDB_LEN] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 }; 1441 struct ata_taskfile tf; 1442 unsigned int err_mask; 1443 1444 ata_tf_init(dev, &tf); 1445 1446 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; 1447 tf.command = ATA_CMD_PACKET; 1448 tf.protocol = ATAPI_PROT_NODATA; 1449 1450 err_mask = ata_exec_internal(dev, &tf, cdb, DMA_NONE, NULL, 0, 0); 1451 if (err_mask == AC_ERR_DEV) 1452 *r_sense_key = tf.feature >> 4; 1453 return err_mask; 1454 } 1455 1456 /** 1457 * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE 1458 * @dev: device to perform REQUEST_SENSE to 1459 * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long) 1460 * @dfl_sense_key: default sense key to use 1461 * 1462 * Perform ATAPI REQUEST_SENSE after the device reported CHECK 1463 * SENSE. This function is EH helper. 1464 * 1465 * LOCKING: 1466 * Kernel thread context (may sleep). 1467 * 1468 * RETURNS: 1469 * 0 on success, AC_ERR_* mask on failure 1470 */ 1471 static unsigned int atapi_eh_request_sense(struct ata_device *dev, 1472 u8 *sense_buf, u8 dfl_sense_key) 1473 { 1474 u8 cdb[ATAPI_CDB_LEN] = 1475 { REQUEST_SENSE, 0, 0, 0, SCSI_SENSE_BUFFERSIZE, 0 }; 1476 struct ata_port *ap = dev->link->ap; 1477 struct ata_taskfile tf; 1478 1479 DPRINTK("ATAPI request sense\n"); 1480 1481 /* FIXME: is this needed? */ 1482 memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE); 1483 1484 /* initialize sense_buf with the error register, 1485 * for the case where they are -not- overwritten 1486 */ 1487 sense_buf[0] = 0x70; 1488 sense_buf[2] = dfl_sense_key; 1489 1490 /* some devices time out if garbage left in tf */ 1491 ata_tf_init(dev, &tf); 1492 1493 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; 1494 tf.command = ATA_CMD_PACKET; 1495 1496 /* is it pointless to prefer PIO for "safety reasons"? 
*/ 1497 if (ap->flags & ATA_FLAG_PIO_DMA) { 1498 tf.protocol = ATAPI_PROT_DMA; 1499 tf.feature |= ATAPI_PKT_DMA; 1500 } else { 1501 tf.protocol = ATAPI_PROT_PIO; 1502 tf.lbam = SCSI_SENSE_BUFFERSIZE; 1503 tf.lbah = 0; 1504 } 1505 1506 return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE, 1507 sense_buf, SCSI_SENSE_BUFFERSIZE, 0); 1508 } 1509 1510 /** 1511 * ata_eh_analyze_serror - analyze SError for a failed port 1512 * @link: ATA link to analyze SError for 1513 * 1514 * Analyze SError if available and further determine cause of 1515 * failure. 1516 * 1517 * LOCKING: 1518 * None. 1519 */ 1520 static void ata_eh_analyze_serror(struct ata_link *link) 1521 { 1522 struct ata_eh_context *ehc = &link->eh_context; 1523 u32 serror = ehc->i.serror; 1524 unsigned int err_mask = 0, action = 0; 1525 u32 hotplug_mask; 1526 1527 if (serror & (SERR_PERSISTENT | SERR_DATA)) { 1528 err_mask |= AC_ERR_ATA_BUS; 1529 action |= ATA_EH_RESET; 1530 } 1531 if (serror & SERR_PROTOCOL) { 1532 err_mask |= AC_ERR_HSM; 1533 action |= ATA_EH_RESET; 1534 } 1535 if (serror & SERR_INTERNAL) { 1536 err_mask |= AC_ERR_SYSTEM; 1537 action |= ATA_EH_RESET; 1538 } 1539 1540 /* Determine whether a hotplug event has occurred. Both 1541 * SError.N/X are considered hotplug events for enabled or 1542 * host links. For disabled PMP links, only N bit is 1543 * considered as X bit is left at 1 for link plugging. 1544 */ 1545 hotplug_mask = 0; 1546 1547 if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link)) 1548 hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG; 1549 else 1550 hotplug_mask = SERR_PHYRDY_CHG; 1551 1552 if (serror & hotplug_mask) 1553 ata_ehi_hotplugged(&ehc->i); 1554 1555 ehc->i.err_mask |= err_mask; 1556 ehc->i.action |= action; 1557 } 1558 1559 /** 1560 * ata_eh_analyze_ncq_error - analyze NCQ error 1561 * @link: ATA link to analyze NCQ error for 1562 * 1563 * Read log page 10h, determine the offending qc and acquire 1564 * error status TF. 
For NCQ device errors, all LLDDs have to do 1565 * is setting AC_ERR_DEV in ehi->err_mask. This function takes 1566 * care of the rest. 1567 * 1568 * LOCKING: 1569 * Kernel thread context (may sleep). 1570 */ 1571 void ata_eh_analyze_ncq_error(struct ata_link *link) 1572 { 1573 struct ata_port *ap = link->ap; 1574 struct ata_eh_context *ehc = &link->eh_context; 1575 struct ata_device *dev = link->device; 1576 struct ata_queued_cmd *qc; 1577 struct ata_taskfile tf; 1578 int tag, rc; 1579 1580 /* if frozen, we can't do much */ 1581 if (ap->pflags & ATA_PFLAG_FROZEN) 1582 return; 1583 1584 /* is it NCQ device error? */ 1585 if (!link->sactive || !(ehc->i.err_mask & AC_ERR_DEV)) 1586 return; 1587 1588 /* has LLDD analyzed already? */ 1589 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 1590 qc = __ata_qc_from_tag(ap, tag); 1591 1592 if (!(qc->flags & ATA_QCFLAG_FAILED)) 1593 continue; 1594 1595 if (qc->err_mask) 1596 return; 1597 } 1598 1599 /* okay, this error is ours */ 1600 rc = ata_eh_read_log_10h(dev, &tag, &tf); 1601 if (rc) { 1602 ata_link_printk(link, KERN_ERR, "failed to read log page 10h " 1603 "(errno=%d)\n", rc); 1604 return; 1605 } 1606 1607 if (!(link->sactive & (1 << tag))) { 1608 ata_link_printk(link, KERN_ERR, "log page 10h reported " 1609 "inactive tag %d\n", tag); 1610 return; 1611 } 1612 1613 /* we've got the perpetrator, condemn it */ 1614 qc = __ata_qc_from_tag(ap, tag); 1615 memcpy(&qc->result_tf, &tf, sizeof(tf)); 1616 qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48; 1617 qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ; 1618 ehc->i.err_mask &= ~AC_ERR_DEV; 1619 } 1620 1621 /** 1622 * ata_eh_analyze_tf - analyze taskfile of a failed qc 1623 * @qc: qc to analyze 1624 * @tf: Taskfile registers to analyze 1625 * 1626 * Analyze taskfile of @qc and further determine cause of 1627 * failure. This function also requests ATAPI sense data if 1628 * avaliable. 1629 * 1630 * LOCKING: 1631 * Kernel thread context (may sleep). 
1632 * 1633 * RETURNS: 1634 * Determined recovery action 1635 */ 1636 static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, 1637 const struct ata_taskfile *tf) 1638 { 1639 unsigned int tmp, action = 0; 1640 u8 stat = tf->command, err = tf->feature; 1641 1642 if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) { 1643 qc->err_mask |= AC_ERR_HSM; 1644 return ATA_EH_RESET; 1645 } 1646 1647 if (stat & (ATA_ERR | ATA_DF)) 1648 qc->err_mask |= AC_ERR_DEV; 1649 else 1650 return 0; 1651 1652 switch (qc->dev->class) { 1653 case ATA_DEV_ATA: 1654 if (err & ATA_ICRC) 1655 qc->err_mask |= AC_ERR_ATA_BUS; 1656 if (err & ATA_UNC) 1657 qc->err_mask |= AC_ERR_MEDIA; 1658 if (err & ATA_IDNF) 1659 qc->err_mask |= AC_ERR_INVALID; 1660 break; 1661 1662 case ATA_DEV_ATAPI: 1663 if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) { 1664 tmp = atapi_eh_request_sense(qc->dev, 1665 qc->scsicmd->sense_buffer, 1666 qc->result_tf.feature >> 4); 1667 if (!tmp) { 1668 /* ATA_QCFLAG_SENSE_VALID is used to 1669 * tell atapi_qc_complete() that sense 1670 * data is already valid. 1671 * 1672 * TODO: interpret sense data and set 1673 * appropriate err_mask. 
1674 */ 1675 qc->flags |= ATA_QCFLAG_SENSE_VALID; 1676 } else 1677 qc->err_mask |= tmp; 1678 } 1679 } 1680 1681 if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS)) 1682 action |= ATA_EH_RESET; 1683 1684 return action; 1685 } 1686 1687 static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask, 1688 int *xfer_ok) 1689 { 1690 int base = 0; 1691 1692 if (!(eflags & ATA_EFLAG_DUBIOUS_XFER)) 1693 *xfer_ok = 1; 1694 1695 if (!*xfer_ok) 1696 base = ATA_ECAT_DUBIOUS_NONE; 1697 1698 if (err_mask & AC_ERR_ATA_BUS) 1699 return base + ATA_ECAT_ATA_BUS; 1700 1701 if (err_mask & AC_ERR_TIMEOUT) 1702 return base + ATA_ECAT_TOUT_HSM; 1703 1704 if (eflags & ATA_EFLAG_IS_IO) { 1705 if (err_mask & AC_ERR_HSM) 1706 return base + ATA_ECAT_TOUT_HSM; 1707 if ((err_mask & 1708 (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV) 1709 return base + ATA_ECAT_UNK_DEV; 1710 } 1711 1712 return 0; 1713 } 1714 1715 struct speed_down_verdict_arg { 1716 u64 since; 1717 int xfer_ok; 1718 int nr_errors[ATA_ECAT_NR]; 1719 }; 1720 1721 static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg) 1722 { 1723 struct speed_down_verdict_arg *arg = void_arg; 1724 int cat; 1725 1726 if (ent->timestamp < arg->since) 1727 return -1; 1728 1729 cat = ata_eh_categorize_error(ent->eflags, ent->err_mask, 1730 &arg->xfer_ok); 1731 arg->nr_errors[cat]++; 1732 1733 return 0; 1734 } 1735 1736 /** 1737 * ata_eh_speed_down_verdict - Determine speed down verdict 1738 * @dev: Device of interest 1739 * 1740 * This function examines error ring of @dev and determines 1741 * whether NCQ needs to be turned off, transfer speed should be 1742 * stepped down, or falling back to PIO is necessary. 
 *
 *	ECAT_ATA_BUS	: ATA_BUS error for any command
 *
 *	ECAT_TOUT_HSM	: TIMEOUT for any command or HSM violation for
 *			  IO commands
 *
 *	ECAT_UNK_DEV	: Unknown DEV error for IO commands
 *
 *	ECAT_DUBIOUS_*	: Identical to above three but occurred while
 *			  data transfer hasn't been verified.
 *
 *	Verdicts are
 *
 *	NCQ_OFF		: Turn off NCQ.
 *
 *	SPEED_DOWN	: Speed down transfer speed but don't fall back
 *			  to PIO.
 *
 *	FALLBACK_TO_PIO	: Fall back to PIO.
 *
 *	Even if multiple verdicts are returned, only one action is
 *	taken per error.  An action triggered by non-DUBIOUS errors
 *	clears ering, while one triggered by DUBIOUS_* errors doesn't.
 *	This is to expedite speed down decisions right after device is
 *	initially configured.
 *
 *	The following are the speed down rules.  #1 and #2 deal with
 *	DUBIOUS errors.
 *
 *	1. If more than one DUBIOUS_ATA_BUS or DUBIOUS_TOUT_HSM errors
 *	   occurred during last 5 mins, SPEED_DOWN and FALLBACK_TO_PIO.
 *
 *	2. If more than one DUBIOUS_TOUT_HSM or DUBIOUS_UNK_DEV errors
 *	   occurred during last 5 mins, NCQ_OFF.
 *
 *	3. If more than 6 ATA_BUS, TOUT_HSM or UNK_DEV errors
 *	   occurred during last 5 mins, FALLBACK_TO_PIO.
 *
 *	4. If more than 3 TOUT_HSM or UNK_DEV errors occurred
 *	   during last 10 mins, NCQ_OFF.
 *
 *	5. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 6
 *	   UNK_DEV errors occurred during last 10 mins, SPEED_DOWN.
 *
 *	LOCKING:
 *	Inherited from caller.
 *
 *	RETURNS:
 *	OR of ATA_EH_SPDN_* flags.
1792 */ 1793 static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev) 1794 { 1795 const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ; 1796 u64 j64 = get_jiffies_64(); 1797 struct speed_down_verdict_arg arg; 1798 unsigned int verdict = 0; 1799 1800 /* scan past 5 mins of error history */ 1801 memset(&arg, 0, sizeof(arg)); 1802 arg.since = j64 - min(j64, j5mins); 1803 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); 1804 1805 if (arg.nr_errors[ATA_ECAT_DUBIOUS_ATA_BUS] + 1806 arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] > 1) 1807 verdict |= ATA_EH_SPDN_SPEED_DOWN | 1808 ATA_EH_SPDN_FALLBACK_TO_PIO | ATA_EH_SPDN_KEEP_ERRORS; 1809 1810 if (arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] + 1811 arg.nr_errors[ATA_ECAT_DUBIOUS_UNK_DEV] > 1) 1812 verdict |= ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_KEEP_ERRORS; 1813 1814 if (arg.nr_errors[ATA_ECAT_ATA_BUS] + 1815 arg.nr_errors[ATA_ECAT_TOUT_HSM] + 1816 arg.nr_errors[ATA_ECAT_UNK_DEV] > 6) 1817 verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO; 1818 1819 /* scan past 10 mins of error history */ 1820 memset(&arg, 0, sizeof(arg)); 1821 arg.since = j64 - min(j64, j10mins); 1822 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); 1823 1824 if (arg.nr_errors[ATA_ECAT_TOUT_HSM] + 1825 arg.nr_errors[ATA_ECAT_UNK_DEV] > 3) 1826 verdict |= ATA_EH_SPDN_NCQ_OFF; 1827 1828 if (arg.nr_errors[ATA_ECAT_ATA_BUS] + 1829 arg.nr_errors[ATA_ECAT_TOUT_HSM] > 3 || 1830 arg.nr_errors[ATA_ECAT_UNK_DEV] > 6) 1831 verdict |= ATA_EH_SPDN_SPEED_DOWN; 1832 1833 return verdict; 1834 } 1835 1836 /** 1837 * ata_eh_speed_down - record error and speed down if necessary 1838 * @dev: Failed device 1839 * @eflags: mask of ATA_EFLAG_* flags 1840 * @err_mask: err_mask of the error 1841 * 1842 * Record error and examine error history to determine whether 1843 * adjusting transmission speed is necessary. It also sets 1844 * transmission limits appropriately if such adjustment is 1845 * necessary. 
1846 * 1847 * LOCKING: 1848 * Kernel thread context (may sleep). 1849 * 1850 * RETURNS: 1851 * Determined recovery action. 1852 */ 1853 static unsigned int ata_eh_speed_down(struct ata_device *dev, 1854 unsigned int eflags, unsigned int err_mask) 1855 { 1856 struct ata_link *link = ata_dev_phys_link(dev); 1857 int xfer_ok = 0; 1858 unsigned int verdict; 1859 unsigned int action = 0; 1860 1861 /* don't bother if Cat-0 error */ 1862 if (ata_eh_categorize_error(eflags, err_mask, &xfer_ok) == 0) 1863 return 0; 1864 1865 /* record error and determine whether speed down is necessary */ 1866 ata_ering_record(&dev->ering, eflags, err_mask); 1867 verdict = ata_eh_speed_down_verdict(dev); 1868 1869 /* turn off NCQ? */ 1870 if ((verdict & ATA_EH_SPDN_NCQ_OFF) && 1871 (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ | 1872 ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) { 1873 dev->flags |= ATA_DFLAG_NCQ_OFF; 1874 ata_dev_printk(dev, KERN_WARNING, 1875 "NCQ disabled due to excessive errors\n"); 1876 goto done; 1877 } 1878 1879 /* speed down? */ 1880 if (verdict & ATA_EH_SPDN_SPEED_DOWN) { 1881 /* speed down SATA link speed if possible */ 1882 if (sata_down_spd_limit(link, 0) == 0) { 1883 action |= ATA_EH_RESET; 1884 goto done; 1885 } 1886 1887 /* lower transfer mode */ 1888 if (dev->spdn_cnt < 2) { 1889 static const int dma_dnxfer_sel[] = 1890 { ATA_DNXFER_DMA, ATA_DNXFER_40C }; 1891 static const int pio_dnxfer_sel[] = 1892 { ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 }; 1893 int sel; 1894 1895 if (dev->xfer_shift != ATA_SHIFT_PIO) 1896 sel = dma_dnxfer_sel[dev->spdn_cnt]; 1897 else 1898 sel = pio_dnxfer_sel[dev->spdn_cnt]; 1899 1900 dev->spdn_cnt++; 1901 1902 if (ata_down_xfermask_limit(dev, sel) == 0) { 1903 action |= ATA_EH_RESET; 1904 goto done; 1905 } 1906 } 1907 } 1908 1909 /* Fall back to PIO? Slowing down to PIO is meaningless for 1910 * SATA ATA devices. Consider it only for PATA and SATAPI. 
1911 */ 1912 if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) && 1913 (link->ap->cbl != ATA_CBL_SATA || dev->class == ATA_DEV_ATAPI) && 1914 (dev->xfer_shift != ATA_SHIFT_PIO)) { 1915 if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) { 1916 dev->spdn_cnt = 0; 1917 action |= ATA_EH_RESET; 1918 goto done; 1919 } 1920 } 1921 1922 return 0; 1923 done: 1924 /* device has been slowed down, blow error history */ 1925 if (!(verdict & ATA_EH_SPDN_KEEP_ERRORS)) 1926 ata_ering_clear(&dev->ering); 1927 return action; 1928 } 1929 1930 /** 1931 * ata_eh_link_autopsy - analyze error and determine recovery action 1932 * @link: host link to perform autopsy on 1933 * 1934 * Analyze why @link failed and determine which recovery actions 1935 * are needed. This function also sets more detailed AC_ERR_* 1936 * values and fills sense data for ATAPI CHECK SENSE. 1937 * 1938 * LOCKING: 1939 * Kernel thread context (may sleep). 1940 */ 1941 static void ata_eh_link_autopsy(struct ata_link *link) 1942 { 1943 struct ata_port *ap = link->ap; 1944 struct ata_eh_context *ehc = &link->eh_context; 1945 struct ata_device *dev; 1946 unsigned int all_err_mask = 0, eflags = 0; 1947 int tag; 1948 u32 serror; 1949 int rc; 1950 1951 DPRINTK("ENTER\n"); 1952 1953 if (ehc->i.flags & ATA_EHI_NO_AUTOPSY) 1954 return; 1955 1956 /* obtain and analyze SError */ 1957 rc = sata_scr_read(link, SCR_ERROR, &serror); 1958 if (rc == 0) { 1959 ehc->i.serror |= serror; 1960 ata_eh_analyze_serror(link); 1961 } else if (rc != -EOPNOTSUPP) { 1962 /* SError read failed, force reset and probing */ 1963 ehc->i.probe_mask |= ATA_ALL_DEVICES; 1964 ehc->i.action |= ATA_EH_RESET; 1965 ehc->i.err_mask |= AC_ERR_OTHER; 1966 } 1967 1968 /* analyze NCQ failure */ 1969 ata_eh_analyze_ncq_error(link); 1970 1971 /* any real error trumps AC_ERR_OTHER */ 1972 if (ehc->i.err_mask & ~AC_ERR_OTHER) 1973 ehc->i.err_mask &= ~AC_ERR_OTHER; 1974 1975 all_err_mask |= ehc->i.err_mask; 1976 1977 for (tag = 0; tag < 
ATA_MAX_QUEUE; tag++) { 1978 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 1979 1980 if (!(qc->flags & ATA_QCFLAG_FAILED) || 1981 ata_dev_phys_link(qc->dev) != link) 1982 continue; 1983 1984 /* inherit upper level err_mask */ 1985 qc->err_mask |= ehc->i.err_mask; 1986 1987 /* analyze TF */ 1988 ehc->i.action |= ata_eh_analyze_tf(qc, &qc->result_tf); 1989 1990 /* DEV errors are probably spurious in case of ATA_BUS error */ 1991 if (qc->err_mask & AC_ERR_ATA_BUS) 1992 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA | 1993 AC_ERR_INVALID); 1994 1995 /* any real error trumps unknown error */ 1996 if (qc->err_mask & ~AC_ERR_OTHER) 1997 qc->err_mask &= ~AC_ERR_OTHER; 1998 1999 /* SENSE_VALID trumps dev/unknown error and revalidation */ 2000 if (qc->flags & ATA_QCFLAG_SENSE_VALID) 2001 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER); 2002 2003 /* determine whether the command is worth retrying */ 2004 if (!(qc->err_mask & AC_ERR_INVALID) && 2005 ((qc->flags & ATA_QCFLAG_IO) || qc->err_mask != AC_ERR_DEV)) 2006 qc->flags |= ATA_QCFLAG_RETRY; 2007 2008 /* accumulate error info */ 2009 ehc->i.dev = qc->dev; 2010 all_err_mask |= qc->err_mask; 2011 if (qc->flags & ATA_QCFLAG_IO) 2012 eflags |= ATA_EFLAG_IS_IO; 2013 } 2014 2015 /* enforce default EH actions */ 2016 if (ap->pflags & ATA_PFLAG_FROZEN || 2017 all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT)) 2018 ehc->i.action |= ATA_EH_RESET; 2019 else if (((eflags & ATA_EFLAG_IS_IO) && all_err_mask) || 2020 (!(eflags & ATA_EFLAG_IS_IO) && (all_err_mask & ~AC_ERR_DEV))) 2021 ehc->i.action |= ATA_EH_REVALIDATE; 2022 2023 /* If we have offending qcs and the associated failed device, 2024 * perform per-dev EH action only on the offending device. 
2025 */ 2026 if (ehc->i.dev) { 2027 ehc->i.dev_action[ehc->i.dev->devno] |= 2028 ehc->i.action & ATA_EH_PERDEV_MASK; 2029 ehc->i.action &= ~ATA_EH_PERDEV_MASK; 2030 } 2031 2032 /* propagate timeout to host link */ 2033 if ((all_err_mask & AC_ERR_TIMEOUT) && !ata_is_host_link(link)) 2034 ap->link.eh_context.i.err_mask |= AC_ERR_TIMEOUT; 2035 2036 /* record error and consider speeding down */ 2037 dev = ehc->i.dev; 2038 if (!dev && ((ata_link_max_devices(link) == 1 && 2039 ata_dev_enabled(link->device)))) 2040 dev = link->device; 2041 2042 if (dev) { 2043 if (dev->flags & ATA_DFLAG_DUBIOUS_XFER) 2044 eflags |= ATA_EFLAG_DUBIOUS_XFER; 2045 ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask); 2046 } 2047 2048 DPRINTK("EXIT\n"); 2049 } 2050 2051 /** 2052 * ata_eh_autopsy - analyze error and determine recovery action 2053 * @ap: host port to perform autopsy on 2054 * 2055 * Analyze all links of @ap and determine why they failed and 2056 * which recovery actions are needed. 2057 * 2058 * LOCKING: 2059 * Kernel thread context (may sleep). 2060 */ 2061 void ata_eh_autopsy(struct ata_port *ap) 2062 { 2063 struct ata_link *link; 2064 2065 ata_for_each_link(link, ap, EDGE) 2066 ata_eh_link_autopsy(link); 2067 2068 /* Handle the frigging slave link. Autopsy is done similarly 2069 * but actions and flags are transferred over to the master 2070 * link and handled from there. 
2071 */ 2072 if (ap->slave_link) { 2073 struct ata_eh_context *mehc = &ap->link.eh_context; 2074 struct ata_eh_context *sehc = &ap->slave_link->eh_context; 2075 2076 /* transfer control flags from master to slave */ 2077 sehc->i.flags |= mehc->i.flags & ATA_EHI_TO_SLAVE_MASK; 2078 2079 /* perform autopsy on the slave link */ 2080 ata_eh_link_autopsy(ap->slave_link); 2081 2082 /* transfer actions from slave to master and clear slave */ 2083 ata_eh_about_to_do(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS); 2084 mehc->i.action |= sehc->i.action; 2085 mehc->i.dev_action[1] |= sehc->i.dev_action[1]; 2086 mehc->i.flags |= sehc->i.flags; 2087 ata_eh_done(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS); 2088 } 2089 2090 /* Autopsy of fanout ports can affect host link autopsy. 2091 * Perform host link autopsy last. 2092 */ 2093 if (sata_pmp_attached(ap)) 2094 ata_eh_link_autopsy(&ap->link); 2095 } 2096 2097 /** 2098 * ata_eh_link_report - report error handling to user 2099 * @link: ATA link EH is going on 2100 * 2101 * Report EH to user. 2102 * 2103 * LOCKING: 2104 * None. 
2105 */ 2106 static void ata_eh_link_report(struct ata_link *link) 2107 { 2108 struct ata_port *ap = link->ap; 2109 struct ata_eh_context *ehc = &link->eh_context; 2110 const char *frozen, *desc; 2111 char tries_buf[6]; 2112 int tag, nr_failed = 0; 2113 2114 if (ehc->i.flags & ATA_EHI_QUIET) 2115 return; 2116 2117 desc = NULL; 2118 if (ehc->i.desc[0] != '\0') 2119 desc = ehc->i.desc; 2120 2121 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 2122 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 2123 2124 if (!(qc->flags & ATA_QCFLAG_FAILED) || 2125 ata_dev_phys_link(qc->dev) != link || 2126 ((qc->flags & ATA_QCFLAG_QUIET) && 2127 qc->err_mask == AC_ERR_DEV)) 2128 continue; 2129 if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask) 2130 continue; 2131 2132 nr_failed++; 2133 } 2134 2135 if (!nr_failed && !ehc->i.err_mask) 2136 return; 2137 2138 frozen = ""; 2139 if (ap->pflags & ATA_PFLAG_FROZEN) 2140 frozen = " frozen"; 2141 2142 memset(tries_buf, 0, sizeof(tries_buf)); 2143 if (ap->eh_tries < ATA_EH_MAX_TRIES) 2144 snprintf(tries_buf, sizeof(tries_buf) - 1, " t%d", 2145 ap->eh_tries); 2146 2147 if (ehc->i.dev) { 2148 ata_dev_printk(ehc->i.dev, KERN_ERR, "exception Emask 0x%x " 2149 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n", 2150 ehc->i.err_mask, link->sactive, ehc->i.serror, 2151 ehc->i.action, frozen, tries_buf); 2152 if (desc) 2153 ata_dev_printk(ehc->i.dev, KERN_ERR, "%s\n", desc); 2154 } else { 2155 ata_link_printk(link, KERN_ERR, "exception Emask 0x%x " 2156 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n", 2157 ehc->i.err_mask, link->sactive, ehc->i.serror, 2158 ehc->i.action, frozen, tries_buf); 2159 if (desc) 2160 ata_link_printk(link, KERN_ERR, "%s\n", desc); 2161 } 2162 2163 if (ehc->i.serror) 2164 ata_link_printk(link, KERN_ERR, 2165 "SError: { %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s}\n", 2166 ehc->i.serror & SERR_DATA_RECOVERED ? "RecovData " : "", 2167 ehc->i.serror & SERR_COMM_RECOVERED ? "RecovComm " : "", 2168 ehc->i.serror & SERR_DATA ? 
"UnrecovData " : "", 2169 ehc->i.serror & SERR_PERSISTENT ? "Persist " : "", 2170 ehc->i.serror & SERR_PROTOCOL ? "Proto " : "", 2171 ehc->i.serror & SERR_INTERNAL ? "HostInt " : "", 2172 ehc->i.serror & SERR_PHYRDY_CHG ? "PHYRdyChg " : "", 2173 ehc->i.serror & SERR_PHY_INT_ERR ? "PHYInt " : "", 2174 ehc->i.serror & SERR_COMM_WAKE ? "CommWake " : "", 2175 ehc->i.serror & SERR_10B_8B_ERR ? "10B8B " : "", 2176 ehc->i.serror & SERR_DISPARITY ? "Dispar " : "", 2177 ehc->i.serror & SERR_CRC ? "BadCRC " : "", 2178 ehc->i.serror & SERR_HANDSHAKE ? "Handshk " : "", 2179 ehc->i.serror & SERR_LINK_SEQ_ERR ? "LinkSeq " : "", 2180 ehc->i.serror & SERR_TRANS_ST_ERROR ? "TrStaTrns " : "", 2181 ehc->i.serror & SERR_UNRECOG_FIS ? "UnrecFIS " : "", 2182 ehc->i.serror & SERR_DEV_XCHG ? "DevExch " : ""); 2183 2184 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 2185 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 2186 struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf; 2187 const u8 *cdb = qc->cdb; 2188 char data_buf[20] = ""; 2189 char cdb_buf[70] = ""; 2190 2191 if (!(qc->flags & ATA_QCFLAG_FAILED) || 2192 ata_dev_phys_link(qc->dev) != link || !qc->err_mask) 2193 continue; 2194 2195 if (qc->dma_dir != DMA_NONE) { 2196 static const char *dma_str[] = { 2197 [DMA_BIDIRECTIONAL] = "bidi", 2198 [DMA_TO_DEVICE] = "out", 2199 [DMA_FROM_DEVICE] = "in", 2200 }; 2201 static const char *prot_str[] = { 2202 [ATA_PROT_PIO] = "pio", 2203 [ATA_PROT_DMA] = "dma", 2204 [ATA_PROT_NCQ] = "ncq", 2205 [ATAPI_PROT_PIO] = "pio", 2206 [ATAPI_PROT_DMA] = "dma", 2207 }; 2208 2209 snprintf(data_buf, sizeof(data_buf), " %s %u %s", 2210 prot_str[qc->tf.protocol], qc->nbytes, 2211 dma_str[qc->dma_dir]); 2212 } 2213 2214 if (ata_is_atapi(qc->tf.protocol)) 2215 snprintf(cdb_buf, sizeof(cdb_buf), 2216 "cdb %02x %02x %02x %02x %02x %02x %02x %02x " 2217 "%02x %02x %02x %02x %02x %02x %02x %02x\n ", 2218 cdb[0], cdb[1], cdb[2], cdb[3], 2219 cdb[4], cdb[5], cdb[6], cdb[7], 2220 cdb[8], cdb[9], cdb[10], 
cdb[11], 2221 cdb[12], cdb[13], cdb[14], cdb[15]); 2222 2223 ata_dev_printk(qc->dev, KERN_ERR, 2224 "cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 2225 "tag %d%s\n %s" 2226 "res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 2227 "Emask 0x%x (%s)%s\n", 2228 cmd->command, cmd->feature, cmd->nsect, 2229 cmd->lbal, cmd->lbam, cmd->lbah, 2230 cmd->hob_feature, cmd->hob_nsect, 2231 cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah, 2232 cmd->device, qc->tag, data_buf, cdb_buf, 2233 res->command, res->feature, res->nsect, 2234 res->lbal, res->lbam, res->lbah, 2235 res->hob_feature, res->hob_nsect, 2236 res->hob_lbal, res->hob_lbam, res->hob_lbah, 2237 res->device, qc->err_mask, ata_err_string(qc->err_mask), 2238 qc->err_mask & AC_ERR_NCQ ? " <F>" : ""); 2239 2240 if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | 2241 ATA_ERR)) { 2242 if (res->command & ATA_BUSY) 2243 ata_dev_printk(qc->dev, KERN_ERR, 2244 "status: { Busy }\n"); 2245 else 2246 ata_dev_printk(qc->dev, KERN_ERR, 2247 "status: { %s%s%s%s}\n", 2248 res->command & ATA_DRDY ? "DRDY " : "", 2249 res->command & ATA_DF ? "DF " : "", 2250 res->command & ATA_DRQ ? "DRQ " : "", 2251 res->command & ATA_ERR ? "ERR " : ""); 2252 } 2253 2254 if (cmd->command != ATA_CMD_PACKET && 2255 (res->feature & (ATA_ICRC | ATA_UNC | ATA_IDNF | 2256 ATA_ABORTED))) 2257 ata_dev_printk(qc->dev, KERN_ERR, 2258 "error: { %s%s%s%s}\n", 2259 res->feature & ATA_ICRC ? "ICRC " : "", 2260 res->feature & ATA_UNC ? "UNC " : "", 2261 res->feature & ATA_IDNF ? "IDNF " : "", 2262 res->feature & ATA_ABORTED ? "ABRT " : ""); 2263 } 2264 } 2265 2266 /** 2267 * ata_eh_report - report error handling to user 2268 * @ap: ATA port to report EH about 2269 * 2270 * Report EH to user. 2271 * 2272 * LOCKING: 2273 * None. 
2274 */ 2275 void ata_eh_report(struct ata_port *ap) 2276 { 2277 struct ata_link *link; 2278 2279 ata_for_each_link(link, ap, HOST_FIRST) 2280 ata_eh_link_report(link); 2281 } 2282 2283 static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset, 2284 unsigned int *classes, unsigned long deadline, 2285 bool clear_classes) 2286 { 2287 struct ata_device *dev; 2288 2289 if (clear_classes) 2290 ata_for_each_dev(dev, link, ALL) 2291 classes[dev->devno] = ATA_DEV_UNKNOWN; 2292 2293 return reset(link, classes, deadline); 2294 } 2295 2296 static int ata_eh_followup_srst_needed(struct ata_link *link, 2297 int rc, const unsigned int *classes) 2298 { 2299 if ((link->flags & ATA_LFLAG_NO_SRST) || ata_link_offline(link)) 2300 return 0; 2301 if (rc == -EAGAIN) 2302 return 1; 2303 if (sata_pmp_supported(link->ap) && ata_is_host_link(link)) 2304 return 1; 2305 return 0; 2306 } 2307 2308 int ata_eh_reset(struct ata_link *link, int classify, 2309 ata_prereset_fn_t prereset, ata_reset_fn_t softreset, 2310 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) 2311 { 2312 struct ata_port *ap = link->ap; 2313 struct ata_link *slave = ap->slave_link; 2314 struct ata_eh_context *ehc = &link->eh_context; 2315 struct ata_eh_context *sehc = &slave->eh_context; 2316 unsigned int *classes = ehc->classes; 2317 unsigned int lflags = link->flags; 2318 int verbose = !(ehc->i.flags & ATA_EHI_QUIET); 2319 int max_tries = 0, try = 0; 2320 struct ata_link *failed_link; 2321 struct ata_device *dev; 2322 unsigned long deadline, now; 2323 ata_reset_fn_t reset; 2324 unsigned long flags; 2325 u32 sstatus; 2326 int nr_unknown, rc; 2327 2328 /* 2329 * Prepare to reset 2330 */ 2331 while (ata_eh_reset_timeouts[max_tries] != ULONG_MAX) 2332 max_tries++; 2333 if (link->flags & ATA_LFLAG_NO_HRST) 2334 hardreset = NULL; 2335 if (link->flags & ATA_LFLAG_NO_SRST) 2336 softreset = NULL; 2337 2338 /* make sure each reset attemp is at least COOL_DOWN apart */ 2339 if (ehc->i.flags & ATA_EHI_DID_RESET) { 2340 
now = jiffies; 2341 WARN_ON(time_after(ehc->last_reset, now)); 2342 deadline = ata_deadline(ehc->last_reset, 2343 ATA_EH_RESET_COOL_DOWN); 2344 if (time_before(now, deadline)) 2345 schedule_timeout_uninterruptible(deadline - now); 2346 } 2347 2348 spin_lock_irqsave(ap->lock, flags); 2349 ap->pflags |= ATA_PFLAG_RESETTING; 2350 spin_unlock_irqrestore(ap->lock, flags); 2351 2352 ata_eh_about_to_do(link, NULL, ATA_EH_RESET); 2353 2354 ata_for_each_dev(dev, link, ALL) { 2355 /* If we issue an SRST then an ATA drive (not ATAPI) 2356 * may change configuration and be in PIO0 timing. If 2357 * we do a hard reset (or are coming from power on) 2358 * this is true for ATA or ATAPI. Until we've set a 2359 * suitable controller mode we should not touch the 2360 * bus as we may be talking too fast. 2361 */ 2362 dev->pio_mode = XFER_PIO_0; 2363 2364 /* If the controller has a pio mode setup function 2365 * then use it to set the chipset to rights. Don't 2366 * touch the DMA setup as that will be dealt with when 2367 * configuring devices. 2368 */ 2369 if (ap->ops->set_piomode) 2370 ap->ops->set_piomode(ap, dev); 2371 } 2372 2373 /* prefer hardreset */ 2374 reset = NULL; 2375 ehc->i.action &= ~ATA_EH_RESET; 2376 if (hardreset) { 2377 reset = hardreset; 2378 ehc->i.action |= ATA_EH_HARDRESET; 2379 } else if (softreset) { 2380 reset = softreset; 2381 ehc->i.action |= ATA_EH_SOFTRESET; 2382 } 2383 2384 if (prereset) { 2385 unsigned long deadline = ata_deadline(jiffies, 2386 ATA_EH_PRERESET_TIMEOUT); 2387 2388 if (slave) { 2389 sehc->i.action &= ~ATA_EH_RESET; 2390 sehc->i.action |= ehc->i.action; 2391 } 2392 2393 rc = prereset(link, deadline); 2394 2395 /* If present, do prereset on slave link too. Reset 2396 * is skipped iff both master and slave links report 2397 * -ENOENT or clear ATA_EH_RESET. 
2398 */ 2399 if (slave && (rc == 0 || rc == -ENOENT)) { 2400 int tmp; 2401 2402 tmp = prereset(slave, deadline); 2403 if (tmp != -ENOENT) 2404 rc = tmp; 2405 2406 ehc->i.action |= sehc->i.action; 2407 } 2408 2409 if (rc) { 2410 if (rc == -ENOENT) { 2411 ata_link_printk(link, KERN_DEBUG, 2412 "port disabled. ignoring.\n"); 2413 ehc->i.action &= ~ATA_EH_RESET; 2414 2415 ata_for_each_dev(dev, link, ALL) 2416 classes[dev->devno] = ATA_DEV_NONE; 2417 2418 rc = 0; 2419 } else 2420 ata_link_printk(link, KERN_ERR, 2421 "prereset failed (errno=%d)\n", rc); 2422 goto out; 2423 } 2424 2425 /* prereset() might have cleared ATA_EH_RESET. If so, 2426 * bang classes, thaw and return. 2427 */ 2428 if (reset && !(ehc->i.action & ATA_EH_RESET)) { 2429 ata_for_each_dev(dev, link, ALL) 2430 classes[dev->devno] = ATA_DEV_NONE; 2431 if ((ap->pflags & ATA_PFLAG_FROZEN) && 2432 ata_is_host_link(link)) 2433 ata_eh_thaw_port(ap); 2434 rc = 0; 2435 goto out; 2436 } 2437 } 2438 2439 retry: 2440 /* 2441 * Perform reset 2442 */ 2443 if (ata_is_host_link(link)) 2444 ata_eh_freeze_port(ap); 2445 2446 deadline = ata_deadline(jiffies, ata_eh_reset_timeouts[try++]); 2447 2448 if (reset) { 2449 if (verbose) 2450 ata_link_printk(link, KERN_INFO, "%s resetting link\n", 2451 reset == softreset ? 
"soft" : "hard"); 2452 2453 /* mark that this EH session started with reset */ 2454 ehc->last_reset = jiffies; 2455 if (reset == hardreset) 2456 ehc->i.flags |= ATA_EHI_DID_HARDRESET; 2457 else 2458 ehc->i.flags |= ATA_EHI_DID_SOFTRESET; 2459 2460 rc = ata_do_reset(link, reset, classes, deadline, true); 2461 if (rc && rc != -EAGAIN) { 2462 failed_link = link; 2463 goto fail; 2464 } 2465 2466 /* hardreset slave link if existent */ 2467 if (slave && reset == hardreset) { 2468 int tmp; 2469 2470 if (verbose) 2471 ata_link_printk(slave, KERN_INFO, 2472 "hard resetting link\n"); 2473 2474 ata_eh_about_to_do(slave, NULL, ATA_EH_RESET); 2475 tmp = ata_do_reset(slave, reset, classes, deadline, 2476 false); 2477 switch (tmp) { 2478 case -EAGAIN: 2479 rc = -EAGAIN; 2480 case 0: 2481 break; 2482 default: 2483 failed_link = slave; 2484 rc = tmp; 2485 goto fail; 2486 } 2487 } 2488 2489 /* perform follow-up SRST if necessary */ 2490 if (reset == hardreset && 2491 ata_eh_followup_srst_needed(link, rc, classes)) { 2492 reset = softreset; 2493 2494 if (!reset) { 2495 ata_link_printk(link, KERN_ERR, 2496 "follow-up softreset required " 2497 "but no softreset avaliable\n"); 2498 failed_link = link; 2499 rc = -EINVAL; 2500 goto fail; 2501 } 2502 2503 ata_eh_about_to_do(link, NULL, ATA_EH_RESET); 2504 rc = ata_do_reset(link, reset, classes, deadline, true); 2505 } 2506 } else { 2507 if (verbose) 2508 ata_link_printk(link, KERN_INFO, "no reset method " 2509 "available, skipping reset\n"); 2510 if (!(lflags & ATA_LFLAG_ASSUME_CLASS)) 2511 lflags |= ATA_LFLAG_ASSUME_ATA; 2512 } 2513 2514 /* 2515 * Post-reset processing 2516 */ 2517 ata_for_each_dev(dev, link, ALL) { 2518 /* After the reset, the device state is PIO 0 and the 2519 * controller state is undefined. Reset also wakes up 2520 * drives from sleeping mode. 
2521 */ 2522 dev->pio_mode = XFER_PIO_0; 2523 dev->flags &= ~ATA_DFLAG_SLEEPING; 2524 2525 if (!ata_phys_link_offline(ata_dev_phys_link(dev))) { 2526 /* apply class override */ 2527 if (lflags & ATA_LFLAG_ASSUME_ATA) 2528 classes[dev->devno] = ATA_DEV_ATA; 2529 else if (lflags & ATA_LFLAG_ASSUME_SEMB) 2530 classes[dev->devno] = ATA_DEV_SEMB_UNSUP; 2531 } else 2532 classes[dev->devno] = ATA_DEV_NONE; 2533 } 2534 2535 /* record current link speed */ 2536 if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0) 2537 link->sata_spd = (sstatus >> 4) & 0xf; 2538 if (slave && sata_scr_read(slave, SCR_STATUS, &sstatus) == 0) 2539 slave->sata_spd = (sstatus >> 4) & 0xf; 2540 2541 /* thaw the port */ 2542 if (ata_is_host_link(link)) 2543 ata_eh_thaw_port(ap); 2544 2545 /* postreset() should clear hardware SError. Although SError 2546 * is cleared during link resume, clearing SError here is 2547 * necessary as some PHYs raise hotplug events after SRST. 2548 * This introduces race condition where hotplug occurs between 2549 * reset and here. This race is mediated by cross checking 2550 * link onlineness and classification result later. 2551 */ 2552 if (postreset) { 2553 postreset(link, classes); 2554 if (slave) 2555 postreset(slave, classes); 2556 } 2557 2558 /* clear cached SError */ 2559 spin_lock_irqsave(link->ap->lock, flags); 2560 link->eh_info.serror = 0; 2561 if (slave) 2562 slave->eh_info.serror = 0; 2563 spin_unlock_irqrestore(link->ap->lock, flags); 2564 2565 /* Make sure onlineness and classification result correspond. 2566 * Hotplug could have happened during reset and some 2567 * controllers fail to wait while a drive is spinning up after 2568 * being hotplugged causing misdetection. By cross checking 2569 * link onlineness and classification result, those conditions 2570 * can be reliably detected and retried. 
2571 */ 2572 nr_unknown = 0; 2573 ata_for_each_dev(dev, link, ALL) { 2574 /* convert all ATA_DEV_UNKNOWN to ATA_DEV_NONE */ 2575 if (classes[dev->devno] == ATA_DEV_UNKNOWN) { 2576 classes[dev->devno] = ATA_DEV_NONE; 2577 if (ata_phys_link_online(ata_dev_phys_link(dev))) 2578 nr_unknown++; 2579 } 2580 } 2581 2582 if (classify && nr_unknown) { 2583 if (try < max_tries) { 2584 ata_link_printk(link, KERN_WARNING, "link online but " 2585 "device misclassified, retrying\n"); 2586 failed_link = link; 2587 rc = -EAGAIN; 2588 goto fail; 2589 } 2590 ata_link_printk(link, KERN_WARNING, 2591 "link online but device misclassified, " 2592 "device detection might fail\n"); 2593 } 2594 2595 /* reset successful, schedule revalidation */ 2596 ata_eh_done(link, NULL, ATA_EH_RESET); 2597 if (slave) 2598 ata_eh_done(slave, NULL, ATA_EH_RESET); 2599 ehc->last_reset = jiffies; /* update to completion time */ 2600 ehc->i.action |= ATA_EH_REVALIDATE; 2601 2602 rc = 0; 2603 out: 2604 /* clear hotplug flag */ 2605 ehc->i.flags &= ~ATA_EHI_HOTPLUGGED; 2606 if (slave) 2607 sehc->i.flags &= ~ATA_EHI_HOTPLUGGED; 2608 2609 spin_lock_irqsave(ap->lock, flags); 2610 ap->pflags &= ~ATA_PFLAG_RESETTING; 2611 spin_unlock_irqrestore(ap->lock, flags); 2612 2613 return rc; 2614 2615 fail: 2616 /* if SCR isn't accessible on a fan-out port, PMP needs to be reset */ 2617 if (!ata_is_host_link(link) && 2618 sata_scr_read(link, SCR_STATUS, &sstatus)) 2619 rc = -ERESTART; 2620 2621 if (rc == -ERESTART || try >= max_tries) 2622 goto out; 2623 2624 now = jiffies; 2625 if (time_before(now, deadline)) { 2626 unsigned long delta = deadline - now; 2627 2628 ata_link_printk(failed_link, KERN_WARNING, 2629 "reset failed (errno=%d), retrying in %u secs\n", 2630 rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000)); 2631 2632 while (delta) 2633 delta = schedule_timeout_uninterruptible(delta); 2634 } 2635 2636 if (try == max_tries - 1) { 2637 sata_down_spd_limit(link, 0); 2638 if (slave) 2639 sata_down_spd_limit(slave, 0); 
2640 } else if (rc == -EPIPE) 2641 sata_down_spd_limit(failed_link, 0); 2642 2643 if (hardreset) 2644 reset = hardreset; 2645 goto retry; 2646 } 2647 2648 static inline void ata_eh_pull_park_action(struct ata_port *ap) 2649 { 2650 struct ata_link *link; 2651 struct ata_device *dev; 2652 unsigned long flags; 2653 2654 /* 2655 * This function can be thought of as an extended version of 2656 * ata_eh_about_to_do() specially crafted to accommodate the 2657 * requirements of ATA_EH_PARK handling. Since the EH thread 2658 * does not leave the do {} while () loop in ata_eh_recover as 2659 * long as the timeout for a park request to *one* device on 2660 * the port has not expired, and since we still want to pick 2661 * up park requests to other devices on the same port or 2662 * timeout updates for the same device, we have to pull 2663 * ATA_EH_PARK actions from eh_info into eh_context.i 2664 * ourselves at the beginning of each pass over the loop. 2665 * 2666 * Additionally, all write accesses to &ap->park_req_pending 2667 * through INIT_COMPLETION() (see below) or complete_all() 2668 * (see ata_scsi_park_store()) are protected by the host lock. 2669 * As a result we have that park_req_pending.done is zero on 2670 * exit from this function, i.e. when ATA_EH_PARK actions for 2671 * *all* devices on port ap have been pulled into the 2672 * respective eh_context structs. If, and only if, 2673 * park_req_pending.done is non-zero by the time we reach 2674 * wait_for_completion_timeout(), another ATA_EH_PARK action 2675 * has been scheduled for at least one of the devices on port 2676 * ap and we have to cycle over the do {} while () loop in 2677 * ata_eh_recover() again. 
2678 */ 2679 2680 spin_lock_irqsave(ap->lock, flags); 2681 INIT_COMPLETION(ap->park_req_pending); 2682 ata_for_each_link(link, ap, EDGE) { 2683 ata_for_each_dev(dev, link, ALL) { 2684 struct ata_eh_info *ehi = &link->eh_info; 2685 2686 link->eh_context.i.dev_action[dev->devno] |= 2687 ehi->dev_action[dev->devno] & ATA_EH_PARK; 2688 ata_eh_clear_action(link, dev, ehi, ATA_EH_PARK); 2689 } 2690 } 2691 spin_unlock_irqrestore(ap->lock, flags); 2692 } 2693 2694 static void ata_eh_park_issue_cmd(struct ata_device *dev, int park) 2695 { 2696 struct ata_eh_context *ehc = &dev->link->eh_context; 2697 struct ata_taskfile tf; 2698 unsigned int err_mask; 2699 2700 ata_tf_init(dev, &tf); 2701 if (park) { 2702 ehc->unloaded_mask |= 1 << dev->devno; 2703 tf.command = ATA_CMD_IDLEIMMEDIATE; 2704 tf.feature = 0x44; 2705 tf.lbal = 0x4c; 2706 tf.lbam = 0x4e; 2707 tf.lbah = 0x55; 2708 } else { 2709 ehc->unloaded_mask &= ~(1 << dev->devno); 2710 tf.command = ATA_CMD_CHK_POWER; 2711 } 2712 2713 tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR; 2714 tf.protocol |= ATA_PROT_NODATA; 2715 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); 2716 if (park && (err_mask || tf.lbal != 0xc4)) { 2717 ata_dev_printk(dev, KERN_ERR, "head unload failed!\n"); 2718 ehc->unloaded_mask &= ~(1 << dev->devno); 2719 } 2720 } 2721 2722 static int ata_eh_revalidate_and_attach(struct ata_link *link, 2723 struct ata_device **r_failed_dev) 2724 { 2725 struct ata_port *ap = link->ap; 2726 struct ata_eh_context *ehc = &link->eh_context; 2727 struct ata_device *dev; 2728 unsigned int new_mask = 0; 2729 unsigned long flags; 2730 int rc = 0; 2731 2732 DPRINTK("ENTER\n"); 2733 2734 /* For PATA drive side cable detection to work, IDENTIFY must 2735 * be done backwards such that PDIAG- is released by the slave 2736 * device before the master device is identified. 
2737 */ 2738 ata_for_each_dev(dev, link, ALL_REVERSE) { 2739 unsigned int action = ata_eh_dev_action(dev); 2740 unsigned int readid_flags = 0; 2741 2742 if (ehc->i.flags & ATA_EHI_DID_RESET) 2743 readid_flags |= ATA_READID_POSTRESET; 2744 2745 if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) { 2746 WARN_ON(dev->class == ATA_DEV_PMP); 2747 2748 if (ata_phys_link_offline(ata_dev_phys_link(dev))) { 2749 rc = -EIO; 2750 goto err; 2751 } 2752 2753 ata_eh_about_to_do(link, dev, ATA_EH_REVALIDATE); 2754 rc = ata_dev_revalidate(dev, ehc->classes[dev->devno], 2755 readid_flags); 2756 if (rc) 2757 goto err; 2758 2759 ata_eh_done(link, dev, ATA_EH_REVALIDATE); 2760 2761 /* Configuration may have changed, reconfigure 2762 * transfer mode. 2763 */ 2764 ehc->i.flags |= ATA_EHI_SETMODE; 2765 2766 /* schedule the scsi_rescan_device() here */ 2767 queue_work(ata_aux_wq, &(ap->scsi_rescan_task)); 2768 } else if (dev->class == ATA_DEV_UNKNOWN && 2769 ehc->tries[dev->devno] && 2770 ata_class_enabled(ehc->classes[dev->devno])) { 2771 dev->class = ehc->classes[dev->devno]; 2772 2773 if (dev->class == ATA_DEV_PMP) 2774 rc = sata_pmp_attach(dev); 2775 else 2776 rc = ata_dev_read_id(dev, &dev->class, 2777 readid_flags, dev->id); 2778 switch (rc) { 2779 case 0: 2780 /* clear error info accumulated during probe */ 2781 ata_ering_clear(&dev->ering); 2782 new_mask |= 1 << dev->devno; 2783 break; 2784 case -ENOENT: 2785 /* IDENTIFY was issued to non-existent 2786 * device. No need to reset. Just 2787 * thaw and kill the device. 
2788 */ 2789 ata_eh_thaw_port(ap); 2790 dev->class = ATA_DEV_UNKNOWN; 2791 break; 2792 default: 2793 dev->class = ATA_DEV_UNKNOWN; 2794 goto err; 2795 } 2796 } 2797 } 2798 2799 /* PDIAG- should have been released, ask cable type if post-reset */ 2800 if ((ehc->i.flags & ATA_EHI_DID_RESET) && ata_is_host_link(link)) { 2801 if (ap->ops->cable_detect) 2802 ap->cbl = ap->ops->cable_detect(ap); 2803 ata_force_cbl(ap); 2804 } 2805 2806 /* Configure new devices forward such that user doesn't see 2807 * device detection messages backwards. 2808 */ 2809 ata_for_each_dev(dev, link, ALL) { 2810 if (!(new_mask & (1 << dev->devno)) || 2811 dev->class == ATA_DEV_PMP) 2812 continue; 2813 2814 ehc->i.flags |= ATA_EHI_PRINTINFO; 2815 rc = ata_dev_configure(dev); 2816 ehc->i.flags &= ~ATA_EHI_PRINTINFO; 2817 if (rc) 2818 goto err; 2819 2820 spin_lock_irqsave(ap->lock, flags); 2821 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; 2822 spin_unlock_irqrestore(ap->lock, flags); 2823 2824 /* new device discovered, configure xfermode */ 2825 ehc->i.flags |= ATA_EHI_SETMODE; 2826 } 2827 2828 return 0; 2829 2830 err: 2831 *r_failed_dev = dev; 2832 DPRINTK("EXIT rc=%d\n", rc); 2833 return rc; 2834 } 2835 2836 /** 2837 * ata_set_mode - Program timings and issue SET FEATURES - XFER 2838 * @link: link on which timings will be programmed 2839 * @r_failed_dev: out paramter for failed device 2840 * 2841 * Set ATA device disk transfer mode (PIO3, UDMA6, etc.). If 2842 * ata_set_mode() fails, pointer to the failing device is 2843 * returned in @r_failed_dev. 2844 * 2845 * LOCKING: 2846 * PCI/etc. bus probe sem. 
2847 * 2848 * RETURNS: 2849 * 0 on success, negative errno otherwise 2850 */ 2851 int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev) 2852 { 2853 struct ata_port *ap = link->ap; 2854 struct ata_device *dev; 2855 int rc; 2856 2857 /* if data transfer is verified, clear DUBIOUS_XFER on ering top */ 2858 ata_for_each_dev(dev, link, ENABLED) { 2859 if (!(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) { 2860 struct ata_ering_entry *ent; 2861 2862 ent = ata_ering_top(&dev->ering); 2863 if (ent) 2864 ent->eflags &= ~ATA_EFLAG_DUBIOUS_XFER; 2865 } 2866 } 2867 2868 /* has private set_mode? */ 2869 if (ap->ops->set_mode) 2870 rc = ap->ops->set_mode(link, r_failed_dev); 2871 else 2872 rc = ata_do_set_mode(link, r_failed_dev); 2873 2874 /* if transfer mode has changed, set DUBIOUS_XFER on device */ 2875 ata_for_each_dev(dev, link, ENABLED) { 2876 struct ata_eh_context *ehc = &link->eh_context; 2877 u8 saved_xfer_mode = ehc->saved_xfer_mode[dev->devno]; 2878 u8 saved_ncq = !!(ehc->saved_ncq_enabled & (1 << dev->devno)); 2879 2880 if (dev->xfer_mode != saved_xfer_mode || 2881 ata_ncq_enabled(dev) != saved_ncq) 2882 dev->flags |= ATA_DFLAG_DUBIOUS_XFER; 2883 } 2884 2885 return rc; 2886 } 2887 2888 /** 2889 * atapi_eh_clear_ua - Clear ATAPI UNIT ATTENTION after reset 2890 * @dev: ATAPI device to clear UA for 2891 * 2892 * Resets and other operations can make an ATAPI device raise 2893 * UNIT ATTENTION which causes the next operation to fail. This 2894 * function clears UA. 2895 * 2896 * LOCKING: 2897 * EH context (may sleep). 2898 * 2899 * RETURNS: 2900 * 0 on success, -errno on failure. 
2901 */ 2902 static int atapi_eh_clear_ua(struct ata_device *dev) 2903 { 2904 int i; 2905 2906 for (i = 0; i < ATA_EH_UA_TRIES; i++) { 2907 u8 *sense_buffer = dev->link->ap->sector_buf; 2908 u8 sense_key = 0; 2909 unsigned int err_mask; 2910 2911 err_mask = atapi_eh_tur(dev, &sense_key); 2912 if (err_mask != 0 && err_mask != AC_ERR_DEV) { 2913 ata_dev_printk(dev, KERN_WARNING, "TEST_UNIT_READY " 2914 "failed (err_mask=0x%x)\n", err_mask); 2915 return -EIO; 2916 } 2917 2918 if (!err_mask || sense_key != UNIT_ATTENTION) 2919 return 0; 2920 2921 err_mask = atapi_eh_request_sense(dev, sense_buffer, sense_key); 2922 if (err_mask) { 2923 ata_dev_printk(dev, KERN_WARNING, "failed to clear " 2924 "UNIT ATTENTION (err_mask=0x%x)\n", err_mask); 2925 return -EIO; 2926 } 2927 } 2928 2929 ata_dev_printk(dev, KERN_WARNING, 2930 "UNIT ATTENTION persists after %d tries\n", ATA_EH_UA_TRIES); 2931 2932 return 0; 2933 } 2934 2935 static int ata_link_nr_enabled(struct ata_link *link) 2936 { 2937 struct ata_device *dev; 2938 int cnt = 0; 2939 2940 ata_for_each_dev(dev, link, ENABLED) 2941 cnt++; 2942 return cnt; 2943 } 2944 2945 static int ata_link_nr_vacant(struct ata_link *link) 2946 { 2947 struct ata_device *dev; 2948 int cnt = 0; 2949 2950 ata_for_each_dev(dev, link, ALL) 2951 if (dev->class == ATA_DEV_UNKNOWN) 2952 cnt++; 2953 return cnt; 2954 } 2955 2956 static int ata_eh_skip_recovery(struct ata_link *link) 2957 { 2958 struct ata_port *ap = link->ap; 2959 struct ata_eh_context *ehc = &link->eh_context; 2960 struct ata_device *dev; 2961 2962 /* skip disabled links */ 2963 if (link->flags & ATA_LFLAG_DISABLED) 2964 return 1; 2965 2966 /* thaw frozen port and recover failed devices */ 2967 if ((ap->pflags & ATA_PFLAG_FROZEN) || ata_link_nr_enabled(link)) 2968 return 0; 2969 2970 /* reset at least once if reset is requested */ 2971 if ((ehc->i.action & ATA_EH_RESET) && 2972 !(ehc->i.flags & ATA_EHI_DID_RESET)) 2973 return 0; 2974 2975 /* skip if class codes for all vacant slots are 
ATA_DEV_NONE */ 2976 ata_for_each_dev(dev, link, ALL) { 2977 if (dev->class == ATA_DEV_UNKNOWN && 2978 ehc->classes[dev->devno] != ATA_DEV_NONE) 2979 return 0; 2980 } 2981 2982 return 1; 2983 } 2984 2985 static int ata_count_probe_trials_cb(struct ata_ering_entry *ent, void *void_arg) 2986 { 2987 u64 interval = msecs_to_jiffies(ATA_EH_PROBE_TRIAL_INTERVAL); 2988 u64 now = get_jiffies_64(); 2989 int *trials = void_arg; 2990 2991 if (ent->timestamp < now - min(now, interval)) 2992 return -1; 2993 2994 (*trials)++; 2995 return 0; 2996 } 2997 2998 static int ata_eh_schedule_probe(struct ata_device *dev) 2999 { 3000 struct ata_eh_context *ehc = &dev->link->eh_context; 3001 struct ata_link *link = ata_dev_phys_link(dev); 3002 int trials = 0; 3003 3004 if (!(ehc->i.probe_mask & (1 << dev->devno)) || 3005 (ehc->did_probe_mask & (1 << dev->devno))) 3006 return 0; 3007 3008 ata_eh_detach_dev(dev); 3009 ata_dev_init(dev); 3010 ehc->did_probe_mask |= (1 << dev->devno); 3011 ehc->i.action |= ATA_EH_RESET; 3012 ehc->saved_xfer_mode[dev->devno] = 0; 3013 ehc->saved_ncq_enabled &= ~(1 << dev->devno); 3014 3015 /* Record and count probe trials on the ering. The specific 3016 * error mask used is irrelevant. Because a successful device 3017 * detection clears the ering, this count accumulates only if 3018 * there are consecutive failed probes. 3019 * 3020 * If the count is equal to or higher than ATA_EH_PROBE_TRIALS 3021 * in the last ATA_EH_PROBE_TRIAL_INTERVAL, link speed is 3022 * forced to 1.5Gbps. 3023 * 3024 * This is to work around cases where failed link speed 3025 * negotiation results in device misdetection leading to 3026 * infinite DEVXCHG or PHRDY CHG events. 
3027 */ 3028 ata_ering_record(&dev->ering, 0, AC_ERR_OTHER); 3029 ata_ering_map(&dev->ering, ata_count_probe_trials_cb, &trials); 3030 3031 if (trials > ATA_EH_PROBE_TRIALS) 3032 sata_down_spd_limit(link, 1); 3033 3034 return 1; 3035 } 3036 3037 static int ata_eh_handle_dev_fail(struct ata_device *dev, int err) 3038 { 3039 struct ata_eh_context *ehc = &dev->link->eh_context; 3040 3041 /* -EAGAIN from EH routine indicates retry without prejudice. 3042 * The requester is responsible for ensuring forward progress. 3043 */ 3044 if (err != -EAGAIN) 3045 ehc->tries[dev->devno]--; 3046 3047 switch (err) { 3048 case -ENODEV: 3049 /* device missing or wrong IDENTIFY data, schedule probing */ 3050 ehc->i.probe_mask |= (1 << dev->devno); 3051 case -EINVAL: 3052 /* give it just one more chance */ 3053 ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1); 3054 case -EIO: 3055 if (ehc->tries[dev->devno] == 1) { 3056 /* This is the last chance, better to slow 3057 * down than lose it. 3058 */ 3059 sata_down_spd_limit(ata_dev_phys_link(dev), 0); 3060 if (dev->pio_mode > XFER_PIO_0) 3061 ata_down_xfermask_limit(dev, ATA_DNXFER_PIO); 3062 } 3063 } 3064 3065 if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) { 3066 /* disable device if it has used up all its chances */ 3067 ata_dev_disable(dev); 3068 3069 /* detach if offline */ 3070 if (ata_phys_link_offline(ata_dev_phys_link(dev))) 3071 ata_eh_detach_dev(dev); 3072 3073 /* schedule probe if necessary */ 3074 if (ata_eh_schedule_probe(dev)) { 3075 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 3076 memset(ehc->cmd_timeout_idx[dev->devno], 0, 3077 sizeof(ehc->cmd_timeout_idx[dev->devno])); 3078 } 3079 3080 return 1; 3081 } else { 3082 ehc->i.action |= ATA_EH_RESET; 3083 return 0; 3084 } 3085 } 3086 3087 /** 3088 * ata_eh_recover - recover host port after error 3089 * @ap: host port to recover 3090 * @prereset: prereset method (can be NULL) 3091 * @softreset: softreset method (can be NULL) 3092 * @hardreset: hardreset method (can be 
NULL) 3093 * @postreset: postreset method (can be NULL) 3094 * @r_failed_link: out parameter for failed link 3095 * 3096 * This is the alpha and omega, eum and yang, heart and soul of 3097 * libata exception handling. On entry, actions required to 3098 * recover each link and hotplug requests are recorded in the 3099 * link's eh_context. This function executes all the operations 3100 * with appropriate retrials and fallbacks to resurrect failed 3101 * devices, detach goners and greet newcomers. 3102 * 3103 * LOCKING: 3104 * Kernel thread context (may sleep). 3105 * 3106 * RETURNS: 3107 * 0 on success, -errno on failure. 3108 */ 3109 int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, 3110 ata_reset_fn_t softreset, ata_reset_fn_t hardreset, 3111 ata_postreset_fn_t postreset, 3112 struct ata_link **r_failed_link) 3113 { 3114 struct ata_link *link; 3115 struct ata_device *dev; 3116 int nr_failed_devs; 3117 int rc; 3118 unsigned long flags, deadline; 3119 3120 DPRINTK("ENTER\n"); 3121 3122 /* prep for recovery */ 3123 ata_for_each_link(link, ap, EDGE) { 3124 struct ata_eh_context *ehc = &link->eh_context; 3125 3126 /* re-enable link? 
*/ 3127 if (ehc->i.action & ATA_EH_ENABLE_LINK) { 3128 ata_eh_about_to_do(link, NULL, ATA_EH_ENABLE_LINK); 3129 spin_lock_irqsave(ap->lock, flags); 3130 link->flags &= ~ATA_LFLAG_DISABLED; 3131 spin_unlock_irqrestore(ap->lock, flags); 3132 ata_eh_done(link, NULL, ATA_EH_ENABLE_LINK); 3133 } 3134 3135 ata_for_each_dev(dev, link, ALL) { 3136 if (link->flags & ATA_LFLAG_NO_RETRY) 3137 ehc->tries[dev->devno] = 1; 3138 else 3139 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 3140 3141 /* collect port action mask recorded in dev actions */ 3142 ehc->i.action |= ehc->i.dev_action[dev->devno] & 3143 ~ATA_EH_PERDEV_MASK; 3144 ehc->i.dev_action[dev->devno] &= ATA_EH_PERDEV_MASK; 3145 3146 /* process hotplug request */ 3147 if (dev->flags & ATA_DFLAG_DETACH) 3148 ata_eh_detach_dev(dev); 3149 3150 /* schedule probe if necessary */ 3151 if (!ata_dev_enabled(dev)) 3152 ata_eh_schedule_probe(dev); 3153 } 3154 } 3155 3156 retry: 3157 rc = 0; 3158 nr_failed_devs = 0; 3159 3160 /* if UNLOADING, finish immediately */ 3161 if (ap->pflags & ATA_PFLAG_UNLOADING) 3162 goto out; 3163 3164 /* prep for EH */ 3165 ata_for_each_link(link, ap, EDGE) { 3166 struct ata_eh_context *ehc = &link->eh_context; 3167 3168 /* skip EH if possible. 
*/ 3169 if (ata_eh_skip_recovery(link)) 3170 ehc->i.action = 0; 3171 3172 ata_for_each_dev(dev, link, ALL) 3173 ehc->classes[dev->devno] = ATA_DEV_UNKNOWN; 3174 } 3175 3176 /* reset */ 3177 ata_for_each_link(link, ap, EDGE) { 3178 struct ata_eh_context *ehc = &link->eh_context; 3179 3180 if (!(ehc->i.action & ATA_EH_RESET)) 3181 continue; 3182 3183 rc = ata_eh_reset(link, ata_link_nr_vacant(link), 3184 prereset, softreset, hardreset, postreset); 3185 if (rc) { 3186 ata_link_printk(link, KERN_ERR, 3187 "reset failed, giving up\n"); 3188 goto out; 3189 } 3190 } 3191 3192 do { 3193 unsigned long now; 3194 3195 /* 3196 * clears ATA_EH_PARK in eh_info and resets 3197 * ap->park_req_pending 3198 */ 3199 ata_eh_pull_park_action(ap); 3200 3201 deadline = jiffies; 3202 ata_for_each_link(link, ap, EDGE) { 3203 ata_for_each_dev(dev, link, ALL) { 3204 struct ata_eh_context *ehc = &link->eh_context; 3205 unsigned long tmp; 3206 3207 if (dev->class != ATA_DEV_ATA) 3208 continue; 3209 if (!(ehc->i.dev_action[dev->devno] & 3210 ATA_EH_PARK)) 3211 continue; 3212 tmp = dev->unpark_deadline; 3213 if (time_before(deadline, tmp)) 3214 deadline = tmp; 3215 else if (time_before_eq(tmp, jiffies)) 3216 continue; 3217 if (ehc->unloaded_mask & (1 << dev->devno)) 3218 continue; 3219 3220 ata_eh_park_issue_cmd(dev, 1); 3221 } 3222 } 3223 3224 now = jiffies; 3225 if (time_before_eq(deadline, now)) 3226 break; 3227 3228 deadline = wait_for_completion_timeout(&ap->park_req_pending, 3229 deadline - now); 3230 } while (deadline); 3231 ata_for_each_link(link, ap, EDGE) { 3232 ata_for_each_dev(dev, link, ALL) { 3233 if (!(link->eh_context.unloaded_mask & 3234 (1 << dev->devno))) 3235 continue; 3236 3237 ata_eh_park_issue_cmd(dev, 0); 3238 ata_eh_done(link, dev, ATA_EH_PARK); 3239 } 3240 } 3241 3242 /* the rest */ 3243 ata_for_each_link(link, ap, EDGE) { 3244 struct ata_eh_context *ehc = &link->eh_context; 3245 3246 /* revalidate existing devices and attach new ones */ 3247 rc = 
ata_eh_revalidate_and_attach(link, &dev); 3248 if (rc) 3249 goto dev_fail; 3250 3251 /* if PMP got attached, return, pmp EH will take care of it */ 3252 if (link->device->class == ATA_DEV_PMP) { 3253 ehc->i.action = 0; 3254 return 0; 3255 } 3256 3257 /* configure transfer mode if necessary */ 3258 if (ehc->i.flags & ATA_EHI_SETMODE) { 3259 rc = ata_set_mode(link, &dev); 3260 if (rc) 3261 goto dev_fail; 3262 ehc->i.flags &= ~ATA_EHI_SETMODE; 3263 } 3264 3265 /* If reset has been issued, clear UA to avoid 3266 * disrupting the current users of the device. 3267 */ 3268 if (ehc->i.flags & ATA_EHI_DID_RESET) { 3269 ata_for_each_dev(dev, link, ALL) { 3270 if (dev->class != ATA_DEV_ATAPI) 3271 continue; 3272 rc = atapi_eh_clear_ua(dev); 3273 if (rc) 3274 goto dev_fail; 3275 } 3276 } 3277 3278 /* configure link power saving */ 3279 if (ehc->i.action & ATA_EH_LPM) 3280 ata_for_each_dev(dev, link, ALL) 3281 ata_dev_enable_pm(dev, ap->pm_policy); 3282 3283 /* this link is okay now */ 3284 ehc->i.flags = 0; 3285 continue; 3286 3287 dev_fail: 3288 nr_failed_devs++; 3289 ata_eh_handle_dev_fail(dev, rc); 3290 3291 if (ap->pflags & ATA_PFLAG_FROZEN) { 3292 /* PMP reset requires working host port. 3293 * Can't retry if it's frozen. 3294 */ 3295 if (sata_pmp_attached(ap)) 3296 goto out; 3297 break; 3298 } 3299 } 3300 3301 if (nr_failed_devs) 3302 goto retry; 3303 3304 out: 3305 if (rc && r_failed_link) 3306 *r_failed_link = link; 3307 3308 DPRINTK("EXIT, rc=%d\n", rc); 3309 return rc; 3310 } 3311 3312 /** 3313 * ata_eh_finish - finish up EH 3314 * @ap: host port to finish EH for 3315 * 3316 * Recovery is complete. Clean up EH states and retry or finish 3317 * failed qcs. 3318 * 3319 * LOCKING: 3320 * None. 
3321 */ 3322 void ata_eh_finish(struct ata_port *ap) 3323 { 3324 int tag; 3325 3326 /* retry or finish qcs */ 3327 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 3328 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 3329 3330 if (!(qc->flags & ATA_QCFLAG_FAILED)) 3331 continue; 3332 3333 if (qc->err_mask) { 3334 /* FIXME: Once EH migration is complete, 3335 * generate sense data in this function, 3336 * considering both err_mask and tf. 3337 */ 3338 if (qc->flags & ATA_QCFLAG_RETRY) 3339 ata_eh_qc_retry(qc); 3340 else 3341 ata_eh_qc_complete(qc); 3342 } else { 3343 if (qc->flags & ATA_QCFLAG_SENSE_VALID) { 3344 ata_eh_qc_complete(qc); 3345 } else { 3346 /* feed zero TF to sense generation */ 3347 memset(&qc->result_tf, 0, sizeof(qc->result_tf)); 3348 ata_eh_qc_retry(qc); 3349 } 3350 } 3351 } 3352 3353 /* make sure nr_active_links is zero after EH */ 3354 WARN_ON(ap->nr_active_links); 3355 ap->nr_active_links = 0; 3356 } 3357 3358 /** 3359 * ata_do_eh - do standard error handling 3360 * @ap: host port to handle error for 3361 * 3362 * @prereset: prereset method (can be NULL) 3363 * @softreset: softreset method (can be NULL) 3364 * @hardreset: hardreset method (can be NULL) 3365 * @postreset: postreset method (can be NULL) 3366 * 3367 * Perform standard error handling sequence. 3368 * 3369 * LOCKING: 3370 * Kernel thread context (may sleep). 
3371 */ 3372 void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, 3373 ata_reset_fn_t softreset, ata_reset_fn_t hardreset, 3374 ata_postreset_fn_t postreset) 3375 { 3376 struct ata_device *dev; 3377 int rc; 3378 3379 ata_eh_autopsy(ap); 3380 ata_eh_report(ap); 3381 3382 rc = ata_eh_recover(ap, prereset, softreset, hardreset, postreset, 3383 NULL); 3384 if (rc) { 3385 ata_for_each_dev(dev, &ap->link, ALL) 3386 ata_dev_disable(dev); 3387 } 3388 3389 ata_eh_finish(ap); 3390 } 3391 3392 /** 3393 * ata_std_error_handler - standard error handler 3394 * @ap: host port to handle error for 3395 * 3396 * Standard error handler 3397 * 3398 * LOCKING: 3399 * Kernel thread context (may sleep). 3400 */ 3401 void ata_std_error_handler(struct ata_port *ap) 3402 { 3403 struct ata_port_operations *ops = ap->ops; 3404 ata_reset_fn_t hardreset = ops->hardreset; 3405 3406 /* ignore built-in hardreset if SCR access is not available */ 3407 if (ata_is_builtin_hardreset(hardreset) && !sata_scr_valid(&ap->link)) 3408 hardreset = NULL; 3409 3410 ata_do_eh(ap, ops->prereset, ops->softreset, hardreset, ops->postreset); 3411 } 3412 3413 #ifdef CONFIG_PM 3414 /** 3415 * ata_eh_handle_port_suspend - perform port suspend operation 3416 * @ap: port to suspend 3417 * 3418 * Suspend @ap. 3419 * 3420 * LOCKING: 3421 * Kernel thread context (may sleep). 3422 */ 3423 static void ata_eh_handle_port_suspend(struct ata_port *ap) 3424 { 3425 unsigned long flags; 3426 int rc = 0; 3427 3428 /* are we suspending? 
*/ 3429 spin_lock_irqsave(ap->lock, flags); 3430 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || 3431 ap->pm_mesg.event == PM_EVENT_ON) { 3432 spin_unlock_irqrestore(ap->lock, flags); 3433 return; 3434 } 3435 spin_unlock_irqrestore(ap->lock, flags); 3436 3437 WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED); 3438 3439 /* tell ACPI we're suspending */ 3440 rc = ata_acpi_on_suspend(ap); 3441 if (rc) 3442 goto out; 3443 3444 /* suspend */ 3445 ata_eh_freeze_port(ap); 3446 3447 if (ap->ops->port_suspend) 3448 rc = ap->ops->port_suspend(ap, ap->pm_mesg); 3449 3450 ata_acpi_set_state(ap, PMSG_SUSPEND); 3451 out: 3452 /* report result */ 3453 spin_lock_irqsave(ap->lock, flags); 3454 3455 ap->pflags &= ~ATA_PFLAG_PM_PENDING; 3456 if (rc == 0) 3457 ap->pflags |= ATA_PFLAG_SUSPENDED; 3458 else if (ap->pflags & ATA_PFLAG_FROZEN) 3459 ata_port_schedule_eh(ap); 3460 3461 if (ap->pm_result) { 3462 *ap->pm_result = rc; 3463 ap->pm_result = NULL; 3464 } 3465 3466 spin_unlock_irqrestore(ap->lock, flags); 3467 3468 return; 3469 } 3470 3471 /** 3472 * ata_eh_handle_port_resume - perform port resume operation 3473 * @ap: port to resume 3474 * 3475 * Resume @ap. 3476 * 3477 * LOCKING: 3478 * Kernel thread context (may sleep). 3479 */ 3480 static void ata_eh_handle_port_resume(struct ata_port *ap) 3481 { 3482 unsigned long flags; 3483 int rc = 0; 3484 3485 /* are we resuming? 
*/ 3486 spin_lock_irqsave(ap->lock, flags); 3487 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || 3488 ap->pm_mesg.event != PM_EVENT_ON) { 3489 spin_unlock_irqrestore(ap->lock, flags); 3490 return; 3491 } 3492 spin_unlock_irqrestore(ap->lock, flags); 3493 3494 WARN_ON(!(ap->pflags & ATA_PFLAG_SUSPENDED)); 3495 3496 ata_acpi_set_state(ap, PMSG_ON); 3497 3498 if (ap->ops->port_resume) 3499 rc = ap->ops->port_resume(ap); 3500 3501 /* tell ACPI that we're resuming */ 3502 ata_acpi_on_resume(ap); 3503 3504 /* report result */ 3505 spin_lock_irqsave(ap->lock, flags); 3506 ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED); 3507 if (ap->pm_result) { 3508 *ap->pm_result = rc; 3509 ap->pm_result = NULL; 3510 } 3511 spin_unlock_irqrestore(ap->lock, flags); 3512 } 3513 #endif /* CONFIG_PM */ 3514