/* Copyright (c) 2004 Coraid, Inc.  See COPYING for GPL terms. */
/*
 * aoecmd.c
 * Filesystem request handling methods
 */

#include <linux/hdreg.h>
#include <linux/blkdev.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/genhd.h>
#include <asm/unaligned.h>
#include "aoe.h"

#define TIMERTICK (HZ / 10)
#define MINTIMER (2 * TIMERTICK)
#define MAXTIMER (HZ << 1)
#define MAXWAIT (60 * 3)	/* After MAXWAIT seconds, give up and fail dev */

static struct sk_buff *
new_skb(struct net_device *if_dev, ulong len)
{
	struct sk_buff *skb;

	skb = alloc_skb(len, GFP_ATOMIC);
	if (skb) {
		skb->nh.raw = skb->mac.raw = skb->data;
		skb->dev = if_dev;
		skb->protocol = __constant_htons(ETH_P_AOE);
		skb->priority = 0;
		skb_put(skb, len);
		memset(skb->head, 0, len);
		skb->next = skb->prev = NULL;

		/* tell the network layer not to perform IP checksums
		 * or to get the NIC to do it
		 */
		skb->ip_summed = CHECKSUM_NONE;
	}
	return skb;
}

static struct sk_buff *
skb_prepare(struct aoedev *d, struct frame *f)
{
	struct sk_buff *skb;
	char *p;

	skb = new_skb(d->ifp, f->ndata + f->writedatalen);
	if (!skb) {
		printk(KERN_INFO "aoe: skb_prepare: failure to allocate skb\n");
		return NULL;
	}

	p = skb->mac.raw;
	memcpy(p, f->data, f->ndata);

	if (f->writedatalen) {
		p += sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
		memcpy(p, f->bufaddr, f->writedatalen);
	}

	return skb;
}

static struct frame *
getframe(struct aoedev *d, int tag)
{
	struct frame *f, *e;

	f = d->frames;
	e = f + d->nframes;
	for (; f<e; f++)
		if (f->tag == tag)
			return f;
	return NULL;
}

/*
 * Leave the top bit clear so we have tagspace for userland.
 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
 * This driver reserves tag -1 to mean "unused frame."
 */
static int
newtag(struct aoedev *d)
{
	register ulong n;

	n = jiffies & 0xffff;
	return n | ((++d->lasttag & 0x7fff) << 16);
}
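
/*
 * Example: with d->lasttag incremented to 0x0002 and jiffies == 0x12345,
 * newtag() returns 0x00022345: sequence number 0x0002 in bits 16-30,
 * send tick 0x2345 in bits 0-15.  tsince() below recovers a frame's age
 * by subtracting that tick from the current jiffies count.
 */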

static int
aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
{
	u32 host_tag = newtag(d);

	memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
	memcpy(h->dst, d->addr, sizeof h->dst);
	h->type = __constant_cpu_to_be16(ETH_P_AOE);
	h->verfl = AOE_HVER;
	h->major = cpu_to_be16(d->aoemajor);
	h->minor = d->aoeminor;
	h->cmd = AOECMD_ATA;
	h->tag = cpu_to_be32(host_tag);

	return host_tag;
}

static void
aoecmd_ata_rw(struct aoedev *d, struct frame *f)
{
	struct aoe_hdr *h;
	struct aoe_atahdr *ah;
	struct buf *buf;
	struct sk_buff *skb;
	ulong bcnt;
	register sector_t sector;
	char writebit, extbit;

	writebit = 0x10;
	extbit = 0x4;

	buf = d->inprocess;

	sector = buf->sector;
	bcnt = buf->bv_resid;
	if (bcnt > MAXATADATA)
		bcnt = MAXATADATA;

	/* initialize the headers & frame */
	h = (struct aoe_hdr *) f->data;
	ah = (struct aoe_atahdr *) (h+1);
	f->ndata = sizeof *h + sizeof *ah;
	memset(h, 0, f->ndata);
	f->tag = aoehdr_atainit(d, h);
	f->waited = 0;
	f->buf = buf;
	f->bufaddr = buf->bufaddr;

	/* set up ata header */
	ah->scnt = bcnt >> 9;
	ah->lba0 = sector;
	ah->lba1 = sector >>= 8;
	ah->lba2 = sector >>= 8;
	ah->lba3 = sector >>= 8;
	if (d->flags & DEVFL_EXT) {
		ah->aflags |= AOEAFL_EXT;
		ah->lba4 = sector >>= 8;
		ah->lba5 = sector >>= 8;
	} else {
		extbit = 0;
		ah->lba3 &= 0x0f;
		ah->lba3 |= 0xe0;	/* LBA bit + obsolete 0xa0 */
	}

	if (bio_data_dir(buf->bio) == WRITE) {
		ah->aflags |= AOEAFL_WRITE;
		f->writedatalen = bcnt;
	} else {
		writebit = 0;
		f->writedatalen = 0;
	}

	ah->cmdstat = WIN_READ | writebit | extbit;

	/* mark all tracking fields and load out */
	buf->nframesout += 1;
	buf->bufaddr += bcnt;
	buf->bv_resid -= bcnt;
	/* printk(KERN_INFO "aoe: bv_resid=%ld\n", buf->bv_resid); */
	buf->resid -= bcnt;
	buf->sector += bcnt >> 9;
	if (buf->resid == 0) {
		d->inprocess = NULL;
	} else if (buf->bv_resid == 0) {
		buf->bv++;
		buf->bv_resid = buf->bv->bv_len;
		buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
	}

	skb = skb_prepare(d, f);
	if (skb) {
		skb->next = NULL;
		if (d->sendq_hd)
			d->sendq_tl->next = skb;
		else
			d->sendq_hd = skb;
		d->sendq_tl = skb;
	}
}
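
/*
 * Worked example, assuming MAXATADATA is 1024 as defined in aoe.h: a
 * 4 KiB bio is carved into four frames of two 512-byte sectors each
 * (ah->scnt == 2).  Every call to aoecmd_ata_rw() consumes one such
 * chunk, advancing bufaddr, resid, and sector, and chains the skb onto
 * the per-device send queue for transmission after d->lock is dropped.
 */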

/* some callers cannot sleep, and they can call this function,
 * transmitting the packets later, when interrupts are on
 */
static struct sk_buff *
aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff **tail)
{
	struct aoe_hdr *h;
	struct aoe_cfghdr *ch;
	struct sk_buff *skb, *sl, *sl_tail;
	struct net_device *ifp;

	sl = sl_tail = NULL;

	read_lock(&dev_base_lock);
	for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) {
		dev_hold(ifp);
		if (!is_aoe_netif(ifp))
			continue;

		skb = new_skb(ifp, sizeof *h + sizeof *ch);
		if (skb == NULL) {
			printk(KERN_INFO "aoe: aoecmd_cfg: skb alloc failure\n");
			continue;
		}
		if (sl_tail == NULL)
			sl_tail = skb;
		h = (struct aoe_hdr *) skb->mac.raw;
		memset(h, 0, sizeof *h + sizeof *ch);

		memset(h->dst, 0xff, sizeof h->dst);
		memcpy(h->src, ifp->dev_addr, sizeof h->src);
		h->type = __constant_cpu_to_be16(ETH_P_AOE);
		h->verfl = AOE_HVER;
		h->major = cpu_to_be16(aoemajor);
		h->minor = aoeminor;
		h->cmd = AOECMD_CFG;

		skb->next = sl;
		sl = skb;
	}
	read_unlock(&dev_base_lock);

	if (tail != NULL)
		*tail = sl_tail;
	return sl;
}

/* enters with d->lock held */
void
aoecmd_work(struct aoedev *d)
{
	struct frame *f;
	struct buf *buf;

	if (d->flags & DEVFL_PAUSE) {
		if (!aoedev_isbusy(d))
			d->sendq_hd = aoecmd_cfg_pkts(d->aoemajor,
					d->aoeminor, &d->sendq_tl);
		return;
	}

loop:
	f = getframe(d, FREETAG);
	if (f == NULL)
		return;
	if (d->inprocess == NULL) {
		if (list_empty(&d->bufq))
			return;
		buf = container_of(d->bufq.next, struct buf, bufs);
		list_del(d->bufq.next);
		/* printk(KERN_INFO "aoecmd_work: bi_size=%ld\n", buf->bio->bi_size); */
		d->inprocess = buf;
	}
	aoecmd_ata_rw(d, f);
	goto loop;
}

static void
rexmit(struct aoedev *d, struct frame *f)
{
	struct sk_buff *skb;
	struct aoe_hdr *h;
	char buf[128];
	u32 n;

	n = newtag(d);

	snprintf(buf, sizeof buf,
		"%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n",
		"retransmit",
		d->aoemajor, d->aoeminor, f->tag, jiffies, n);
	aoechr_error(buf);

	h = (struct aoe_hdr *) f->data;
	f->tag = n;
	h->tag = cpu_to_be32(n);
	memcpy(h->dst, d->addr, sizeof h->dst);
	memcpy(h->src, d->ifp->dev_addr, sizeof h->src);

	skb = skb_prepare(d, f);
	if (skb) {
		skb->next = NULL;
		if (d->sendq_hd)
			d->sendq_tl->next = skb;
		else
			d->sendq_hd = skb;
		d->sendq_tl = skb;
	}
}

static int
tsince(int tag)
{
	int n;

	n = jiffies & 0xffff;
	n -= tag & 0xffff;
	if (n < 0)
		n += 1<<16;
	return n;
}
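
/*
 * Wraparound example: a frame stamped at tick 0xfff0 and examined when
 * jiffies & 0xffff has wrapped to 0x0010 gives n == -0xffe0; adding
 * 1<<16 yields 0x20, the true age in ticks.  This stays correct as
 * long as no frame goes unexamined for 65536 ticks.
 */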

static void
rexmit_timer(ulong vp)
{
	struct aoedev *d;
	struct frame *f, *e;
	struct sk_buff *sl;
	register long timeout;
	ulong flags, n;

	d = (struct aoedev *) vp;
	sl = NULL;

	/* timeout is always ~150% of the moving average */
	timeout = d->rttavg;
	timeout += timeout >> 1;

	spin_lock_irqsave(&d->lock, flags);

	if (d->flags & DEVFL_TKILL) {
		spin_unlock_irqrestore(&d->lock, flags);
		return;
	}
	f = d->frames;
	e = f + d->nframes;
	for (; f<e; f++) {
		if (f->tag != FREETAG && tsince(f->tag) >= timeout) {
			n = f->waited += timeout;
			n /= HZ;
			if (n > MAXWAIT) { /* waited too long.  device failure. */
				aoedev_downdev(d);
				break;
			}
			rexmit(d, f);
		}
	}

	sl = d->sendq_hd;
	d->sendq_hd = d->sendq_tl = NULL;
	if (sl) {
		n = d->rttavg <<= 1;
		if (n > MAXTIMER)
			d->rttavg = MAXTIMER;
	}

	d->timer.expires = jiffies + TIMERTICK;
	add_timer(&d->timer);

	spin_unlock_irqrestore(&d->lock, flags);

	aoenet_xmit(sl);
}

/* this function performs work that has been deferred until sleeping is OK
 */
void
aoecmd_sleepwork(void *vp)
{
	struct aoedev *d = (struct aoedev *) vp;

	if (d->flags & DEVFL_GDALLOC)
		aoeblk_gdalloc(d);

	if (d->flags & DEVFL_NEWSIZE) {
		struct block_device *bd;
		unsigned long flags;
		u64 ssize;

		ssize = d->gd->capacity;
		bd = bdget_disk(d->gd, 0);

		if (bd) {
			mutex_lock(&bd->bd_inode->i_mutex);
			i_size_write(bd->bd_inode, (loff_t)ssize<<9);
			mutex_unlock(&bd->bd_inode->i_mutex);
			bdput(bd);
		}
		spin_lock_irqsave(&d->lock, flags);
		d->flags |= DEVFL_UP;
		d->flags &= ~DEVFL_NEWSIZE;
		spin_unlock_irqrestore(&d->lock, flags);
	}
}

static void
ataid_complete(struct aoedev *d, unsigned char *id)
{
	u64 ssize;
	u16 n;

	/* word 83: command set supported */
	n = le16_to_cpu(get_unaligned((__le16 *) &id[83<<1]));

	/* word 86: command set/feature enabled */
	n |= le16_to_cpu(get_unaligned((__le16 *) &id[86<<1]));

	if (n & (1<<10)) {	/* bit 10: LBA 48 */
		d->flags |= DEVFL_EXT;

		/* word 100: number lba48 sectors */
		ssize = le64_to_cpu(get_unaligned((__le64 *) &id[100<<1]));

		/* set as in ide-disk.c:init_idedisk_capacity */
		d->geo.cylinders = ssize;
		d->geo.cylinders /= (255 * 63);
		d->geo.heads = 255;
		d->geo.sectors = 63;
	} else {
		d->flags &= ~DEVFL_EXT;

		/* number lba28 sectors */
		ssize = le32_to_cpu(get_unaligned((__le32 *) &id[60<<1]));

		/* NOTE: obsolete in ATA 6 */
		d->geo.cylinders = le16_to_cpu(get_unaligned((__le16 *) &id[54<<1]));
		d->geo.heads = le16_to_cpu(get_unaligned((__le16 *) &id[55<<1]));
		d->geo.sectors = le16_to_cpu(get_unaligned((__le16 *) &id[56<<1]));
	}

	if (d->ssize != ssize)
		printk(KERN_INFO "aoe: %012llx e%lu.%lu v%04x has %llu sectors\n",
			(unsigned long long)mac_addr(d->addr),
			d->aoemajor, d->aoeminor,
			d->fw_ver, (unsigned long long)ssize);
	d->ssize = ssize;
	d->geo.start = 0;
	if (d->gd != NULL) {
		d->gd->capacity = ssize;
		d->flags |= DEVFL_NEWSIZE;
	} else {
		if (d->flags & DEVFL_GDALLOC) {
			printk(KERN_INFO "aoe: %s: %s e%lu.%lu, %s\n",
				__FUNCTION__,
				"can't schedule work for",
				d->aoemajor, d->aoeminor,
				"it's already on!  (This really shouldn't happen.)");
			return;
		}
		d->flags |= DEVFL_GDALLOC;
	}
	schedule_work(&d->work);
}
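
/*
 * Illustration of the gain g == .25 used below: with rttavg == 100 and
 * a new sample of 200, the estimate moves to 100 + (200 - 100)/4 == 125.
 * Each sample pulls the average a quarter of the way toward itself, and
 * rexmit_timer() times a frame out at roughly 150% of this average.
 */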

static void
calc_rttavg(struct aoedev *d, int rtt)
{
	register long n;

	n = rtt;
	if (n < MINTIMER)
		n = MINTIMER;
	else if (n > MAXTIMER)
		n = MAXTIMER;

	/* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
	n -= d->rttavg;
	d->rttavg += n >> 2;
}

void
aoecmd_ata_rsp(struct sk_buff *skb)
{
	struct aoedev *d;
	struct aoe_hdr *hin;
	struct aoe_atahdr *ahin, *ahout;
	struct frame *f;
	struct buf *buf;
	struct sk_buff *sl;
	register long n;
	ulong flags;
	char ebuf[128];
	u16 aoemajor;

	hin = (struct aoe_hdr *) skb->mac.raw;
	aoemajor = be16_to_cpu(hin->major);
	d = aoedev_by_aoeaddr(aoemajor, hin->minor);
	if (d == NULL) {
		snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
			"for unknown device %d.%d\n",
			aoemajor, hin->minor);
		aoechr_error(ebuf);
		return;
	}

	spin_lock_irqsave(&d->lock, flags);

	f = getframe(d, be32_to_cpu(hin->tag));
	if (f == NULL) {
		spin_unlock_irqrestore(&d->lock, flags);
		snprintf(ebuf, sizeof ebuf,
			"%15s e%d.%d tag=%08x@%08lx\n",
			"unexpected rsp",
			be16_to_cpu(hin->major),
			hin->minor,
			be32_to_cpu(hin->tag),
			jiffies);
		aoechr_error(ebuf);
		return;
	}

	calc_rttavg(d, tsince(f->tag));

	ahin = (struct aoe_atahdr *) (hin+1);
	ahout = (struct aoe_atahdr *) (f->data + sizeof(struct aoe_hdr));
	buf = f->buf;

	if (ahout->cmdstat == WIN_IDENTIFY)
		d->flags &= ~DEVFL_PAUSE;
	if (ahin->cmdstat & 0xa9) {	/* BSY|DF|DRQ|ERR: these bits cleared on success */
		printk(KERN_CRIT "aoe: aoecmd_ata_rsp: ata error cmd=%2.2Xh "
			"stat=%2.2Xh from e%ld.%ld\n",
			ahout->cmdstat, ahin->cmdstat,
			d->aoemajor, d->aoeminor);
		if (buf)
			buf->flags |= BUFFL_FAIL;
	} else {
		switch (ahout->cmdstat) {
		case WIN_READ:
		case WIN_READ_EXT:
			n = ahout->scnt << 9;
			if (skb->len - sizeof *hin - sizeof *ahin < n) {
				printk(KERN_CRIT "aoe: aoecmd_ata_rsp: runt "
					"ata data size in read.  skb->len=%d\n",
					skb->len);
				/* fail frame f?  just returning will rexmit. */
				spin_unlock_irqrestore(&d->lock, flags);
				return;
			}
			memcpy(f->bufaddr, ahin+1, n);
			/* fall through: a completed read needs nothing more */
		case WIN_WRITE:
		case WIN_WRITE_EXT:
			break;
		case WIN_IDENTIFY:
			if (skb->len - sizeof *hin - sizeof *ahin < 512) {
				printk(KERN_INFO "aoe: aoecmd_ata_rsp: runt data size "
					"in ataid.  skb->len=%d\n", skb->len);
				spin_unlock_irqrestore(&d->lock, flags);
				return;
			}
			ataid_complete(d, (unsigned char *) (ahin+1));
			break;
		default:
			printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized "
				"outbound ata command %2.2Xh for %d.%d\n",
				ahout->cmdstat,
				be16_to_cpu(hin->major),
				hin->minor);
		}
	}

	if (buf) {
		buf->nframesout -= 1;
		if (buf->nframesout == 0 && buf->resid == 0) {
			unsigned long duration = jiffies - buf->start_time;
			unsigned long n_sect = buf->bio->bi_size >> 9;
			struct gendisk *disk = d->gd;
			const int rw = bio_data_dir(buf->bio);

			disk_stat_inc(disk, ios[rw]);
			disk_stat_add(disk, ticks[rw], duration);
			disk_stat_add(disk, sectors[rw], n_sect);
			disk_stat_add(disk, io_ticks, duration);
			n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
			bio_endio(buf->bio, buf->bio->bi_size, n);
			mempool_free(buf, d->bufpool);
		}
	}

	f->buf = NULL;
	f->tag = FREETAG;

	aoecmd_work(d);
	sl = d->sendq_hd;
	d->sendq_hd = d->sendq_tl = NULL;

	spin_unlock_irqrestore(&d->lock, flags);
	aoenet_xmit(sl);
}
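
/*
 * Discovery path: aoecmd_cfg() below broadcasts an AOECMD_CFG query on
 * every AoE-enabled interface.  Responses land in aoecmd_cfg_rsp(),
 * which pauses the device and, if no I/O is outstanding, sends an ATA
 * identify via aoecmd_ata_id() so ataid_complete() can record the
 * target's size and geometry.
 */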

void
aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
{
	struct sk_buff *sl;

	sl = aoecmd_cfg_pkts(aoemajor, aoeminor, NULL);

	aoenet_xmit(sl);
}

/*
 * Since we only call this in one place (and it only prepares one frame)
 * we just return the skb.  Usually we'd chain it up to the aoedev sendq.
 */
static struct sk_buff *
aoecmd_ata_id(struct aoedev *d)
{
	struct aoe_hdr *h;
	struct aoe_atahdr *ah;
	struct frame *f;
	struct sk_buff *skb;

	f = getframe(d, FREETAG);
	if (f == NULL) {
		printk(KERN_CRIT "aoe: aoecmd_ata_id: can't get a frame.  "
			"This shouldn't happen.\n");
		return NULL;
	}

	/* initialize the headers & frame */
	h = (struct aoe_hdr *) f->data;
	ah = (struct aoe_atahdr *) (h+1);
	f->ndata = sizeof *h + sizeof *ah;
	memset(h, 0, f->ndata);
	f->tag = aoehdr_atainit(d, h);
	f->waited = 0;
	f->writedatalen = 0;

	/* set up ata header */
	ah->scnt = 1;
	ah->cmdstat = WIN_IDENTIFY;
	ah->lba3 = 0xa0;

	skb = skb_prepare(d, f);

	d->rttavg = MAXTIMER;
	d->timer.function = rexmit_timer;

	return skb;
}

void
aoecmd_cfg_rsp(struct sk_buff *skb)
{
	struct aoedev *d;
	struct aoe_hdr *h;
	struct aoe_cfghdr *ch;
	ulong flags, sysminor, aoemajor;
	u16 bufcnt;
	struct sk_buff *sl;
	enum { MAXFRAMES = 16 };

	h = (struct aoe_hdr *) skb->mac.raw;
	ch = (struct aoe_cfghdr *) (h+1);

	/*
	 * Enough people have their dip switches set backwards to
	 * warrant a loud message for this special case.
	 */
	aoemajor = be16_to_cpu(h->major);
	if (aoemajor == 0xfff) {
		printk(KERN_CRIT "aoe: aoecmd_cfg_rsp: Warning: shelf "
			"address is all ones.  Check shelf dip switches\n");
		return;
	}

	sysminor = SYSMINOR(aoemajor, h->minor);
	if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
		printk(KERN_INFO
			"aoe: e%ld.%d: minor number too large\n",
			aoemajor, (int) h->minor);
		return;
	}

	bufcnt = be16_to_cpu(ch->bufcnt);
	if (bufcnt > MAXFRAMES)	/* keep it reasonable */
		bufcnt = MAXFRAMES;

	d = aoedev_by_sysminor_m(sysminor, bufcnt);
	if (d == NULL) {
		printk(KERN_INFO "aoe: aoecmd_cfg_rsp: device sysminor_m failure\n");
		return;
	}

	spin_lock_irqsave(&d->lock, flags);

	/* permit device to migrate mac and network interface */
	d->ifp = skb->dev;
	memcpy(d->addr, h->src, sizeof d->addr);

	/* don't change users' perspective */
	if (d->nopen && !(d->flags & DEVFL_PAUSE)) {
		spin_unlock_irqrestore(&d->lock, flags);
		return;
	}
	d->flags |= DEVFL_PAUSE;	/* force pause */
	d->fw_ver = be16_to_cpu(ch->fwver);

	/* check for already outstanding ataid */
	sl = aoedev_isbusy(d) == 0 ? aoecmd_ata_id(d) : NULL;

	spin_unlock_irqrestore(&d->lock, flags);

	aoenet_xmit(sl);
}