/* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */
/*
 * aoecmd.c
 * Filesystem request handling methods
 */

#include <linux/hdreg.h>
#include <linux/blkdev.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <asm/unaligned.h>
#include "aoe.h"

#define TIMERTICK (HZ / 10)
#define MINTIMER (2 * TIMERTICK)
#define MAXTIMER (HZ << 1)
#define MAXWAIT (60 * 3) /* After MAXWAIT seconds, give up and fail dev */

/*
 * Allocate a fresh sk_buff of @len bytes for outgoing AoE traffic on
 * @if_dev.  The header pointers are aimed at the start of the buffer
 * (the caller writes the Ethernet/AoE header there itself), the payload
 * area is reserved with skb_put, and checksumming is disabled since AoE
 * carries no IP.  Uses GFP_ATOMIC, so it may be called from interrupt
 * or lock-held context; returns NULL on allocation failure.
 */
static struct sk_buff *
new_skb(struct net_device *if_dev, ulong len)
{
	struct sk_buff *skb;

	skb = alloc_skb(len, GFP_ATOMIC);
	if (skb) {
		skb->nh.raw = skb->mac.raw = skb->data;
		skb->dev = if_dev;
		skb->protocol = __constant_htons(ETH_P_AOE);
		skb->priority = 0;
		skb_put(skb, len);
		skb->next = skb->prev = NULL;

		/* tell the network layer not to perform IP checksums
		 * or to get the NIC to do it
		 */
		skb->ip_summed = CHECKSUM_NONE;
	}
	return skb;
}

/*
 * Build a transmit-ready sk_buff from frame @f: copy the prebuilt
 * headers (f->data, f->ndata bytes), then, for a write command, append
 * f->writedatalen bytes of payload taken from f->bufaddr just past the
 * AoE + ATA headers.  Returns NULL (after logging) if the skb cannot
 * be allocated; the caller treats that as "try again on rexmit".
 */
static struct sk_buff *
skb_prepare(struct aoedev *d, struct frame *f)
{
	struct sk_buff *skb;
	char *p;

	skb = new_skb(d->ifp, f->ndata + f->writedatalen);
	if (!skb) {
		printk(KERN_INFO "aoe: skb_prepare: failure to allocate skb\n");
		return NULL;
	}

	p = skb->mac.raw;
	memcpy(p, f->data, f->ndata);

	if (f->writedatalen) {
		/* write payload goes immediately after the two headers */
		p += sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
		memcpy(p, f->bufaddr, f->writedatalen);
	}

	return skb;
}

/*
 * Linear search of d's frame table for the frame carrying @tag.
 * Passing FREETAG finds an unused frame.  Returns NULL if no frame
 * matches.  Caller is expected to hold d->lock (every caller in this
 * file does) -- TODO confirm against the rest of the driver.
 */
static struct frame *
getframe(struct aoedev *d, int tag)
{
	struct frame *f, *e;

	f = d->frames;
	e = f + d->nframes;
	for (; f<e; f++)
		if (f->tag == tag)
			return f;
	return NULL;
}

/*
 * Leave the top bit clear so we have tagspace for userland.
 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
 * This driver reserves tag -1 to mean "unused frame."
 */
/*
 * Generate a fresh tag for device @d: low 16 bits are the current
 * jiffies tick (consumed later by tsince()), upper bits a 15-bit
 * incrementing sequence number, keeping the top bit clear as promised
 * in the comment above.
 */
static int
newtag(struct aoedev *d)
{
	register ulong n;

	n = jiffies & 0xffff;
	return n |= (++d->lasttag & 0x7fff) << 16;
}

/*
 * Fill in the common AoE header @h for an ATA command to device @d
 * (MAC addresses, ethertype, version, shelf/slot address, command,
 * and a newly generated tag).  Returns the host tag so the caller can
 * record it in the frame for response matching.
 */
static int
aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
{
	u32 host_tag = newtag(d);

	memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
	memcpy(h->dst, d->addr, sizeof h->dst);
	h->type = __constant_cpu_to_be16(ETH_P_AOE);
	h->verfl = AOE_HVER;
	h->major = cpu_to_be16(d->aoemajor);
	h->minor = d->aoeminor;
	h->cmd = AOECMD_ATA;
	h->tag = cpu_to_be32(host_tag);

	return host_tag;
}

/*
 * Load frame @f with the next chunk (up to MAXATADATA bytes) of the
 * buffer currently in progress on @d, build the matching ATA read or
 * write command, advance the buffer's progress counters, and chain the
 * resulting skb onto d's send queue.  Caller must hold d->lock and
 * guarantee d->inprocess is non-NULL (aoecmd_work does both).
 */
static void
aoecmd_ata_rw(struct aoedev *d, struct frame *f)
{
	struct aoe_hdr *h;
	struct aoe_atahdr *ah;
	struct buf *buf;
	struct sk_buff *skb;
	ulong bcnt;
	register sector_t sector;
	char writebit, extbit;

	writebit = 0x10;	/* WIN_READ | 0x10 == WIN_WRITE */
	extbit = 0x4;		/* WIN_READ | 0x4 == WIN_READ_EXT (lba48) */

	buf = d->inprocess;

	sector = buf->sector;
	bcnt = buf->bv_resid;
	if (bcnt > MAXATADATA)
		bcnt = MAXATADATA;

	/* initialize the headers & frame */
	h = (struct aoe_hdr *) f->data;
	ah = (struct aoe_atahdr *) (h+1);
	f->ndata = sizeof *h + sizeof *ah;
	memset(h, 0, f->ndata);
	f->tag = aoehdr_atainit(d, h);
	f->waited = 0;
	f->buf = buf;
	f->bufaddr = buf->bufaddr;

	/* set up ata header */
	ah->scnt = bcnt >> 9;
	ah->lba0 = sector;
	ah->lba1 = sector >>= 8;
	ah->lba2 = sector >>= 8;
	ah->lba3 = sector >>= 8;
	if (d->flags & DEVFL_EXT) {
		/* lba48: all six LBA bytes are significant */
		ah->aflags |= AOEAFL_EXT;
		ah->lba4 = sector >>= 8;
		ah->lba5 = sector >>= 8;
	} else {
		/* lba28: high nibble of lba3 carries mode bits instead */
		extbit = 0;
		ah->lba3 &= 0x0f;
		ah->lba3 |= 0xe0;	/* LBA bit + obsolete 0xa0 */
	}

	if (bio_data_dir(buf->bio) == WRITE) {
		ah->aflags |= AOEAFL_WRITE;
		f->writedatalen = bcnt;
	} else {
		writebit = 0;
		f->writedatalen = 0;
	}

	ah->cmdstat = WIN_READ | writebit | extbit;

	/* mark all tracking fields and load out */
	buf->nframesout += 1;
	buf->bufaddr += bcnt;
	buf->bv_resid -= bcnt;
	/* printk(KERN_INFO "aoe: bv_resid=%ld\n", buf->bv_resid); */
	buf->resid -= bcnt;
	buf->sector += bcnt >> 9;
	if (buf->resid == 0) {
		/* whole bio dispatched; responses will complete it */
		d->inprocess = NULL;
	} else if (buf->bv_resid == 0) {
		/* current bio_vec exhausted; step to the next segment */
		buf->bv++;
		buf->bv_resid = buf->bv->bv_len;
		buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
	}

	skb = skb_prepare(d, f);
	if (skb) {
		/* append to the singly linked send queue */
		skb->next = NULL;
		if (d->sendq_hd)
			d->sendq_tl->next = skb;
		else
			d->sendq_hd = skb;
		d->sendq_tl = skb;
	}
}

/* enters with d->lock held */
/*
 * Pump the device: while free frames exist, keep a buffer in progress
 * (pulling the next one off d->bufq as needed) and issue I/O for it.
 * Returns when frames or queued work run out.  The skbs produced are
 * left on d's send queue; the caller transmits them after unlocking.
 */
void
aoecmd_work(struct aoedev *d)
{
	struct frame *f;
	struct buf *buf;
loop:
	f = getframe(d, FREETAG);
	if (f == NULL)
		return;
	if (d->inprocess == NULL) {
		if (list_empty(&d->bufq))
			return;
		buf = container_of(d->bufq.next, struct buf, bufs);
		list_del(d->bufq.next);
		/*printk(KERN_INFO "aoecmd_work: bi_size=%ld\n", buf->bio->bi_size); */
		d->inprocess = buf;
	}
	aoecmd_ata_rw(d, f);
	goto loop;
}

/*
 * Retransmit frame @f on device @d under a fresh tag (so a late reply
 * to the old tag is ignored), logging the event through the aoe
 * character device.  The rebuilt skb is appended to d's send queue.
 * Called from rexmit_timer with d->lock held.
 */
static void
rexmit(struct aoedev *d, struct frame *f)
{
	struct sk_buff *skb;
	struct aoe_hdr *h;
	char buf[128];
	u32 n;

	n = newtag(d);

	snprintf(buf, sizeof buf,
		"%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n",
		"retransmit",
		d->aoemajor, d->aoeminor, f->tag, jiffies, n);
	aoechr_error(buf);

	h = (struct aoe_hdr *) f->data;
	f->tag = n;
	h->tag = cpu_to_be32(n);

	skb = skb_prepare(d, f);
	if (skb) {
		skb->next = NULL;
		if (d->sendq_hd)
			d->sendq_tl->next = skb;
		else
			d->sendq_hd = skb;
		d->sendq_tl = skb;
	}
}

/*
 * Ticks elapsed since @tag was issued, recovered from the jiffies
 * snapshot newtag() stored in the tag's low 16 bits.  The +1<<16
 * correction handles the 16-bit wraparound of that counter.
 */
static int
tsince(int tag)
{
	int n;

	n = jiffies & 0xffff;
	n -= tag & 0xffff;
	if (n < 0)
		n += 1<<16;
	return n;
}

/*
 * Timer callback (d->timer): scan all in-flight frames and retransmit
 * any that have outlived ~150% of the average round-trip time.  A frame
 * that has waited more than MAXWAIT seconds total fails the whole
 * device.  If anything was queued for rexmit the rtt average is doubled
 * (capped at MAXTIMER) as backoff.  Re-arms itself every TIMERTICK
 * until DEVFL_TKILL is set.  Transmission happens after the lock is
 * dropped.
 */
static void
rexmit_timer(ulong vp)
{
	struct aoedev *d;
	struct frame *f, *e;
	struct sk_buff *sl;
	register long timeout;
	ulong flags, n;

	d = (struct aoedev *) vp;
	sl = NULL;

	/* timeout is always ~150% of the moving average */
	timeout = d->rttavg;
	timeout += timeout >> 1;

	spin_lock_irqsave(&d->lock, flags);

	if (d->flags & DEVFL_TKILL) {
		/* device is being torn down: do not re-arm the timer */
tdie:		spin_unlock_irqrestore(&d->lock, flags);
		return;
	}
	f = d->frames;
	e = f + d->nframes;
	for (; f<e; f++) {
		if (f->tag != FREETAG && tsince(f->tag) >= timeout) {
			n = f->waited += timeout;
			n /= HZ;
			if (n > MAXWAIT) { /* waited too long. device failure. */
				aoedev_downdev(d);
				goto tdie;
			}
			rexmit(d, f);
		}
	}

	sl = d->sendq_hd;
	d->sendq_hd = d->sendq_tl = NULL;
	if (sl) {
		/* retransmits queued: back off the rtt estimate */
		n = d->rttavg <<= 1;
		if (n > MAXTIMER)
			d->rttavg = MAXTIMER;
	}

	d->timer.expires = jiffies + TIMERTICK;
	add_timer(&d->timer);

	spin_unlock_irqrestore(&d->lock, flags);

	aoenet_xmit(sl);
}

/*
 * Digest a 512-byte ATA IDENTIFY DEVICE response @id: pick lba48 or
 * lba28 capacity and geometry from the identify words, record them on
 * @d, and either update the existing gendisk or schedule
 * aoeblk_gdalloc to create one (guarded by DEVFL_WORKON so the work is
 * only queued once).  Called with d->lock held via aoecmd_ata_rsp.
 */
static void
ataid_complete(struct aoedev *d, unsigned char *id)
{
	u64 ssize;
	u16 n;

	/* word 83: command set supported */
	n = le16_to_cpu(get_unaligned((__le16 *) &id[83<<1]));

	/* word 86: command set/feature enabled */
	n |= le16_to_cpu(get_unaligned((__le16 *) &id[86<<1]));

	if (n & (1<<10)) { /* bit 10: LBA 48 */
		d->flags |= DEVFL_EXT;

		/* word 100: number lba48 sectors */
		ssize = le64_to_cpu(get_unaligned((__le64 *) &id[100<<1]));

		/* set as in ide-disk.c:init_idedisk_capacity */
		d->geo.cylinders = ssize;
		d->geo.cylinders /= (255 * 63);
		d->geo.heads = 255;
		d->geo.sectors = 63;
	} else {
		d->flags &= ~DEVFL_EXT;

		/* number lba28 sectors */
		ssize = le32_to_cpu(get_unaligned((__le32 *) &id[60<<1]));

		/* NOTE: obsolete in ATA 6 */
		d->geo.cylinders = le16_to_cpu(get_unaligned((__le16 *) &id[54<<1]));
		d->geo.heads = le16_to_cpu(get_unaligned((__le16 *) &id[55<<1]));
		d->geo.sectors = le16_to_cpu(get_unaligned((__le16 *) &id[56<<1]));
	}
	d->ssize = ssize;
	d->geo.start = 0;
	if (d->gd != NULL) {
		/* gendisk already exists: just refresh capacity */
		d->gd->capacity = ssize;
		d->flags |= DEVFL_UP;
		return;
	}
	if (d->flags & DEVFL_WORKON) {
		printk(KERN_INFO "aoe: ataid_complete: can't schedule work, it's already on! "
			"(This really shouldn't happen).\n");
		return;
	}
	INIT_WORK(&d->work, aoeblk_gdalloc, d);
	schedule_work(&d->work);
	d->flags |= DEVFL_WORKON;
}

/*
 * Fold one round-trip sample @rtt (in ticks, clamped to
 * [MINTIMER, MAXTIMER]) into d->rttavg as an exponentially weighted
 * moving average with gain 1/4.
 */
static void
calc_rttavg(struct aoedev *d, int rtt)
{
	register long n;

	n = rtt;
	if (n < MINTIMER)
		n = MINTIMER;
	else if (n > MAXTIMER)
		n = MAXTIMER;

	/* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
	n -= d->rttavg;
	d->rttavg += n >> 2;
}

/*
 * Handle an inbound ATA response skb: locate the device and the frame
 * matching the echoed tag, update the rtt average, then act on the
 * outcome -- copy read data back into the buffer, complete IDENTIFY
 * via ataid_complete, or flag the buffer failed on an ATA error.
 * When the last frame of a bio completes, account disk statistics and
 * end the bio.  Finally free the frame, restart aoecmd_work, and
 * transmit whatever it queued.  Runt responses return without freeing
 * the frame so the rexmit timer retries them.
 */
void
aoecmd_ata_rsp(struct sk_buff *skb)
{
	struct aoedev *d;
	struct aoe_hdr *hin;
	struct aoe_atahdr *ahin, *ahout;
	struct frame *f;
	struct buf *buf;
	struct sk_buff *sl;
	register long n;
	ulong flags;
	char ebuf[128];
	u16 aoemajor;

	hin = (struct aoe_hdr *) skb->mac.raw;
	aoemajor = be16_to_cpu(hin->major);
	d = aoedev_by_aoeaddr(aoemajor, hin->minor);
	if (d == NULL) {
		snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
			"for unknown device %d.%d\n",
			aoemajor, hin->minor);
		aoechr_error(ebuf);
		return;
	}

	spin_lock_irqsave(&d->lock, flags);

	f = getframe(d, be32_to_cpu(hin->tag));
	if (f == NULL) {
		/* stale or duplicate response: log and drop */
		spin_unlock_irqrestore(&d->lock, flags);
		snprintf(ebuf, sizeof ebuf,
			"%15s e%d.%d tag=%08x@%08lx\n",
			"unexpected rsp",
			be16_to_cpu(hin->major),
			hin->minor,
			be32_to_cpu(hin->tag),
			jiffies);
		aoechr_error(ebuf);
		return;
	}

	calc_rttavg(d, tsince(f->tag));

	ahin = (struct aoe_atahdr *) (hin+1);
	ahout = (struct aoe_atahdr *) (f->data + sizeof(struct aoe_hdr));
	buf = f->buf;

	if (ahin->cmdstat & 0xa9) { /* these bits cleared on success */
		printk(KERN_CRIT "aoe: aoecmd_ata_rsp: ata error cmd=%2.2Xh "
			"stat=%2.2Xh from e%ld.%ld\n",
			ahout->cmdstat, ahin->cmdstat,
			d->aoemajor, d->aoeminor);
		if (buf)
			buf->flags |= BUFFL_FAIL;
	} else {
		/* dispatch on the command we originally sent */
		switch (ahout->cmdstat) {
		case WIN_READ:
		case WIN_READ_EXT:
			n = ahout->scnt << 9;
			if (skb->len - sizeof *hin - sizeof *ahin < n) {
				printk(KERN_CRIT "aoe: aoecmd_ata_rsp: runt "
					"ata data size in read. skb->len=%d\n",
					skb->len);
				/* fail frame f? just returning will rexmit. */
				spin_unlock_irqrestore(&d->lock, flags);
				return;
			}
			memcpy(f->bufaddr, ahin+1, n);
			/* fall through: reads and writes share completion */
		case WIN_WRITE:
		case WIN_WRITE_EXT:
			break;
		case WIN_IDENTIFY:
			if (skb->len - sizeof *hin - sizeof *ahin < 512) {
				printk(KERN_INFO "aoe: aoecmd_ata_rsp: runt data size "
					"in ataid. skb->len=%d\n", skb->len);
				spin_unlock_irqrestore(&d->lock, flags);
				return;
			}
			ataid_complete(d, (char *) (ahin+1));
			/* d->flags |= DEVFL_WC_UPDATE; */
			break;
		default:
			printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized "
				"outbound ata command %2.2Xh for %d.%d\n",
				ahout->cmdstat,
				be16_to_cpu(hin->major),
				hin->minor);
		}
	}

	if (buf) {
		buf->nframesout -= 1;
		if (buf->nframesout == 0 && buf->resid == 0) {
			/* last outstanding frame for this bio: account & complete */
			unsigned long duration = jiffies - buf->start_time;
			unsigned long n_sect = buf->bio->bi_size >> 9;
			struct gendisk *disk = d->gd;
			const int rw = bio_data_dir(buf->bio);

			disk_stat_inc(disk, ios[rw]);
			disk_stat_add(disk, ticks[rw], duration);
			disk_stat_add(disk, sectors[rw], n_sect);
			disk_stat_add(disk, io_ticks, duration);
			n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
			bio_endio(buf->bio, buf->bio->bi_size, n);
			mempool_free(buf, d->bufpool);
		}
	}

	f->buf = NULL;
	f->tag = FREETAG;

	aoecmd_work(d);

	sl = d->sendq_hd;
	d->sendq_hd = d->sendq_tl = NULL;

	spin_unlock_irqrestore(&d->lock, flags);

	aoenet_xmit(sl);
}

/*
 * Broadcast an AoE config query for shelf @aoemajor / slot @aoeminor on
 * every AoE-enabled network interface.  The per-interface skbs are
 * chained together and transmitted in one batch after the interface
 * list walk; dev_hold/dev_put pin each interface across its iteration.
 */
void
aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
{
	struct aoe_hdr *h;
	struct aoe_cfghdr *ch;
	struct sk_buff *skb, *sl;
	struct net_device *ifp;

	sl = NULL;

	read_lock(&dev_base_lock);
	for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) {
		dev_hold(ifp);
		if (!is_aoe_netif(ifp))
			continue;

		skb = new_skb(ifp, sizeof *h + sizeof *ch);
		if (skb == NULL) {
			printk(KERN_INFO "aoe: aoecmd_cfg: skb alloc failure\n");
			continue;
		}
		h = (struct aoe_hdr *) skb->mac.raw;
		memset(h, 0, sizeof *h + sizeof *ch);

		memset(h->dst, 0xff, sizeof h->dst);	/* broadcast */
		memcpy(h->src, ifp->dev_addr, sizeof h->src);
		h->type = __constant_cpu_to_be16(ETH_P_AOE);
		h->verfl = AOE_HVER;
		h->major = cpu_to_be16(aoemajor);
		h->minor = aoeminor;
		h->cmd = AOECMD_CFG;

		skb->next = sl;
		sl = skb;
	}
	read_unlock(&dev_base_lock);

	aoenet_xmit(sl);
}

/*
 * Since we only call this in one place (and it only prepares one frame)
 * we just return the skb. Usually we'd chain it up to the aoedev sendq.
 */
/*
 * Build an ATA IDENTIFY DEVICE command for @d in a free frame, reset
 * the rtt average for this fresh device, and start the rexmit timer.
 * Returns the prepared skb (or NULL if no frame/skb was available) for
 * the caller to transmit.  Called with d->lock held by aoecmd_cfg_rsp.
 */
static struct sk_buff *
aoecmd_ata_id(struct aoedev *d)
{
	struct aoe_hdr *h;
	struct aoe_atahdr *ah;
	struct frame *f;
	struct sk_buff *skb;

	f = getframe(d, FREETAG);
	if (f == NULL) {
		printk(KERN_CRIT "aoe: aoecmd_ata_id: can't get a frame. "
			"This shouldn't happen.\n");
		return NULL;
	}

	/* initialize the headers & frame */
	h = (struct aoe_hdr *) f->data;
	ah = (struct aoe_atahdr *) (h+1);
	f->ndata = sizeof *h + sizeof *ah;
	memset(h, 0, f->ndata);
	f->tag = aoehdr_atainit(d, h);
	f->waited = 0;
	f->writedatalen = 0;

	/* this message initializes the device, so we reset the rttavg */
	d->rttavg = MAXTIMER;

	/* set up ata header */
	ah->scnt = 1;
	ah->cmdstat = WIN_IDENTIFY;
	ah->lba3 = 0xa0;

	skb = skb_prepare(d, f);

	/* we now want to start the rexmit tracking */
	d->flags &= ~DEVFL_TKILL;
	d->timer.data = (ulong) d;
	d->timer.function = rexmit_timer;
	d->timer.expires = jiffies + TIMERTICK;
	add_timer(&d->timer);

	return skb;
}

/*
 * Handle an inbound AoE config response: validate the shelf address
 * and minor-number range, clamp the target's advertised buffer count,
 * create or look up the aoedev, and -- for a device not already up --
 * record the firmware version and kick off device identification with
 * aoecmd_ata_id.
 */
void
aoecmd_cfg_rsp(struct sk_buff *skb)
{
	struct aoedev *d;
	struct aoe_hdr *h;
	struct aoe_cfghdr *ch;
	ulong flags, sysminor, aoemajor;
	u16 bufcnt;
	struct sk_buff *sl;
	enum { MAXFRAMES = 8 };

	h = (struct aoe_hdr *) skb->mac.raw;
	ch = (struct aoe_cfghdr *) (h+1);

	/*
	 * Enough people have their dip switches set backwards to
	 * warrant a loud message for this special case.
	 */
	aoemajor = be16_to_cpu(h->major);
	if (aoemajor == 0xfff) {
		printk(KERN_CRIT "aoe: aoecmd_cfg_rsp: Warning: shelf "
			"address is all ones. Check shelf dip switches\n");
		return;
	}

	sysminor = SYSMINOR(aoemajor, h->minor);
	if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
		printk(KERN_INFO
			"aoe: e%ld.%d: minor number too large\n",
			aoemajor, (int) h->minor);
		return;
	}

	bufcnt = be16_to_cpu(ch->bufcnt);
	if (bufcnt > MAXFRAMES) /* keep it reasonable */
		bufcnt = MAXFRAMES;

	d = aoedev_set(sysminor, h->src, skb->dev, bufcnt);
	if (d == NULL) {
		printk(KERN_INFO "aoe: aoecmd_cfg_rsp: device set failure\n");
		return;
	}

	spin_lock_irqsave(&d->lock, flags);

	if (d->flags & (DEVFL_UP | DEVFL_CLOSEWAIT)) {
		/* already known and running; nothing to do */
		spin_unlock_irqrestore(&d->lock, flags);
		return;
	}

	d->fw_ver = be16_to_cpu(ch->fwver);

	/* we get here only if the device is new */
	sl = aoecmd_ata_id(d);

	spin_unlock_irqrestore(&d->lock, flags);

	aoenet_xmit(sl);
}