1 /* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */ 2 /* 3 * aoecmd.c 4 * Filesystem request handling methods 5 */ 6 7 #include <linux/hdreg.h> 8 #include <linux/blkdev.h> 9 #include <linux/skbuff.h> 10 #include <linux/netdevice.h> 11 #include "aoe.h" 12 13 #define TIMERTICK (HZ / 10) 14 #define MINTIMER (2 * TIMERTICK) 15 #define MAXTIMER (HZ << 1) 16 #define MAXWAIT (60 * 3) /* After MAXWAIT seconds, give up and fail dev */ 17 18 static struct sk_buff * 19 new_skb(struct net_device *if_dev, ulong len) 20 { 21 struct sk_buff *skb; 22 23 skb = alloc_skb(len, GFP_ATOMIC); 24 if (skb) { 25 skb->nh.raw = skb->mac.raw = skb->data; 26 skb->dev = if_dev; 27 skb->protocol = __constant_htons(ETH_P_AOE); 28 skb->priority = 0; 29 skb_put(skb, len); 30 skb->next = skb->prev = NULL; 31 32 /* tell the network layer not to perform IP checksums 33 * or to get the NIC to do it 34 */ 35 skb->ip_summed = CHECKSUM_NONE; 36 } 37 return skb; 38 } 39 40 static struct sk_buff * 41 skb_prepare(struct aoedev *d, struct frame *f) 42 { 43 struct sk_buff *skb; 44 char *p; 45 46 skb = new_skb(d->ifp, f->ndata + f->writedatalen); 47 if (!skb) { 48 printk(KERN_INFO "aoe: skb_prepare: failure to allocate skb\n"); 49 return NULL; 50 } 51 52 p = skb->mac.raw; 53 memcpy(p, f->data, f->ndata); 54 55 if (f->writedatalen) { 56 p += sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr); 57 memcpy(p, f->bufaddr, f->writedatalen); 58 } 59 60 return skb; 61 } 62 63 static struct frame * 64 getframe(struct aoedev *d, int tag) 65 { 66 struct frame *f, *e; 67 68 f = d->frames; 69 e = f + d->nframes; 70 for (; f<e; f++) 71 if (f->tag == tag) 72 return f; 73 return NULL; 74 } 75 76 /* 77 * Leave the top bit clear so we have tagspace for userland. 78 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing. 79 * This driver reserves tag -1 to mean "unused frame." 80 */ 81 static int 82 newtag(struct aoedev *d) 83 { 84 register ulong n; 85 86 n = jiffies & 0xffff; 87 return n |= (++d->lasttag & 0x7fff) << 16; 88 } 89 90 static int 91 aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h) 92 { 93 u32 host_tag = newtag(d); 94 95 memcpy(h->src, d->ifp->dev_addr, sizeof h->src); 96 memcpy(h->dst, d->addr, sizeof h->dst); 97 h->type = __constant_cpu_to_be16(ETH_P_AOE); 98 h->verfl = AOE_HVER; 99 h->major = cpu_to_be16(d->aoemajor); 100 h->minor = d->aoeminor; 101 h->cmd = AOECMD_ATA; 102 h->tag = cpu_to_be32(host_tag); 103 104 return host_tag; 105 } 106 107 static void 108 aoecmd_ata_rw(struct aoedev *d, struct frame *f) 109 { 110 struct aoe_hdr *h; 111 struct aoe_atahdr *ah; 112 struct buf *buf; 113 struct sk_buff *skb; 114 ulong bcnt; 115 register sector_t sector; 116 char writebit, extbit; 117 118 writebit = 0x10; 119 extbit = 0x4; 120 121 buf = d->inprocess; 122 123 sector = buf->sector; 124 bcnt = buf->bv_resid; 125 if (bcnt > MAXATADATA) 126 bcnt = MAXATADATA; 127 128 /* initialize the headers & frame */ 129 h = (struct aoe_hdr *) f->data; 130 ah = (struct aoe_atahdr *) (h+1); 131 f->ndata = sizeof *h + sizeof *ah; 132 memset(h, 0, f->ndata); 133 f->tag = aoehdr_atainit(d, h); 134 f->waited = 0; 135 f->buf = buf; 136 f->bufaddr = buf->bufaddr; 137 138 /* set up ata header */ 139 ah->scnt = bcnt >> 9; 140 ah->lba0 = sector; 141 ah->lba1 = sector >>= 8; 142 ah->lba2 = sector >>= 8; 143 ah->lba3 = sector >>= 8; 144 if (d->flags & DEVFL_EXT) { 145 ah->aflags |= AOEAFL_EXT; 146 ah->lba4 = sector >>= 8; 147 ah->lba5 = sector >>= 8; 148 } else { 149 extbit = 0; 150 ah->lba3 &= 0x0f; 151 ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */ 152 } 153 154 if (bio_data_dir(buf->bio) == WRITE) { 155 ah->aflags |= AOEAFL_WRITE; 156 f->writedatalen = bcnt; 157 } else { 158 writebit = 0; 159 f->writedatalen = 0; 160 } 161 162 ah->cmdstat = WIN_READ | writebit | extbit; 163 164 /* mark all tracking fields and load out */ 165 buf->nframesout += 1; 166 buf->bufaddr += bcnt; 167 buf->bv_resid -= bcnt; 168 /* printk(KERN_INFO "aoe: bv_resid=%ld\n", buf->bv_resid); */ 169 buf->resid -= bcnt; 170 buf->sector += bcnt >> 9; 171 if (buf->resid == 0) { 172 d->inprocess = NULL; 173 } else if (buf->bv_resid == 0) { 174 buf->bv++; 175 buf->bv_resid = buf->bv->bv_len; 176 buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset; 177 } 178 179 skb = skb_prepare(d, f); 180 if (skb) { 181 skb->next = NULL; 182 if (d->sendq_hd) 183 d->sendq_tl->next = skb; 184 else 185 d->sendq_hd = skb; 186 d->sendq_tl = skb; 187 } 188 } 189 190 /* enters with d->lock held */ 191 void 192 aoecmd_work(struct aoedev *d) 193 { 194 struct frame *f; 195 struct buf *buf; 196 loop: 197 f = getframe(d, FREETAG); 198 if (f == NULL) 199 return; 200 if (d->inprocess == NULL) { 201 if (list_empty(&d->bufq)) 202 return; 203 buf = container_of(d->bufq.next, struct buf, bufs); 204 list_del(d->bufq.next); 205 /*printk(KERN_INFO "aoecmd_work: bi_size=%ld\n", buf->bio->bi_size); */ 206 d->inprocess = buf; 207 } 208 aoecmd_ata_rw(d, f); 209 goto loop; 210 } 211 212 static void 213 rexmit(struct aoedev *d, struct frame *f) 214 { 215 struct sk_buff *skb; 216 struct aoe_hdr *h; 217 char buf[128]; 218 u32 n; 219 220 n = newtag(d); 221 222 snprintf(buf, sizeof buf, 223 "%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n", 224 "retransmit", 225 d->aoemajor, d->aoeminor, f->tag, jiffies, n); 226 aoechr_error(buf); 227 228 h = (struct aoe_hdr *) f->data; 229 f->tag = n; 230 h->tag = cpu_to_be32(n); 231 232 skb = skb_prepare(d, f); 233 if (skb) { 234 skb->next = NULL; 235 if (d->sendq_hd) 236 d->sendq_tl->next = skb; 237 else 238 d->sendq_hd = skb; 239 d->sendq_tl = skb; 240 } 241 } 242 243 static int 244 tsince(int tag) 245 { 246 int n; 247 248 n = jiffies & 0xffff; 249 n -= tag & 0xffff; 250 if (n < 0) 251 n += 1<<16; 252 return n; 253 } 254 255 static void 256 rexmit_timer(ulong vp) 257 { 258 struct aoedev *d; 259 struct frame *f, *e; 260 struct sk_buff *sl; 261 register long timeout; 262 ulong flags, n; 263 264 d = (struct aoedev *) vp; 265 sl = NULL; 266 267 /* timeout is always ~150% of the moving average */ 268 timeout = d->rttavg; 269 timeout += timeout >> 1; 270 271 spin_lock_irqsave(&d->lock, flags); 272 273 if (d->flags & DEVFL_TKILL) { 274 tdie: spin_unlock_irqrestore(&d->lock, flags); 275 return; 276 } 277 f = d->frames; 278 e = f + d->nframes; 279 for (; f<e; f++) { 280 if (f->tag != FREETAG && tsince(f->tag) >= timeout) { 281 n = f->waited += timeout; 282 n /= HZ; 283 if (n > MAXWAIT) { /* waited too long. device failure. */ 284 aoedev_downdev(d); 285 goto tdie; 286 } 287 rexmit(d, f); 288 } 289 } 290 291 sl = d->sendq_hd; 292 d->sendq_hd = d->sendq_tl = NULL; 293 if (sl) { 294 n = d->rttavg <<= 1; 295 if (n > MAXTIMER) 296 d->rttavg = MAXTIMER; 297 } 298 299 d->timer.expires = jiffies + TIMERTICK; 300 add_timer(&d->timer); 301 302 spin_unlock_irqrestore(&d->lock, flags); 303 304 aoenet_xmit(sl); 305 } 306 307 static void 308 ataid_complete(struct aoedev *d, unsigned char *id) 309 { 310 u64 ssize; 311 u16 n; 312 313 /* word 83: command set supported */ 314 n = le16_to_cpup((__le16 *) &id[83<<1]); 315 316 /* word 86: command set/feature enabled */ 317 n |= le16_to_cpup((__le16 *) &id[86<<1]); 318 319 if (n & (1<<10)) { /* bit 10: LBA 48 */ 320 d->flags |= DEVFL_EXT; 321 322 /* word 100: number lba48 sectors */ 323 ssize = le64_to_cpup((__le64 *) &id[100<<1]); 324 325 /* set as in ide-disk.c:init_idedisk_capacity */ 326 d->geo.cylinders = ssize; 327 d->geo.cylinders /= (255 * 63); 328 d->geo.heads = 255; 329 d->geo.sectors = 63; 330 } else { 331 d->flags &= ~DEVFL_EXT; 332 333 /* number lba28 sectors */ 334 ssize = le32_to_cpup((__le32 *) &id[60<<1]); 335 336 /* NOTE: obsolete in ATA 6 */ 337 d->geo.cylinders = le16_to_cpup((__le16 *) &id[54<<1]); 338 d->geo.heads = le16_to_cpup((__le16 *) &id[55<<1]); 339 d->geo.sectors = le16_to_cpup((__le16 *) &id[56<<1]); 340 } 341 d->ssize = ssize; 342 d->geo.start = 0; 343 if (d->gd != NULL) { 344 d->gd->capacity = ssize; 345 d->flags |= DEVFL_UP; 346 return; 347 } 348 if (d->flags & DEVFL_WORKON) { 349 printk(KERN_INFO "aoe: ataid_complete: can't schedule work, it's already on! " 350 "(This really shouldn't happen).\n"); 351 return; 352 } 353 INIT_WORK(&d->work, aoeblk_gdalloc, d); 354 schedule_work(&d->work); 355 d->flags |= DEVFL_WORKON; 356 } 357 358 static void 359 calc_rttavg(struct aoedev *d, int rtt) 360 { 361 register long n; 362 363 n = rtt; 364 if (n < MINTIMER) 365 n = MINTIMER; 366 else if (n > MAXTIMER) 367 n = MAXTIMER; 368 369 /* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */ 370 n -= d->rttavg; 371 d->rttavg += n >> 2; 372 } 373 374 void 375 aoecmd_ata_rsp(struct sk_buff *skb) 376 { 377 struct aoedev *d; 378 struct aoe_hdr *hin; 379 struct aoe_atahdr *ahin, *ahout; 380 struct frame *f; 381 struct buf *buf; 382 struct sk_buff *sl; 383 register long n; 384 ulong flags; 385 char ebuf[128]; 386 u16 aoemajor; 387 388 hin = (struct aoe_hdr *) skb->mac.raw; 389 aoemajor = be16_to_cpu(hin->major); 390 d = aoedev_by_aoeaddr(aoemajor, hin->minor); 391 if (d == NULL) { 392 snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response " 393 "for unknown device %d.%d\n", 394 aoemajor, hin->minor); 395 aoechr_error(ebuf); 396 return; 397 } 398 399 spin_lock_irqsave(&d->lock, flags); 400 401 f = getframe(d, be32_to_cpu(hin->tag)); 402 if (f == NULL) { 403 spin_unlock_irqrestore(&d->lock, flags); 404 snprintf(ebuf, sizeof ebuf, 405 "%15s e%d.%d tag=%08x@%08lx\n", 406 "unexpected rsp", 407 be16_to_cpu(hin->major), 408 hin->minor, 409 be32_to_cpu(hin->tag), 410 jiffies); 411 aoechr_error(ebuf); 412 return; 413 } 414 415 calc_rttavg(d, tsince(f->tag)); 416 417 ahin = (struct aoe_atahdr *) (hin+1); 418 ahout = (struct aoe_atahdr *) (f->data + sizeof(struct aoe_hdr)); 419 buf = f->buf; 420 421 if (ahin->cmdstat & 0xa9) { /* these bits cleared on success */ 422 printk(KERN_CRIT "aoe: aoecmd_ata_rsp: ata error cmd=%2.2Xh " 423 "stat=%2.2Xh from e%ld.%ld\n", 424 ahout->cmdstat, ahin->cmdstat, 425 d->aoemajor, d->aoeminor); 426 if (buf) 427 buf->flags |= BUFFL_FAIL; 428 } else { 429 switch (ahout->cmdstat) { 430 case WIN_READ: 431 case WIN_READ_EXT: 432 n = ahout->scnt << 9; 433 if (skb->len - sizeof *hin - sizeof *ahin < n) { 434 printk(KERN_CRIT "aoe: aoecmd_ata_rsp: runt " 435 "ata data size in read. skb->len=%d\n", 436 skb->len); 437 /* fail frame f? just returning will rexmit. */ 438 spin_unlock_irqrestore(&d->lock, flags); 439 return; 440 } 441 memcpy(f->bufaddr, ahin+1, n); 442 case WIN_WRITE: 443 case WIN_WRITE_EXT: 444 break; 445 case WIN_IDENTIFY: 446 if (skb->len - sizeof *hin - sizeof *ahin < 512) { 447 printk(KERN_INFO "aoe: aoecmd_ata_rsp: runt data size " 448 "in ataid. skb->len=%d\n", skb->len); 449 spin_unlock_irqrestore(&d->lock, flags); 450 return; 451 } 452 ataid_complete(d, (char *) (ahin+1)); 453 /* d->flags |= DEVFL_WC_UPDATE; */ 454 break; 455 default: 456 printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized " 457 "outbound ata command %2.2Xh for %d.%d\n", 458 ahout->cmdstat, 459 be16_to_cpu(hin->major), 460 hin->minor); 461 } 462 } 463 464 if (buf) { 465 buf->nframesout -= 1; 466 if (buf->nframesout == 0 && buf->resid == 0) { 467 unsigned long duration = jiffies - buf->start_time; 468 unsigned long n_sect = buf->bio->bi_size >> 9; 469 struct gendisk *disk = d->gd; 470 471 if (bio_data_dir(buf->bio) == WRITE) { 472 disk_stat_inc(disk, writes); 473 disk_stat_add(disk, write_ticks, duration); 474 disk_stat_add(disk, write_sectors, n_sect); 475 } else { 476 disk_stat_inc(disk, reads); 477 disk_stat_add(disk, read_ticks, duration); 478 disk_stat_add(disk, read_sectors, n_sect); 479 } 480 disk_stat_add(disk, io_ticks, duration); 481 n = (buf->flags & BUFFL_FAIL) ? -EIO : 0; 482 bio_endio(buf->bio, buf->bio->bi_size, n); 483 mempool_free(buf, d->bufpool); 484 } 485 } 486 487 f->buf = NULL; 488 f->tag = FREETAG; 489 490 aoecmd_work(d); 491 492 sl = d->sendq_hd; 493 d->sendq_hd = d->sendq_tl = NULL; 494 495 spin_unlock_irqrestore(&d->lock, flags); 496 497 aoenet_xmit(sl); 498 } 499 500 void 501 aoecmd_cfg(ushort aoemajor, unsigned char aoeminor) 502 { 503 struct aoe_hdr *h; 504 struct aoe_cfghdr *ch; 505 struct sk_buff *skb, *sl; 506 struct net_device *ifp; 507 508 sl = NULL; 509 510 read_lock(&dev_base_lock); 511 for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) { 512 dev_hold(ifp); 513 if (!is_aoe_netif(ifp)) 514 continue; 515 516 skb = new_skb(ifp, sizeof *h + sizeof *ch); 517 if (skb == NULL) { 518 printk(KERN_INFO "aoe: aoecmd_cfg: skb alloc failure\n"); 519 continue; 520 } 521 h = (struct aoe_hdr *) skb->mac.raw; 522 memset(h, 0, sizeof *h + sizeof *ch); 523 524 memset(h->dst, 0xff, sizeof h->dst); 525 memcpy(h->src, ifp->dev_addr, sizeof h->src); 526 h->type = __constant_cpu_to_be16(ETH_P_AOE); 527 h->verfl = AOE_HVER; 528 h->major = cpu_to_be16(aoemajor); 529 h->minor = aoeminor; 530 h->cmd = AOECMD_CFG; 531 532 skb->next = sl; 533 sl = skb; 534 } 535 read_unlock(&dev_base_lock); 536 537 aoenet_xmit(sl); 538 } 539 540 /* 541 * Since we only call this in one place (and it only prepares one frame) 542 * we just return the skb. Usually we'd chain it up to the aoedev sendq. 543 */ 544 static struct sk_buff * 545 aoecmd_ata_id(struct aoedev *d) 546 { 547 struct aoe_hdr *h; 548 struct aoe_atahdr *ah; 549 struct frame *f; 550 struct sk_buff *skb; 551 552 f = getframe(d, FREETAG); 553 if (f == NULL) { 554 printk(KERN_CRIT "aoe: aoecmd_ata_id: can't get a frame. " 555 "This shouldn't happen.\n"); 556 return NULL; 557 } 558 559 /* initialize the headers & frame */ 560 h = (struct aoe_hdr *) f->data; 561 ah = (struct aoe_atahdr *) (h+1); 562 f->ndata = sizeof *h + sizeof *ah; 563 memset(h, 0, f->ndata); 564 f->tag = aoehdr_atainit(d, h); 565 f->waited = 0; 566 f->writedatalen = 0; 567 568 /* this message initializes the device, so we reset the rttavg */ 569 d->rttavg = MAXTIMER; 570 571 /* set up ata header */ 572 ah->scnt = 1; 573 ah->cmdstat = WIN_IDENTIFY; 574 ah->lba3 = 0xa0; 575 576 skb = skb_prepare(d, f); 577 578 /* we now want to start the rexmit tracking */ 579 d->flags &= ~DEVFL_TKILL; 580 d->timer.data = (ulong) d; 581 d->timer.function = rexmit_timer; 582 d->timer.expires = jiffies + TIMERTICK; 583 add_timer(&d->timer); 584 585 return skb; 586 } 587 588 void 589 aoecmd_cfg_rsp(struct sk_buff *skb) 590 { 591 struct aoedev *d; 592 struct aoe_hdr *h; 593 struct aoe_cfghdr *ch; 594 ulong flags, sysminor, aoemajor; 595 u16 bufcnt; 596 struct sk_buff *sl; 597 enum { MAXFRAMES = 8 }; 598 599 h = (struct aoe_hdr *) skb->mac.raw; 600 ch = (struct aoe_cfghdr *) (h+1); 601 602 /* 603 * Enough people have their dip switches set backwards to 604 * warrant a loud message for this special case. 605 */ 606 aoemajor = be16_to_cpu(h->major); 607 if (aoemajor == 0xfff) { 608 printk(KERN_CRIT "aoe: aoecmd_cfg_rsp: Warning: shelf " 609 "address is all ones. Check shelf dip switches\n"); 610 return; 611 } 612 613 sysminor = SYSMINOR(aoemajor, h->minor); 614 if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) { 615 printk(KERN_INFO 616 "aoe: e%ld.%d: minor number too large\n", 617 aoemajor, (int) h->minor); 618 return; 619 } 620 621 bufcnt = be16_to_cpu(ch->bufcnt); 622 if (bufcnt > MAXFRAMES) /* keep it reasonable */ 623 bufcnt = MAXFRAMES; 624 625 d = aoedev_set(sysminor, h->src, skb->dev, bufcnt); 626 if (d == NULL) { 627 printk(KERN_INFO "aoe: aoecmd_cfg_rsp: device set failure\n"); 628 return; 629 } 630 631 spin_lock_irqsave(&d->lock, flags); 632 633 if (d->flags & (DEVFL_UP | DEVFL_CLOSEWAIT)) { 634 spin_unlock_irqrestore(&d->lock, flags); 635 return; 636 } 637 638 d->fw_ver = be16_to_cpu(ch->fwver); 639 640 /* we get here only if the device is new */ 641 sl = aoecmd_ata_id(d); 642 643 spin_unlock_irqrestore(&d->lock, flags); 644 645 aoenet_xmit(sl); 646 } 647 648