/******************************************************************************

  Copyright (c) 2006-2013, Myricom Inc.
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Neither the name of the Myricom Inc, nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <net/zlib.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet6/ip6_var.h>

#include <machine/bus.h>
#include <machine/in_cksum.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>
#include <sys/smp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */

#include <vm/vm.h>		/* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif

#include <dev/mxge/mxge_mcp.h>
#include <dev/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/mxge/if_mxge_var.h>
#ifdef IFNET_BUF_RING
#include <sys/buf_ring.h>
#endif

#include "opt_inet.h"
#include "opt_inet6.h"

/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
static int mxge_always_promisc = 0;
static int mxge_initial_mtu = ETHERMTU_JUMBO;
static int mxge_throttle = 0;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
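/*
 * Firmware image naming: the "ethp" images contain the workaround for
 * unaligned PCIe completions (see the block comment above
 * mxge_firmware_probe() below), the plain "eth" images assume aligned
 * completions, and the "rss" variants are the multi-slice (RSS)
 * builds of each.
 */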
static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),

	DEVMETHOD_END
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc, int down);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);

static int
mxge_probe(device_t dev)
{
	int rev;

	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		rev = pci_get_revid(dev);
		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n", rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}

static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386) || defined(__amd64)
	vm_offset_t len;
	int err;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram,
			       len, PAT_WRITE_COMBINING);
	if (err != 0) {
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
			      err);
		sc->wc = 0;
	}
#endif
}

/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	if (error == 0) {
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}

static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

	/*
	 * A single 4KB-aligned buffer larger than 4KB cannot also
	 * honor a 4KB boundary, so drop the boundary restriction in
	 * that case; otherwise keep each segment within a 4KB page.
	 */
	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}

static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}
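/*
 * Usage sketch for the two helpers above (the sizes here are
 * hypothetical; the real callers, e.g. mxge_attach(), fall outside
 * this excerpt):
 *
 *	mxge_dma_t dma;
 *
 *	if (mxge_dma_alloc(sc, &dma, 4096, 64) == 0) {
 *		... hand dma.bus_addr to the NIC, use dma.addr ...
 *		mxge_dma_free(&dma);
 *	}
 */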
/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */

static int
mxge_parse_strings(mxge_softc_t *sc)
{
	char *ptr;
	int i, found_mac, found_sn2;
	char *endptr;

	ptr = sc->eeprom_strings;
	found_mac = 0;
	found_sn2 = 0;
	while (*ptr != '\0') {
		if (strncmp(ptr, "MAC=", 4) == 0) {
			ptr += 4;
			for (i = 0;;) {
				sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
				if (endptr - ptr != 2)
					goto abort;
				ptr = endptr;
				if (++i == 6)
					break;
				if (*ptr++ != ':')
					goto abort;
			}
			found_mac = 1;
		} else if (strncmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strlcpy(sc->product_code_string, ptr,
				sizeof(sc->product_code_string));
		} else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
			ptr += 3;
			strlcpy(sc->serial_number_string, ptr,
				sizeof(sc->serial_number_string));
		} else if (strncmp(ptr, "SN2=", 4) == 0) {
			/* SN2 takes precedence over SN */
			ptr += 4;
			found_sn2 = 1;
			strlcpy(sc->serial_number_string, ptr,
				sizeof(sc->serial_number_string));
		}
		/* skip to the start of the next string */
		while (*ptr++ != '\0') {}
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}
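/*
 * For example, an EEPROM string block of
 *	"MAC=00:60:dd:47:ab:cd\0SN=123456\0PC=M3F2-PCIXE-2\0\0"
 * yields mac_addr = {0x00,0x60,0xdd,0x47,0xab,0xcd}, serial number
 * "123456" and product code "M3F2-PCIXE-2".  (Values made up for
 * illustration.)
 */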
#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function.  Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access.
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves.  This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	off =  base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);

	if (va == NULL) {
		device_printf(sc->dev, "pmap_mapdev() failed\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif
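/*
 * The extended-config offset computed above follows the standard PCIe
 * ECAM layout: 1MB (0x00100000) per bus and 4KB (0x00001000) per
 * function, with 8 functions per slot.  For example, a bridge at
 * bus 0, slot 9, function 0 lives at base + 0x48000 (8 * 9 * 0x1000).
 */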
static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";

	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */

	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}

/*
 * The bandwidth math above: each completed transfer moves "len" bytes
 * and a tick is 0.5us, so MB/s = (transfers * len) / (ticks * 0.5us)
 * = transfers * len * 2 / ticks.  E.g. 10000 transfers of 4096 bytes
 * in 25000 ticks (12.5ms) gives 10000 * 4096 * 2 / 25000 = 3276 MB/s.
 * The read/write test moves 2 * len per transfer, hence the extra
 * factor of two.
 */

/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary.  Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB.  If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */

static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
				      pectl);
			sc->tx_boundary = 2048;
		}
	}

	/*
	 * load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0) {
		return status;
	}

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen.  Not required on Z8ES or newer.
	 */
	if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
		return 0;
	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS)
		device_printf(dev, "Falling back to ethp! "
			      "Please install up to date fw\n");
	return status;
}

static int
mxge_select_firmware(mxge_softc_t *sc)
{
	int aligned = 0;
	int force_firmware = mxge_force_firmware;

	if (sc->throttle)
		force_firmware = sc->throttle;

	if (force_firmware != 0) {
		if (force_firmware == 1)
			aligned = 1;
		else
			aligned = 0;
		if (mxge_verbose)
			device_printf(sc->dev,
				      "Assuming %s completions (forced)\n",
				      aligned ? "aligned" : "unaligned");
		goto abort;
	}

	/* if the PCIe link width is 4 or less, we can use the aligned
	   firmware and skip any checks */
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev,
			      "PCIe x%d Link, expect reduced performance\n",
			      sc->link_width);
		aligned = 1;
		goto abort;
	}

	if (0 == mxge_firmware_probe(sc))
		return 0;

abort:
	if (aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx_boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
	}
	return (mxge_load_firmware(sc, 0));
}

static int
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{

	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
			      be32toh(hdr->mcp_type));
		return EIO;
	}

	/* save firmware version for sysctl */
	strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version));
	if (mxge_verbose)
		device_printf(sc->dev, "firmware id: %s\n", hdr->version);

	sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
	       &sc->fw_ver_minor, &sc->fw_ver_tiny);

	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
	      && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		device_printf(sc->dev, "Found firmware version %s\n",
			      sc->fw_version);
		device_printf(sc->dev, "Driver needs %d.%d\n",
			      MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
		return EINVAL;
	}
	return 0;
}
static void *
z_alloc(void *nil, u_int items, u_int size)
{
	void *ptr;

	ptr = malloc(items * size, M_TEMP, M_NOWAIT);
	return ptr;
}

static void
z_free(void *nil, void *ptr)
{
	free(ptr, M_TEMP);
}

static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
	z_stream zs;
	char *inflate_buffer;
	const struct firmware *fw;
	const mcp_gen_header_t *hdr;
	unsigned hdr_offset;
	int status;
	unsigned int i;
	char dummy;
	size_t fw_len;

	fw = firmware_get(sc->fw_name);
	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n",
			      sc->fw_name);
		return ENOENT;
	}

	/* setup zlib and decompress f/w */
	bzero(&zs, sizeof (zs));
	zs.zalloc = z_alloc;
	zs.zfree = z_free;
	status = inflateInit(&zs);
	if (status != Z_OK) {
		status = EIO;
		goto abort_with_fw;
	}

	/* the uncompressed size is stored as the firmware version,
	   which would otherwise go unused */
	fw_len = (size_t) fw->version;
	inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
	if (inflate_buffer == NULL) {
		status = ENOMEM;
		goto abort_with_zs;
	}
	zs.avail_in = fw->datasize;
	zs.next_in = __DECONST(char *, fw->data);
	zs.avail_out = fw_len;
	zs.next_out = inflate_buffer;
	status = inflate(&zs, Z_FINISH);
	if (status != Z_STREAM_END) {
		device_printf(sc->dev, "zlib %d\n", status);
		status = EIO;
		goto abort_with_buffer;
	}

	/* check id */
	hdr_offset = htobe32(*(const uint32_t *)
			     (inflate_buffer + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
		device_printf(sc->dev, "Bad firmware file\n");
		status = EIO;
		goto abort_with_buffer;
	}
	hdr = (const void*)(inflate_buffer + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_buffer;

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < fw_len; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
			      inflate_buffer + i,
			      min(256U, (unsigned)(fw_len - i)));
		wmb();
		/* read back to force the preceding PIO writes out */
		dummy = *sc->sram;
		wmb();
	}

	*limit = fw_len;
	status = 0;
abort_with_buffer:
	free(inflate_buffer, M_TEMP);
abort_with_zs:
	inflateEnd(&zs);
abort_with_fw:
	firmware_put(fw, FIRMWARE_UNLOAD);
	return status;
}

/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	/* round the stack buffer up to the next 8-byte boundary */
	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high);	/* dummy addr MSW */
	buf[4] = htobe32(dma_low);	/* dummy addr LSW */
	buf[5] = htobe32(enable);	/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)\n",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}

static int
mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof(*buf) + 8];
	volatile mcp_cmd_response_t *response = sc->cmd;
	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
	uint32_t dma_low, dma_high;
	int err, sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	buf->data0 = htobe32(data->data0);
	buf->data1 = htobe32(data->data1);
	buf->data2 = htobe32(data->data2);
	buf->cmd = htobe32(cmd);
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf->response_addr.low = htobe32(dma_low);
	buf->response_addr.high = htobe32(dma_high);
	mtx_lock(&sc->cmd_mtx);
	response->result = 0xffffffff;
	wmb();
	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	err = EAGAIN;
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
		wmb();
		switch (be32toh(response->result)) {
		case 0:
			data->data0 = be32toh(response->data);
			err = 0;
			break;
		case 0xffffffff:
			DELAY(1000);
			break;
		case MXGEFW_CMD_UNKNOWN:
			err = ENOSYS;
			break;
		case MXGEFW_CMD_ERROR_UNALIGNED:
			err = E2BIG;
			break;
		case MXGEFW_CMD_ERROR_BUSY:
			err = EBUSY;
			break;
		case MXGEFW_CMD_ERROR_I2C_ABSENT:
			err = ENXIO;
			break;
		default:
			device_printf(sc->dev,
				      "mxge: command %d "
				      "failed, result = %d\n",
				      cmd, be32toh(response->result));
			err = ENXIO;
			break;
		}
		if (err != EAGAIN)
			break;
	}
	if (err == EAGAIN)
		device_printf(sc->dev, "mxge: command %d timed out, "
			      "result = %d\n",
			      cmd, be32toh(response->result));
	mtx_unlock(&sc->cmd_mtx);
	return err;
}
static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not malloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	free(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}

	return status;
}

static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
				      "Using firmware currently running on NIC"
				      ".  For optimal\n");
			device_printf(sc->dev,
				      "performance consider loading optimized "
				      "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
	/* where the code starts */
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8); 	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "handoff failed (%p = 0x%x)\n",
			      confirm, *confirm);

		return ENXIO;
	}
	return 0;
}

static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}
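/*
 * Example: for a MAC of 00:60:dd:47:ab:cd, the command above carries
 * data0 = 0x0060dd47 and data1 = 0x0000abcd; mxge_send_cmd() then
 * byte-swaps both to big-endian for the firmware.
 */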
static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set promisc mode\n");
	}
}

static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists */
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */

	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */

	if_maddr_rlock(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		      &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		      &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "Failed "
				      "MXGEFW_JOIN_MULTICAST_GROUP, "
				      "error status: %d\n", err);
			/* abort, leaving multicast filtering off */
			if_maddr_runlock(ifp);
			return;
		}
	}
	if_maddr_runlock(ifp);
	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}

static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}
static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts  */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);

	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lc.lro_bad_csum = 0;
		ss->lc.lro_queued = 0;
		ss->lc.lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			bzero(ss->fw_stats, sizeof *ss->fw_stats);
		}
	}
	sc->rdma_tags_available = 15;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	if (sc->throttle) {
		cmd.data0 = sc->throttle;
		if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
				  &cmd)) {
			device_printf(sc->dev,
				      "can't enable throttle\n");
		}
	}
	return status;
}
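/*
 * Note that mxge_reset() merges failures from several firmware
 * commands by OR-ing their errno values into "status"; the combined
 * value is only meaningful as a nonzero "something failed" flag, not
 * as a specific errno.
 */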
static int
mxge_change_throttle(SYSCTL_HANDLER_ARGS)
{
	mxge_cmd_t cmd;
	mxge_softc_t *sc;
	int err;
	unsigned int throttle;

	sc = arg1;
	throttle = sc->throttle;
	err = sysctl_handle_int(oidp, &throttle, arg2, req);
	if (err != 0) {
		return err;
	}

	if (throttle == sc->throttle)
		return 0;

	if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	cmd.data0 = throttle;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
	if (err == 0)
		sc->throttle = throttle;
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0) {
		return err;
	}
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0) {
		return err;
	}
	if (enabled == sc->pause)
		return 0;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_pause(sc, enabled);
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	/*
	 * Pass the byte-swapped value via arg2; with arg1 == NULL,
	 * sysctl_handle_int() reports arg2 as the (read-only) value.
	 */
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}

static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int slice;

	if (sc->slice_sysctl_tree == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		if (ss == NULL || ss->sysctl_tree == NULL)
			continue;
		sysctl_ctx_free(&ss->sysctl_ctx);
		ss->sysctl_tree = NULL;
	}
	sysctl_ctx_free(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = NULL;
}
static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
	fw = sc->ss[0].fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "firmware_version",
			  CTLFLAG_RD, &sc->fw_version,
			  0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "serial_number",
			  CTLFLAG_RD, &sc->serial_number_string,
			  0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "product_code",
			  CTLFLAG_RD, &sc->product_code_string,
			  0, "product code");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "pcie_link_width",
		       CTLFLAG_RD, &sc->link_width,
		       0, "PCIe link width");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_boundary",
		       CTLFLAG_RD, &sc->tx_boundary,
		       0, "tx_boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_combine",
		       CTLFLAG_RD, &sc->wc,
		       0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_dma_MBs",
		       CTLFLAG_RD, &sc->read_dma,
		       0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_dma_MBs",
		       CTLFLAG_RD, &sc->write_dma,
		       0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_write_dma_MBs",
		       CTLFLAG_RD, &sc->read_write_dma,
		       0, "DMA concurrent Read/Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "watchdog_resets",
		       CTLFLAG_RD, &sc->watchdog_resets,
		       0, "Number of times NIC was reset");

	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"intr_coal_delay",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_intr_coal,
			"I", "interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"throttle",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_throttle,
			"I", "transmit throttling");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"flow_control_enabled",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_flow_control,
			"I", "enable flow control (pause frames)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "deassert_wait",
		       CTLFLAG_RW, &mxge_deassert_wait,
		       0, "Wait for IRQ line to go low in ihandler");

	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"link_up",
			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
			0, mxge_handle_be32,
			"I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"rdma_tags_available",
			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
			0, mxge_handle_be32,
			"I", "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_crc32",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_crc32,
			0, mxge_handle_be32,
			"I", "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_phy",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_phy,
			0, mxge_handle_be32,
			"I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_error_or_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_link_error_or_filtered,
			0, mxge_handle_be32,
			"I", "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_overflow",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
			0, mxge_handle_be32,
			"I", "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_multicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_multicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_big_buffer",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_small_buffer",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_no_small_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_overrun",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
			0, mxge_handle_be32,
			"I", "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_pause",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_pause,
			0, mxge_handle_be32,
			"I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_runt",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
			0, mxge_handle_be32,
			"I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_unicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_unicast_filtered");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "verbose",
		       CTLFLAG_RW, &mxge_verbose,
		       0, "verbose printing");

	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree =
		SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
				"slice", CTLFLAG_RD, 0, "");

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		sprintf(slice_num, "%d", slice);
		ss->sysctl_tree =
			SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
					CTLFLAG_RD, 0, "");
		children = SYSCTL_CHILDREN(ss->sysctl_tree);
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_small_cnt",
			       CTLFLAG_RD, &ss->rx_small.cnt,
			       0, "rx_small_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_big_cnt",
			       CTLFLAG_RD, &ss->rx_big.cnt,
			       0, "rx_big_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed,
			       0, "number of lro merge queues flushed");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum,
			       0, "number of bad csums preventing LRO");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued,
			       0, "number of frames appended to lro merge "
			       "queues");

#ifndef IFNET_BUF_RING
		/* only transmit from slice 0 for now */
		if (slice > 0)
			continue;
#endif
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_req",
			       CTLFLAG_RD, &ss->tx.req,
			       0, "tx_req");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_done",
			       CTLFLAG_RD, &ss->tx.done,
			       0, "tx_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_pkt_done",
			       CTLFLAG_RD, &ss->tx.pkt_done,
			       0, "tx_pkt_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_stall",
			       CTLFLAG_RD, &ss->tx.stall,
			       0, "tx_stall");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_wake",
			       CTLFLAG_RD, &ss->tx.wake,
			       0, "tx_wake");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_defrag",
			       CTLFLAG_RD, &ss->tx.defrag,
			       0, "tx_defrag");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_queue_active",
			       CTLFLAG_RD, &ss->tx.queue_active,
			       0, "tx_queue_active");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_activate",
			       CTLFLAG_RD, &ss->tx.activate,
			       0, "tx_activate");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_deactivate",
			       CTLFLAG_RD, &ss->tx.deactivate,
			       0, "tx_deactivate");
	}
}
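/*
 * The slice counters above deliberately hang off sc->slice_sysctl_ctx
 * and each slice's own sysctl_ctx rather than the device's context,
 * so that mxge_rem_sysctls() can destroy them independently of the
 * device-level sysctls owned by device_get_sysctl_ctx().
 */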
/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * backwards one at a time and handle ring wraps
 */

static inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
			  mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
		wmb();
	}
}

/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */

static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
		int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}
static int
mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m,
	      struct mxge_pkt_info *pi)
{
	struct ether_vlan_header *eh;
	uint16_t etype;
	int tso = m->m_pkthdr.csum_flags & (CSUM_TSO);
#if IFCAP_TSO6 && defined(INET6)
	int nxt;
#endif

	eh = mtod(m, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		pi->ip_off = ETHER_HDR_LEN;
	}

	switch (etype) {
	case ETHERTYPE_IP:
		/*
		 * ensure ip header is in first mbuf, copy it to a
		 * scratch buffer if not
		 */
		pi->ip = (struct ip *)(m->m_data + pi->ip_off);
		pi->ip6 = NULL;
		if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) {
			m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip),
				   ss->scratch);
			pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
		}
		pi->ip_hlen = pi->ip->ip_hl << 2;
		if (!tso)
			return 0;

		if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
				    sizeof(struct tcphdr))) {
			m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
				   sizeof(struct tcphdr), ss->scratch);
			pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
		}
		pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen);
		break;
#if IFCAP_TSO6 && defined(INET6)
	case ETHERTYPE_IPV6:
		pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off);
		if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) {
			m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6),
				   ss->scratch);
			pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
		}
		nxt = 0;
		pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt);
		pi->ip_hlen -= pi->ip_off;
		if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
			return EINVAL;

		if (!tso)
			return 0;

		if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen)
			return EINVAL;

		if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
				    sizeof(struct tcphdr))) {
			m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
				   sizeof(struct tcphdr), ss->scratch);
			pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
		}
		pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen);
		break;
#endif
	default:
		return EINVAL;
	}
	return 0;
}
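/*
 * Note the pattern used above: whenever a header straddles the first
 * mbuf, the leading bytes are copied into the per-slice ss->scratch
 * buffer and pi->ip / pi->ip6 / pi->tcp are re-pointed into that
 * copy.  Callers therefore must not assume the pi pointers reference
 * data inside the mbuf chain.
 */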
#if IFCAP_TSO4

static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, struct mxge_pkt_info *pi)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss, sum;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	cksum_offset = pi->ip_off + pi->ip_hlen;
	cum_len = -(cksum_offset + (pi->tcp->th_off << 2));

	/* TSO implies checksum offload on this hardware */
	if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) {
		/*
		 * If packet has full TCP csum, replace it with pseudo hdr
		 * sum that the NIC expects, otherwise the NIC will emit
		 * packets with bad TCP checksums.
		 */
		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
		if (pi->ip6) {
#if (CSUM_TCP_IPV6 != 0) && defined(INET6)
			m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
			sum = in6_cksum_pseudo(pi->ip6,
					       m->m_pkthdr.len - cksum_offset,
					       IPPROTO_TCP, 0);
#endif
		} else {
#ifdef INET
			m->m_pkthdr.csum_flags |= CSUM_TCP;
			sum = in_pseudo(pi->ip->ip_src.s_addr,
					pi->ip->ip_dst.s_addr,
					htons(IPPROTO_TCP + (m->m_pkthdr.len -
							     cksum_offset)));
#endif
		}
		m_copyback(m, offsetof(struct tcphdr, th_sum) +
			   cksum_offset, sizeof(sum), (caddr_t)&sum);
	}
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	if (pi->ip6) {
		/*
		 * for IPv6 TSO, the "checksum offset" is re-purposed
		 * to store the TCP header len
		 */
		cksum_offset = (pi->tcp->th_off << 2);
	}

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */
	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces */
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (cksum_offset != 0 && !pi->ip6) {
				if (__predict_false(cksum_offset > seglen))
					cksum_offset -= seglen;
				else
					cksum_offset = 0;
			}
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req-rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	ss->oerrors++;
	if (!once) {
		printf("tx->max_desc exceeded via TSO!\n");
		printf("mss = %d, %ld, %d!\n", mss,
		       (long)seg - (long)tx->seg_list, tx->max_desc);
		once = 1;
	}
	return;
}

#endif /* IFCAP_TSO4 */
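/*
 * A worked example of the cum_len accounting above (illustrative
 * numbers): with a 14-byte Ethernet header, a 20-byte IPv4 header and
 * a 20-byte TCP header, cksum_offset = 34 and cum_len starts at
 * -(34 + 20) = -54, so the first 54 bytes are emitted with
 * MXGEFW_FLAGS_TSO_HDR.  Once cum_len crosses zero the header has
 * ended; from then on a cut (MXGEFW_FLAGS_TSO_CHOP) is generated each
 * time cum_len_next exceeds mss, and the per-request rdma_count is
 * back-filled retroactively, as described in the comment above.
 */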
2046 */ 2047 static struct mbuf * 2048 mxge_vlan_tag_insert(struct mbuf *m) 2049 { 2050 struct ether_vlan_header *evl; 2051 2052 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT); 2053 if (__predict_false(m == NULL)) 2054 return NULL; 2055 if (m->m_len < sizeof(*evl)) { 2056 m = m_pullup(m, sizeof(*evl)); 2057 if (__predict_false(m == NULL)) 2058 return NULL; 2059 } 2060 /* 2061 * Transform the Ethernet header into an Ethernet header 2062 * with 802.1Q encapsulation. 2063 */ 2064 evl = mtod(m, struct ether_vlan_header *); 2065 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 2066 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 2067 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 2068 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 2069 m->m_flags &= ~M_VLANTAG; 2070 return m; 2071 } 2072 #endif /* MXGE_NEW_VLAN_API */ 2073 2074 static void 2075 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 2076 { 2077 struct mxge_pkt_info pi = {0,0,0,0}; 2078 mxge_softc_t *sc; 2079 mcp_kreq_ether_send_t *req; 2080 bus_dma_segment_t *seg; 2081 struct mbuf *m_tmp; 2082 struct ifnet *ifp; 2083 mxge_tx_ring_t *tx; 2084 int cnt, cum_len, err, i, idx, odd_flag; 2085 uint16_t pseudo_hdr_offset; 2086 uint8_t flags, cksum_offset; 2087 2088 2089 sc = ss->sc; 2090 ifp = sc->ifp; 2091 tx = &ss->tx; 2092 2093 #ifdef MXGE_NEW_VLAN_API 2094 if (m->m_flags & M_VLANTAG) { 2095 m = mxge_vlan_tag_insert(m); 2096 if (__predict_false(m == NULL)) 2097 goto drop_without_m; 2098 } 2099 #endif 2100 if (m->m_pkthdr.csum_flags & 2101 (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { 2102 if (mxge_parse_tx(ss, m, &pi)) 2103 goto drop; 2104 } 2105 2106 /* (try to) map the frame for DMA */ 2107 idx = tx->req & tx->mask; 2108 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 2109 m, tx->seg_list, &cnt, 2110 BUS_DMA_NOWAIT); 2111 if (__predict_false(err == EFBIG)) { 2112 /* Too many segments in the chain. Try 2113 to defrag */ 2114 m_tmp = m_defrag(m, M_NOWAIT); 2115 if (m_tmp == NULL) { 2116 goto drop; 2117 } 2118 ss->tx.defrag++; 2119 m = m_tmp; 2120 err = bus_dmamap_load_mbuf_sg(tx->dmat, 2121 tx->info[idx].map, 2122 m, tx->seg_list, &cnt, 2123 BUS_DMA_NOWAIT); 2124 } 2125 if (__predict_false(err != 0)) { 2126 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 2127 " packet len = %d\n", err, m->m_pkthdr.len); 2128 goto drop; 2129 } 2130 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 2131 BUS_DMASYNC_PREWRITE); 2132 tx->info[idx].m = m; 2133 2134 #if IFCAP_TSO4 2135 /* TSO is different enough, we handle it in another routine */ 2136 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 2137 mxge_encap_tso(ss, m, cnt, &pi); 2138 return; 2139 } 2140 #endif 2141 2142 req = tx->req_list; 2143 cksum_offset = 0; 2144 pseudo_hdr_offset = 0; 2145 flags = MXGEFW_FLAGS_NO_TSO; 2146 2147 /* checksum offloading? 
*/ 2148 if (m->m_pkthdr.csum_flags & 2149 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { 2150 /* ensure ip header is in first mbuf, copy 2151 it to a scratch buffer if not */ 2152 cksum_offset = pi.ip_off + pi.ip_hlen; 2153 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2154 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2155 req->cksum_offset = cksum_offset; 2156 flags |= MXGEFW_FLAGS_CKSUM; 2157 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2158 } else { 2159 odd_flag = 0; 2160 } 2161 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2162 flags |= MXGEFW_FLAGS_SMALL; 2163 2164 /* convert segments into a request list */ 2165 cum_len = 0; 2166 seg = tx->seg_list; 2167 req->flags = MXGEFW_FLAGS_FIRST; 2168 for (i = 0; i < cnt; i++) { 2169 req->addr_low = 2170 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2171 req->addr_high = 2172 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2173 req->length = htobe16(seg->ds_len); 2174 req->cksum_offset = cksum_offset; 2175 if (cksum_offset > seg->ds_len) 2176 cksum_offset -= seg->ds_len; 2177 else 2178 cksum_offset = 0; 2179 req->pseudo_hdr_offset = pseudo_hdr_offset; 2180 req->pad = 0; /* complete solid 16-byte block */ 2181 req->rdma_count = 1; 2182 req->flags |= flags | ((cum_len & 1) * odd_flag); 2183 cum_len += seg->ds_len; 2184 seg++; 2185 req++; 2186 req->flags = 0; 2187 } 2188 req--; 2189 /* pad runts to 60 bytes */ 2190 if (cum_len < 60) { 2191 req++; 2192 req->addr_low = 2193 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2194 req->addr_high = 2195 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2196 req->length = htobe16(60 - cum_len); 2197 req->cksum_offset = 0; 2198 req->pseudo_hdr_offset = pseudo_hdr_offset; 2199 req->pad = 0; /* complete solid 16-byte block */ 2200 req->rdma_count = 1; 2201 req->flags |= flags | ((cum_len & 1) * odd_flag); 2202 cnt++; 2203 } 2204 2205 tx->req_list[0].rdma_count = cnt; 2206 #if 0 2207 /* print what the firmware will see */ 2208 for (i = 0; i < cnt; i++) { 2209 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2210 "cso:%d, flags:0x%x, rdma:%d\n", 2211 i, (int)ntohl(tx->req_list[i].addr_high), 2212 (int)ntohl(tx->req_list[i].addr_low), 2213 (int)ntohs(tx->req_list[i].length), 2214 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2215 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2216 tx->req_list[i].rdma_count); 2217 } 2218 printf("--------------\n"); 2219 #endif 2220 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2221 mxge_submit_req(tx, tx->req_list, cnt); 2222 #ifdef IFNET_BUF_RING 2223 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2224 /* tell the NIC to start polling this slice */ 2225 *tx->send_go = 1; 2226 tx->queue_active = 1; 2227 tx->activate++; 2228 wmb(); 2229 } 2230 #endif 2231 return; 2232 2233 drop: 2234 m_freem(m); 2235 drop_without_m: 2236 ss->oerrors++; 2237 return; 2238 } 2239 2240 #ifdef IFNET_BUF_RING 2241 static void 2242 mxge_qflush(struct ifnet *ifp) 2243 { 2244 mxge_softc_t *sc = ifp->if_softc; 2245 mxge_tx_ring_t *tx; 2246 struct mbuf *m; 2247 int slice; 2248 2249 for (slice = 0; slice < sc->num_slices; slice++) { 2250 tx = &sc->ss[slice].tx; 2251 mtx_lock(&tx->mtx); 2252 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2253 m_freem(m); 2254 mtx_unlock(&tx->mtx); 2255 } 2256 if_qflush(ifp); 2257 } 2258 2259 static inline void 2260 mxge_start_locked(struct mxge_slice_state *ss) 2261 { 2262 mxge_softc_t *sc; 2263 struct mbuf *m; 2264 struct ifnet *ifp; 2265 mxge_tx_ring_t *tx; 2266 2267 sc = ss->sc; 2268 ifp = sc->ifp; 2269 tx = &ss->tx; 2270 2271 while 
((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2272 m = drbr_dequeue(ifp, tx->br); 2273 if (m == NULL) { 2274 return; 2275 } 2276 /* let BPF see it */ 2277 BPF_MTAP(ifp, m); 2278 2279 /* give it to the nic */ 2280 mxge_encap(ss, m); 2281 } 2282 /* ran out of transmit slots */ 2283 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) 2284 && (!drbr_empty(ifp, tx->br))) { 2285 ss->if_drv_flags |= IFF_DRV_OACTIVE; 2286 tx->stall++; 2287 } 2288 } 2289 2290 static int 2291 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2292 { 2293 mxge_softc_t *sc; 2294 struct ifnet *ifp; 2295 mxge_tx_ring_t *tx; 2296 int err; 2297 2298 sc = ss->sc; 2299 ifp = sc->ifp; 2300 tx = &ss->tx; 2301 2302 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 2303 IFF_DRV_RUNNING) { 2304 err = drbr_enqueue(ifp, tx->br, m); 2305 return (err); 2306 } 2307 2308 if (!drbr_needs_enqueue(ifp, tx->br) && 2309 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2310 /* let BPF see it */ 2311 BPF_MTAP(ifp, m); 2312 /* give it to the nic */ 2313 mxge_encap(ss, m); 2314 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2315 return (err); 2316 } 2317 if (!drbr_empty(ifp, tx->br)) 2318 mxge_start_locked(ss); 2319 return (0); 2320 } 2321 2322 static int 2323 mxge_transmit(struct ifnet *ifp, struct mbuf *m) 2324 { 2325 mxge_softc_t *sc = ifp->if_softc; 2326 struct mxge_slice_state *ss; 2327 mxge_tx_ring_t *tx; 2328 int err = 0; 2329 int slice; 2330 2331 slice = m->m_pkthdr.flowid; 2332 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ 2333 2334 ss = &sc->ss[slice]; 2335 tx = &ss->tx; 2336 2337 if (mtx_trylock(&tx->mtx)) { 2338 err = mxge_transmit_locked(ss, m); 2339 mtx_unlock(&tx->mtx); 2340 } else { 2341 err = drbr_enqueue(ifp, tx->br, m); 2342 } 2343 2344 return (err); 2345 } 2346 2347 #else 2348 2349 static inline void 2350 mxge_start_locked(struct mxge_slice_state *ss) 2351 { 2352 mxge_softc_t *sc; 2353 struct mbuf *m; 2354 struct ifnet *ifp; 2355 mxge_tx_ring_t *tx; 2356 2357 sc = ss->sc; 2358 ifp = sc->ifp; 2359 tx = &ss->tx; 2360 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2361 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 2362 if (m == NULL) { 2363 return; 2364 } 2365 /* let BPF see it */ 2366 BPF_MTAP(ifp, m); 2367 2368 /* give it to the nic */ 2369 mxge_encap(ss, m); 2370 } 2371 /* ran out of transmit slots */ 2372 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 2373 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2374 tx->stall++; 2375 } 2376 } 2377 #endif 2378 static void 2379 mxge_start(struct ifnet *ifp) 2380 { 2381 mxge_softc_t *sc = ifp->if_softc; 2382 struct mxge_slice_state *ss; 2383 2384 /* only use the first slice for now */ 2385 ss = &sc->ss[0]; 2386 mtx_lock(&ss->tx.mtx); 2387 mxge_start_locked(ss); 2388 mtx_unlock(&ss->tx.mtx); 2389 } 2390 2391 /* 2392 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2393 * at most 32 bytes at a time, so as to avoid involving the software 2394 * pio handler in the nic. 
We re-write the first segment's low 2395 * DMA address to mark it valid only after we write the entire chunk 2396 * in a burst 2397 */ 2398 static inline void 2399 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2400 mcp_kreq_ether_recv_t *src) 2401 { 2402 uint32_t low; 2403 2404 low = src->addr_low; 2405 src->addr_low = 0xffffffff; 2406 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2407 wmb(); 2408 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2409 wmb(); 2410 src->addr_low = low; 2411 dst->addr_low = low; 2412 wmb(); 2413 } 2414 2415 static int 2416 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2417 { 2418 bus_dma_segment_t seg; 2419 struct mbuf *m; 2420 mxge_rx_ring_t *rx = &ss->rx_small; 2421 int cnt, err; 2422 2423 m = m_gethdr(M_NOWAIT, MT_DATA); 2424 if (m == NULL) { 2425 rx->alloc_fail++; 2426 err = ENOBUFS; 2427 goto done; 2428 } 2429 m->m_len = MHLEN; 2430 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2431 &seg, &cnt, BUS_DMA_NOWAIT); 2432 if (err != 0) { 2433 m_free(m); 2434 goto done; 2435 } 2436 rx->info[idx].m = m; 2437 rx->shadow[idx].addr_low = 2438 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2439 rx->shadow[idx].addr_high = 2440 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2441 2442 done: 2443 if ((idx & 7) == 7) 2444 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2445 return err; 2446 } 2447 2448 static int 2449 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2450 { 2451 bus_dma_segment_t seg[3]; 2452 struct mbuf *m; 2453 mxge_rx_ring_t *rx = &ss->rx_big; 2454 int cnt, err, i; 2455 2456 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2457 if (m == NULL) { 2458 rx->alloc_fail++; 2459 err = ENOBUFS; 2460 goto done; 2461 } 2462 m->m_len = rx->mlen; 2463 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2464 seg, &cnt, BUS_DMA_NOWAIT); 2465 if (err != 0) { 2466 m_free(m); 2467 goto done; 2468 } 2469 rx->info[idx].m = m; 2470 rx->shadow[idx].addr_low = 2471 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2472 rx->shadow[idx].addr_high = 2473 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2474 2475 #if MXGE_VIRT_JUMBOS 2476 for (i = 1; i < cnt; i++) { 2477 rx->shadow[idx + i].addr_low = 2478 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2479 rx->shadow[idx + i].addr_high = 2480 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2481 } 2482 #endif 2483 2484 done: 2485 for (i = 0; i < rx->nbufs; i++) { 2486 if ((idx & 7) == 7) { 2487 mxge_submit_8rx(&rx->lanai[idx - 7], 2488 &rx->shadow[idx - 7]); 2489 } 2490 idx++; 2491 } 2492 return err; 2493 } 2494 2495 #ifdef INET6 2496 2497 static uint16_t 2498 mxge_csum_generic(uint16_t *raw, int len) 2499 { 2500 uint32_t csum; 2501 2502 2503 csum = 0; 2504 while (len > 0) { 2505 csum += *raw; 2506 raw++; 2507 len -= 2; 2508 } 2509 csum = (csum >> 16) + (csum & 0xffff); 2510 csum = (csum >> 16) + (csum & 0xffff); 2511 return (uint16_t)csum; 2512 } 2513 2514 static inline uint16_t 2515 mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum) 2516 { 2517 uint32_t partial; 2518 int nxt, cksum_offset; 2519 struct ip6_hdr *ip6 = p; 2520 uint16_t c; 2521 2522 nxt = ip6->ip6_nxt; 2523 cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN; 2524 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) { 2525 cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN, 2526 IPPROTO_IPV6, &nxt); 2527 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) 2528 return (1); 2529 } 2530 2531 /* 2532 * IPv6 headers do not contain a checksum, and hence 2533 * do not checksum to zero, so they don't "fall out" 2534 * of the partial 
checksum calculation like IPv4 2535 * headers do. We need to fix the partial checksum by 2536 * subtracting the checksum of the IPv6 header. 2537 */ 2538 2539 partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset - 2540 ETHER_HDR_LEN); 2541 csum += ~partial; 2542 csum += (csum < ~partial); 2543 csum = (csum >> 16) + (csum & 0xFFFF); 2544 csum = (csum >> 16) + (csum & 0xFFFF); 2545 c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt, 2546 csum); 2547 c ^= 0xffff; 2548 return (c); 2549 } 2550 #endif /* INET6 */ 2551 /* 2552 * Myri10GE hardware checksums are not valid if the sender 2553 * padded the frame with non-zero padding. This is because 2554 * the firmware just does a simple 16-bit 1s complement 2555 * checksum across the entire frame, excluding the first 14 2556 * bytes. It is best to simply check the checksum and 2557 * tell the stack about it only if the checksum is good. 2558 */ 2559 2560 static inline uint16_t 2561 mxge_rx_csum(struct mbuf *m, int csum) 2562 { 2563 struct ether_header *eh; 2564 #ifdef INET 2565 struct ip *ip; 2566 #endif 2567 #if defined(INET) || defined(INET6) 2568 int cap = m->m_pkthdr.rcvif->if_capenable; 2569 #endif 2570 uint16_t c, etype; 2571 2572 2573 eh = mtod(m, struct ether_header *); 2574 etype = ntohs(eh->ether_type); 2575 switch (etype) { 2576 #ifdef INET 2577 case ETHERTYPE_IP: 2578 if ((cap & IFCAP_RXCSUM) == 0) 2579 return (1); 2580 ip = (struct ip *)(eh + 1); 2581 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP) 2582 return (1); 2583 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2584 htonl(ntohs(csum) + ntohs(ip->ip_len) - 2585 (ip->ip_hl << 2) + ip->ip_p)); 2586 c ^= 0xffff; 2587 break; 2588 #endif 2589 #ifdef INET6 2590 case ETHERTYPE_IPV6: 2591 if ((cap & IFCAP_RXCSUM_IPV6) == 0) 2592 return (1); 2593 c = mxge_rx_csum6((eh + 1), m, csum); 2594 break; 2595 #endif 2596 default: 2597 c = 1; 2598 } 2599 return (c); 2600 } 2601 2602 static void 2603 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2604 { 2605 struct ether_vlan_header *evl; 2606 struct ether_header *eh; 2607 uint32_t partial; 2608 2609 evl = mtod(m, struct ether_vlan_header *); 2610 eh = mtod(m, struct ether_header *); 2611 2612 /* 2613 * fix the checksum by subtracting the checksum of the 2614 * ETHER_VLAN_ENCAP_LEN bytes that sit just after what the 2615 * firmware thought was the end of the ethernet header. 2616 */ 2617 2618 /* put checksum into host byte order */ 2619 *csum = ntohs(*csum); 2620 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2621 (*csum) += ~partial; 2622 (*csum) += ((*csum) < ~partial); 2623 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2624 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2625 2626 /* restore checksum to network byte order; 2627 later consumers expect this */ 2628 *csum = htons(*csum); 2629 2630 /* save the tag */ 2631 #ifdef MXGE_NEW_VLAN_API 2632 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); 2633 #else 2634 { 2635 struct m_tag *mtag; 2636 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), 2637 M_NOWAIT); 2638 if (mtag == NULL) 2639 return; 2640 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); 2641 m_tag_prepend(m, mtag); 2642 } 2643 2644 #endif 2645 m->m_flags |= M_VLANTAG; 2646 2647 /* 2648 * Remove the 802.1q header by copying the Ethernet 2649 * addresses over it and adjusting the beginning of 2650 * the data in the mbuf. The encapsulated Ethernet 2651 * type field is already in place.
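* Illustratively:
*
*   [dst 6][src 6][TPID 2][tag 2][type 2][payload]
*     -> bcopy the 12 address bytes forward by 4 ->
*   [stale 4][dst 6][src 6][type 2][payload]
*     -> m_adj(m, ETHER_VLAN_ENCAP_LEN) then strips the stale bytes.
*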
2652 */ 2653 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2654 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2655 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2656 } 2657 2658 2659 static inline void 2660 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, 2661 uint32_t csum, int lro) 2662 { 2663 mxge_softc_t *sc; 2664 struct ifnet *ifp; 2665 struct mbuf *m; 2666 struct ether_header *eh; 2667 mxge_rx_ring_t *rx; 2668 bus_dmamap_t old_map; 2669 int idx; 2670 2671 sc = ss->sc; 2672 ifp = sc->ifp; 2673 rx = &ss->rx_big; 2674 idx = rx->cnt & rx->mask; 2675 rx->cnt += rx->nbufs; 2676 /* save a pointer to the received mbuf */ 2677 m = rx->info[idx].m; 2678 /* try to replace the received mbuf */ 2679 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2680 /* drop the frame -- the old mbuf is re-cycled */ 2681 ifp->if_ierrors++; 2682 return; 2683 } 2684 2685 /* unmap the received buffer */ 2686 old_map = rx->info[idx].map; 2687 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2688 bus_dmamap_unload(rx->dmat, old_map); 2689 2690 /* swap the bus_dmamap_t's */ 2691 rx->info[idx].map = rx->extra_map; 2692 rx->extra_map = old_map; 2693 2694 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2695 * aligned */ 2696 m->m_data += MXGEFW_PAD; 2697 2698 m->m_pkthdr.rcvif = ifp; 2699 m->m_len = m->m_pkthdr.len = len; 2700 ss->ipackets++; 2701 eh = mtod(m, struct ether_header *); 2702 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2703 mxge_vlan_tag_remove(m, &csum); 2704 } 2705 /* if the checksum is valid, mark it in the mbuf header */ 2706 2707 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && 2708 (0 == mxge_rx_csum(m, csum))) { 2709 /* Tell the stack that the checksum is good */ 2710 m->m_pkthdr.csum_data = 0xffff; 2711 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2712 CSUM_DATA_VALID; 2713 2714 #if defined(INET) || defined (INET6) 2715 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0))) 2716 return; 2717 #endif 2718 } 2719 /* flowid only valid if RSS hashing is enabled */ 2720 if (sc->num_slices > 1) { 2721 m->m_pkthdr.flowid = (ss - sc->ss); 2722 m->m_flags |= M_FLOWID; 2723 } 2724 /* pass the frame up the stack */ 2725 (*ifp->if_input)(ifp, m); 2726 } 2727 2728 static inline void 2729 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, 2730 uint32_t csum, int lro) 2731 { 2732 mxge_softc_t *sc; 2733 struct ifnet *ifp; 2734 struct ether_header *eh; 2735 struct mbuf *m; 2736 mxge_rx_ring_t *rx; 2737 bus_dmamap_t old_map; 2738 int idx; 2739 2740 sc = ss->sc; 2741 ifp = sc->ifp; 2742 rx = &ss->rx_small; 2743 idx = rx->cnt & rx->mask; 2744 rx->cnt++; 2745 /* save a pointer to the received mbuf */ 2746 m = rx->info[idx].m; 2747 /* try to replace the received mbuf */ 2748 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2749 /* drop the frame -- the old mbuf is re-cycled */ 2750 ifp->if_ierrors++; 2751 return; 2752 } 2753 2754 /* unmap the received buffer */ 2755 old_map = rx->info[idx].map; 2756 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2757 bus_dmamap_unload(rx->dmat, old_map); 2758 2759 /* swap the bus_dmamap_t's */ 2760 rx->info[idx].map = rx->extra_map; 2761 rx->extra_map = old_map; 2762 2763 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2764 * aligned */ 2765 m->m_data += MXGEFW_PAD; 2766 2767 m->m_pkthdr.rcvif = ifp; 2768 m->m_len = m->m_pkthdr.len = len; 2769 ss->ipackets++; 2770 eh = mtod(m, struct ether_header *); 2771 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2772 mxge_vlan_tag_remove(m, &csum); 2773 } 2774 /* if the checksum is valid, 
mark it in the mbuf header */ 2775 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && 2776 (0 == mxge_rx_csum(m, csum))) { 2777 /* Tell the stack that the checksum is good */ 2778 m->m_pkthdr.csum_data = 0xffff; 2779 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2780 CSUM_DATA_VALID; 2781 2782 #if defined(INET) || defined (INET6) 2783 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum))) 2784 return; 2785 #endif 2786 } 2787 /* flowid only valid if RSS hashing is enabled */ 2788 if (sc->num_slices > 1) { 2789 m->m_pkthdr.flowid = (ss - sc->ss); 2790 m->m_flags |= M_FLOWID; 2791 } 2792 /* pass the frame up the stack */ 2793 (*ifp->if_input)(ifp, m); 2794 } 2795 2796 static inline void 2797 mxge_clean_rx_done(struct mxge_slice_state *ss) 2798 { 2799 mxge_rx_done_t *rx_done = &ss->rx_done; 2800 int limit = 0; 2801 uint16_t length; 2802 uint16_t checksum; 2803 int lro; 2804 2805 lro = ss->sc->ifp->if_capenable & IFCAP_LRO; 2806 while (rx_done->entry[rx_done->idx].length != 0) { 2807 length = ntohs(rx_done->entry[rx_done->idx].length); 2808 rx_done->entry[rx_done->idx].length = 0; 2809 checksum = rx_done->entry[rx_done->idx].checksum; 2810 if (length <= (MHLEN - MXGEFW_PAD)) 2811 mxge_rx_done_small(ss, length, checksum, lro); 2812 else 2813 mxge_rx_done_big(ss, length, checksum, lro); 2814 rx_done->cnt++; 2815 rx_done->idx = rx_done->cnt & rx_done->mask; 2816 2817 /* limit potential for livelock */ 2818 if (__predict_false(++limit > rx_done->mask / 2)) 2819 break; 2820 } 2821 #if defined(INET) || defined (INET6) 2822 while (!SLIST_EMPTY(&ss->lc.lro_active)) { 2823 struct lro_entry *lro = SLIST_FIRST(&ss->lc.lro_active); 2824 SLIST_REMOVE_HEAD(&ss->lc.lro_active, next); 2825 tcp_lro_flush(&ss->lc, lro); 2826 } 2827 #endif 2828 } 2829 2830 2831 static inline void 2832 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2833 { 2834 struct ifnet *ifp; 2835 mxge_tx_ring_t *tx; 2836 struct mbuf *m; 2837 bus_dmamap_t map; 2838 int idx; 2839 int *flags; 2840 2841 tx = &ss->tx; 2842 ifp = ss->sc->ifp; 2843 while (tx->pkt_done != mcp_idx) { 2844 idx = tx->done & tx->mask; 2845 tx->done++; 2846 m = tx->info[idx].m; 2847 /* mbuf and DMA map only attached to the first 2848 segment per-mbuf */ 2849 if (m != NULL) { 2850 ss->obytes += m->m_pkthdr.len; 2851 if (m->m_flags & M_MCAST) 2852 ss->omcasts++; 2853 ss->opackets++; 2854 tx->info[idx].m = NULL; 2855 map = tx->info[idx].map; 2856 bus_dmamap_unload(tx->dmat, map); 2857 m_freem(m); 2858 } 2859 if (tx->info[idx].flag) { 2860 tx->info[idx].flag = 0; 2861 tx->pkt_done++; 2862 } 2863 } 2864 2865 /* If we have space, clear IFF_DRV_OACTIVE to tell the stack that 2866 it's OK to send packets */ 2867 #ifdef IFNET_BUF_RING 2868 flags = &ss->if_drv_flags; 2869 #else 2870 flags = &ifp->if_drv_flags; 2871 #endif 2872 mtx_lock(&ss->tx.mtx); 2873 if ((*flags) & IFF_DRV_OACTIVE && 2874 tx->req - tx->done < (tx->mask + 1)/4) { 2875 *(flags) &= ~IFF_DRV_OACTIVE; 2876 ss->tx.wake++; 2877 mxge_start_locked(ss); 2878 } 2879 #ifdef IFNET_BUF_RING 2880 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { 2881 /* let the NIC stop polling this queue, since there 2882 * are no more transmits pending */ 2884 *tx->send_stop = 1; 2885 tx->queue_active = 0; 2886 tx->deactivate++; 2887 wmb(); 2889 } 2890 #endif 2891 mtx_unlock(&ss->tx.mtx); 2892 2893 } 2894 2895 static struct mxge_media_type mxge_xfp_media_types[] = 2896 { 2897 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2898 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2899 {IFM_10G_LR, (1 <<
6), "10GBASE-LR"}, 2900 {0, (1 << 5), "10GBASE-ER"}, 2901 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2902 {0, (1 << 3), "10GBASE-SW"}, 2903 {0, (1 << 2), "10GBASE-LW"}, 2904 {0, (1 << 1), "10GBASE-EW"}, 2905 {0, (1 << 0), "Reserved"} 2906 }; 2907 static struct mxge_media_type mxge_sfp_media_types[] = 2908 { 2909 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"}, 2910 {0, (1 << 7), "Reserved"}, 2911 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2912 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2913 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}, 2914 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"} 2915 }; 2916 2917 static void 2918 mxge_media_set(mxge_softc_t *sc, int media_type) 2919 { 2920 2921 2922 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type, 2923 0, NULL); 2924 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type); 2925 sc->current_media = media_type; 2926 sc->media.ifm_media = sc->media.ifm_cur->ifm_media; 2927 } 2928 2929 static void 2930 mxge_media_init(mxge_softc_t *sc) 2931 { 2932 char *ptr; 2933 int i; 2934 2935 ifmedia_removeall(&sc->media); 2936 mxge_media_set(sc, IFM_AUTO); 2937 2938 /* 2939 * parse the product code to deterimine the interface type 2940 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2941 * after the 3rd dash in the driver's cached copy of the 2942 * EEPROM's product code string. 2943 */ 2944 ptr = sc->product_code_string; 2945 if (ptr == NULL) { 2946 device_printf(sc->dev, "Missing product code\n"); 2947 return; 2948 } 2949 2950 for (i = 0; i < 3; i++, ptr++) { 2951 ptr = strchr(ptr, '-'); 2952 if (ptr == NULL) { 2953 device_printf(sc->dev, 2954 "only %d dashes in PC?!?\n", i); 2955 return; 2956 } 2957 } 2958 if (*ptr == 'C' || *(ptr +1) == 'C') { 2959 /* -C is CX4 */ 2960 sc->connector = MXGE_CX4; 2961 mxge_media_set(sc, IFM_10G_CX4); 2962 } else if (*ptr == 'Q') { 2963 /* -Q is Quad Ribbon Fiber */ 2964 sc->connector = MXGE_QRF; 2965 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2966 /* FreeBSD has no media type for Quad ribbon fiber */ 2967 } else if (*ptr == 'R') { 2968 /* -R is XFP */ 2969 sc->connector = MXGE_XFP; 2970 } else if (*ptr == 'S' || *(ptr +1) == 'S') { 2971 /* -S or -2S is SFP+ */ 2972 sc->connector = MXGE_SFP; 2973 } else { 2974 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2975 } 2976 } 2977 2978 /* 2979 * Determine the media type for a NIC. Some XFPs will identify 2980 * themselves only when their link is up, so this is initiated via a 2981 * link up interrupt. However, this can potentially take up to 2982 * several milliseconds, so it is run via the watchdog routine, rather 2983 * than in the interrupt handler itself. 
2984 */ 2985 static void 2986 mxge_media_probe(mxge_softc_t *sc) 2987 { 2988 mxge_cmd_t cmd; 2989 char *cage_type; 2990 2991 struct mxge_media_type *mxge_media_types = NULL; 2992 int i, err, ms, mxge_media_type_entries; 2993 uint32_t byte; 2994 2995 sc->need_media_probe = 0; 2996 2997 if (sc->connector == MXGE_XFP) { 2998 /* -R is XFP */ 2999 mxge_media_types = mxge_xfp_media_types; 3000 mxge_media_type_entries = 3001 sizeof (mxge_xfp_media_types) / 3002 sizeof (mxge_xfp_media_types[0]); 3003 byte = MXGE_XFP_COMPLIANCE_BYTE; 3004 cage_type = "XFP"; 3005 } else if (sc->connector == MXGE_SFP) { 3006 /* -S or -2S is SFP+ */ 3007 mxge_media_types = mxge_sfp_media_types; 3008 mxge_media_type_entries = 3009 sizeof (mxge_sfp_media_types) / 3010 sizeof (mxge_sfp_media_types[0]); 3011 cage_type = "SFP+"; 3012 byte = 3; 3013 } else { 3014 /* nothing to do; media type cannot change */ 3015 return; 3016 } 3017 3018 /* 3019 * At this point we know the NIC has a module cage (XFP or 3020 * SFP+), so now we try to determine what is in the cage by 3021 * using the firmware's I2C commands to read the module's 10GbE 3022 * compliance register. We read just one byte, which may take 3023 * over a millisecond. 3024 */ 3025 3026 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 3027 cmd.data1 = byte; 3028 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 3029 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 3030 device_printf(sc->dev, "failed to read XFP\n"); 3031 } 3032 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 3033 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 3034 } 3035 if (err != MXGEFW_CMD_OK) { 3036 return; 3037 } 3038 3039 /* now we wait for the data to be cached */ 3040 cmd.data0 = byte; 3041 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 3042 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 3043 DELAY(1000); 3044 cmd.data0 = byte; 3045 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 3046 } 3047 if (err != MXGEFW_CMD_OK) { 3048 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 3049 cage_type, err, ms); 3050 return; 3051 } 3052 3053 if (cmd.data0 == mxge_media_types[0].bitmask) { 3054 if (mxge_verbose) 3055 device_printf(sc->dev, "%s:%s\n", cage_type, 3056 mxge_media_types[0].name); 3057 if (sc->current_media != mxge_media_types[0].flag) { 3058 mxge_media_init(sc); 3059 mxge_media_set(sc, mxge_media_types[0].flag); 3060 } 3061 return; 3062 } 3063 for (i = 1; i < mxge_media_type_entries; i++) { 3064 if (cmd.data0 & mxge_media_types[i].bitmask) { 3065 if (mxge_verbose) 3066 device_printf(sc->dev, "%s:%s\n", 3067 cage_type, 3068 mxge_media_types[i].name); 3069 3070 if (sc->current_media != mxge_media_types[i].flag) { 3071 mxge_media_init(sc); 3072 mxge_media_set(sc, mxge_media_types[i].flag); 3073 } 3074 return; 3075 } 3076 } 3077 if (mxge_verbose) 3078 device_printf(sc->dev, "%s media 0x%x unknown\n", 3079 cage_type, cmd.data0); 3080 3081 return; 3082 } 3083 3084 static void 3085 mxge_intr(void *arg) 3086 { 3087 struct mxge_slice_state *ss = arg; 3088 mxge_softc_t *sc = ss->sc; 3089 mcp_irq_data_t *stats = ss->fw_stats; 3090 mxge_tx_ring_t *tx = &ss->tx; 3091 mxge_rx_done_t *rx_done = &ss->rx_done; 3092 uint32_t send_done_count; 3093 uint8_t valid; 3094 3095 3096 #ifndef IFNET_BUF_RING 3097 /* an interrupt on a non-zero slice is implicitly valid 3098 since MSI-X irqs are not shared */ 3099 if (ss != sc->ss) { 3100 mxge_clean_rx_done(ss); 3101 *ss->irq_claim = be32toh(3); 3102 return; 3103 } 3104 #endif 3105 3106 /* make sure the DMA has finished */ 3107 if (!stats->valid) { 3108 return; 3109 } 3110
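/* snapshot the valid flag before it is cleared below; its low bit
 * decides, at the bottom of this routine, whether the rx irq-claim
 * token must be returned to the NIC */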
valid = stats->valid; 3111 3112 if (sc->legacy_irq) { 3113 /* lower legacy IRQ */ 3114 *sc->irq_deassert = 0; 3115 if (!mxge_deassert_wait) 3116 /* don't wait for conf. that irq is low */ 3117 stats->valid = 0; 3118 } else { 3119 stats->valid = 0; 3120 } 3121 3122 /* loop while waiting for legacy irq deassertion */ 3123 do { 3124 /* check for transmit completes and receives */ 3125 send_done_count = be32toh(stats->send_done_count); 3126 while ((send_done_count != tx->pkt_done) || 3127 (rx_done->entry[rx_done->idx].length != 0)) { 3128 if (send_done_count != tx->pkt_done) 3129 mxge_tx_done(ss, (int)send_done_count); 3130 mxge_clean_rx_done(ss); 3131 send_done_count = be32toh(stats->send_done_count); 3132 } 3133 if (sc->legacy_irq && mxge_deassert_wait) 3134 wmb(); 3135 } while (*((volatile uint8_t *) &stats->valid)); 3136 3137 /* fw link & error stats meaningful only on the first slice */ 3138 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 3139 if (sc->link_state != stats->link_up) { 3140 sc->link_state = stats->link_up; 3141 if (sc->link_state) { 3142 if_link_state_change(sc->ifp, LINK_STATE_UP); 3143 if_initbaudrate(sc->ifp, IF_Gbps(10)); 3144 if (mxge_verbose) 3145 device_printf(sc->dev, "link up\n"); 3146 } else { 3147 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3148 sc->ifp->if_baudrate = 0; 3149 if (mxge_verbose) 3150 device_printf(sc->dev, "link down\n"); 3151 } 3152 sc->need_media_probe = 1; 3153 } 3154 if (sc->rdma_tags_available != 3155 be32toh(stats->rdma_tags_available)) { 3156 sc->rdma_tags_available = 3157 be32toh(stats->rdma_tags_available); 3158 device_printf(sc->dev, "RDMA timed out! %d tags " 3159 "left\n", sc->rdma_tags_available); 3160 } 3161 3162 if (stats->link_down) { 3163 sc->down_cnt += stats->link_down; 3164 sc->link_state = 0; 3165 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3166 } 3167 } 3168 3169 /* check to see if we have rx token to pass back */ 3170 if (valid & 0x1) 3171 *ss->irq_claim = be32toh(3); 3172 *(ss->irq_claim + 1) = be32toh(3); 3173 } 3174 3175 static void 3176 mxge_init(void *arg) 3177 { 3178 mxge_softc_t *sc = arg; 3179 struct ifnet *ifp = sc->ifp; 3180 3181 3182 mtx_lock(&sc->driver_mtx); 3183 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 3184 (void) mxge_open(sc); 3185 mtx_unlock(&sc->driver_mtx); 3186 } 3187 3188 3189 3190 static void 3191 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 3192 { 3193 int i; 3194 3195 #if defined(INET) || defined(INET6) 3196 tcp_lro_free(&ss->lc); 3197 #endif 3198 for (i = 0; i <= ss->rx_big.mask; i++) { 3199 if (ss->rx_big.info[i].m == NULL) 3200 continue; 3201 bus_dmamap_unload(ss->rx_big.dmat, 3202 ss->rx_big.info[i].map); 3203 m_freem(ss->rx_big.info[i].m); 3204 ss->rx_big.info[i].m = NULL; 3205 } 3206 3207 for (i = 0; i <= ss->rx_small.mask; i++) { 3208 if (ss->rx_small.info[i].m == NULL) 3209 continue; 3210 bus_dmamap_unload(ss->rx_small.dmat, 3211 ss->rx_small.info[i].map); 3212 m_freem(ss->rx_small.info[i].m); 3213 ss->rx_small.info[i].m = NULL; 3214 } 3215 3216 /* transmit ring used only on the first slice */ 3217 if (ss->tx.info == NULL) 3218 return; 3219 3220 for (i = 0; i <= ss->tx.mask; i++) { 3221 ss->tx.info[i].flag = 0; 3222 if (ss->tx.info[i].m == NULL) 3223 continue; 3224 bus_dmamap_unload(ss->tx.dmat, 3225 ss->tx.info[i].map); 3226 m_freem(ss->tx.info[i].m); 3227 ss->tx.info[i].m = NULL; 3228 } 3229 } 3230 3231 static void 3232 mxge_free_mbufs(mxge_softc_t *sc) 3233 { 3234 int slice; 3235 3236 for (slice = 0; slice < sc->num_slices; slice++) 3237 
mxge_free_slice_mbufs(&sc->ss[slice]); 3238 } 3239 3240 static void 3241 mxge_free_slice_rings(struct mxge_slice_state *ss) 3242 { 3243 int i; 3244 3245 3246 if (ss->rx_done.entry != NULL) 3247 mxge_dma_free(&ss->rx_done.dma); 3248 ss->rx_done.entry = NULL; 3249 3250 if (ss->tx.req_bytes != NULL) 3251 free(ss->tx.req_bytes, M_DEVBUF); 3252 ss->tx.req_bytes = NULL; 3253 3254 if (ss->tx.seg_list != NULL) 3255 free(ss->tx.seg_list, M_DEVBUF); 3256 ss->tx.seg_list = NULL; 3257 3258 if (ss->rx_small.shadow != NULL) 3259 free(ss->rx_small.shadow, M_DEVBUF); 3260 ss->rx_small.shadow = NULL; 3261 3262 if (ss->rx_big.shadow != NULL) 3263 free(ss->rx_big.shadow, M_DEVBUF); 3264 ss->rx_big.shadow = NULL; 3265 3266 if (ss->tx.info != NULL) { 3267 if (ss->tx.dmat != NULL) { 3268 for (i = 0; i <= ss->tx.mask; i++) { 3269 bus_dmamap_destroy(ss->tx.dmat, 3270 ss->tx.info[i].map); 3271 } 3272 bus_dma_tag_destroy(ss->tx.dmat); 3273 } 3274 free(ss->tx.info, M_DEVBUF); 3275 } 3276 ss->tx.info = NULL; 3277 3278 if (ss->rx_small.info != NULL) { 3279 if (ss->rx_small.dmat != NULL) { 3280 for (i = 0; i <= ss->rx_small.mask; i++) { 3281 bus_dmamap_destroy(ss->rx_small.dmat, 3282 ss->rx_small.info[i].map); 3283 } 3284 bus_dmamap_destroy(ss->rx_small.dmat, 3285 ss->rx_small.extra_map); 3286 bus_dma_tag_destroy(ss->rx_small.dmat); 3287 } 3288 free(ss->rx_small.info, M_DEVBUF); 3289 } 3290 ss->rx_small.info = NULL; 3291 3292 if (ss->rx_big.info != NULL) { 3293 if (ss->rx_big.dmat != NULL) { 3294 for (i = 0; i <= ss->rx_big.mask; i++) { 3295 bus_dmamap_destroy(ss->rx_big.dmat, 3296 ss->rx_big.info[i].map); 3297 } 3298 bus_dmamap_destroy(ss->rx_big.dmat, 3299 ss->rx_big.extra_map); 3300 bus_dma_tag_destroy(ss->rx_big.dmat); 3301 } 3302 free(ss->rx_big.info, M_DEVBUF); 3303 } 3304 ss->rx_big.info = NULL; 3305 } 3306 3307 static void 3308 mxge_free_rings(mxge_softc_t *sc) 3309 { 3310 int slice; 3311 3312 for (slice = 0; slice < sc->num_slices; slice++) 3313 mxge_free_slice_rings(&sc->ss[slice]); 3314 } 3315 3316 static int 3317 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3318 int tx_ring_entries) 3319 { 3320 mxge_softc_t *sc = ss->sc; 3321 size_t bytes; 3322 int err, i; 3323 3324 /* allocate per-slice receive resources */ 3325 3326 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 3327 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 3328 3329 /* allocate the rx shadow rings */ 3330 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 3331 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3332 3333 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 3334 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3335 3336 /* allocate the rx host info rings */ 3337 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 3338 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3339 3340 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 3341 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3342 3343 /* allocate the rx busdma resources */ 3344 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3345 1, /* alignment */ 3346 4096, /* boundary */ 3347 BUS_SPACE_MAXADDR, /* low */ 3348 BUS_SPACE_MAXADDR, /* high */ 3349 NULL, NULL, /* filter */ 3350 MHLEN, /* maxsize */ 3351 1, /* num segs */ 3352 MHLEN, /* maxsegsize */ 3353 BUS_DMA_ALLOCNOW, /* flags */ 3354 NULL, NULL, /* lock */ 3355 &ss->rx_small.dmat); /* tag */ 3356 if (err != 0) { 3357 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3358 err); 3359 return err; 3360 } 3361 3362 
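/* The big-rx tag below differs from the small-rx tag only in segment
 * shape: with MXGE_VIRT_JUMBOS a jumbo buffer is presented to the NIC
 * as up to three page-sized segments, otherwise as a single
 * MJUM9BYTES segment (see the #if blocks that follow). */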
err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3363 1, /* alignment */ 3364 #if MXGE_VIRT_JUMBOS 3365 4096, /* boundary */ 3366 #else 3367 0, /* boundary */ 3368 #endif 3369 BUS_SPACE_MAXADDR, /* low */ 3370 BUS_SPACE_MAXADDR, /* high */ 3371 NULL, NULL, /* filter */ 3372 3*4096, /* maxsize */ 3373 #if MXGE_VIRT_JUMBOS 3374 3, /* num segs */ 3375 4096, /* maxsegsize*/ 3376 #else 3377 1, /* num segs */ 3378 MJUM9BYTES, /* maxsegsize*/ 3379 #endif 3380 BUS_DMA_ALLOCNOW, /* flags */ 3381 NULL, NULL, /* lock */ 3382 &ss->rx_big.dmat); /* tag */ 3383 if (err != 0) { 3384 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3385 err); 3386 return err; 3387 } 3388 for (i = 0; i <= ss->rx_small.mask; i++) { 3389 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3390 &ss->rx_small.info[i].map); 3391 if (err != 0) { 3392 device_printf(sc->dev, "Err %d rx_small dmamap\n", 3393 err); 3394 return err; 3395 } 3396 } 3397 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3398 &ss->rx_small.extra_map); 3399 if (err != 0) { 3400 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 3401 err); 3402 return err; 3403 } 3404 3405 for (i = 0; i <= ss->rx_big.mask; i++) { 3406 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3407 &ss->rx_big.info[i].map); 3408 if (err != 0) { 3409 device_printf(sc->dev, "Err %d rx_big dmamap\n", 3410 err); 3411 return err; 3412 } 3413 } 3414 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3415 &ss->rx_big.extra_map); 3416 if (err != 0) { 3417 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 3418 err); 3419 return err; 3420 } 3421 3422 /* now allocate TX resources */ 3423 3424 #ifndef IFNET_BUF_RING 3425 /* only use a single TX ring for now */ 3426 if (ss != ss->sc->ss) 3427 return 0; 3428 #endif 3429 3430 ss->tx.mask = tx_ring_entries - 1; 3431 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3432 3433 3434 /* allocate the tx request copy block */ 3435 bytes = 8 + 3436 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3437 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 3438 /* ensure req_list entries are aligned to 8 bytes */ 3439 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3440 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL); 3441 3442 /* allocate the tx busdma segment list */ 3443 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3444 ss->tx.seg_list = (bus_dma_segment_t *) 3445 malloc(bytes, M_DEVBUF, M_WAITOK); 3446 3447 /* allocate the tx host info ring */ 3448 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3449 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3450 3451 /* allocate the tx busdma resources */ 3452 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3453 1, /* alignment */ 3454 sc->tx_boundary, /* boundary */ 3455 BUS_SPACE_MAXADDR, /* low */ 3456 BUS_SPACE_MAXADDR, /* high */ 3457 NULL, NULL, /* filter */ 3458 65536 + 256, /* maxsize */ 3459 ss->tx.max_desc - 2, /* num segs */ 3460 sc->tx_boundary, /* maxsegsz */ 3461 BUS_DMA_ALLOCNOW, /* flags */ 3462 NULL, NULL, /* lock */ 3463 &ss->tx.dmat); /* tag */ 3464 3465 if (err != 0) { 3466 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3467 err); 3468 return err; 3469 } 3470 3471 /* now use these tags to setup dmamaps for each slot 3472 in the ring */ 3473 for (i = 0; i <= ss->tx.mask; i++) { 3474 err = bus_dmamap_create(ss->tx.dmat, 0, 3475 &ss->tx.info[i].map); 3476 if (err != 0) { 3477 device_printf(sc->dev, "Err %d tx dmamap\n", 3478 err); 3479 return err; 3480 } 3481 } 3482 return 0; 3483 3484 } 3485 3486 static int 3487 mxge_alloc_rings(mxge_softc_t *sc) 3488 
{ 3489 mxge_cmd_t cmd; 3490 int tx_ring_size; 3491 int tx_ring_entries, rx_ring_entries; 3492 int err, slice; 3493 3494 /* get ring sizes */ 3495 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3496 tx_ring_size = cmd.data0; 3497 if (err != 0) { 3498 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3499 goto abort; 3500 } 3501 3502 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3503 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3504 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 3505 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 3506 IFQ_SET_READY(&sc->ifp->if_snd); 3507 3508 for (slice = 0; slice < sc->num_slices; slice++) { 3509 err = mxge_alloc_slice_rings(&sc->ss[slice], 3510 rx_ring_entries, 3511 tx_ring_entries); 3512 if (err != 0) 3513 goto abort; 3514 } 3515 return 0; 3516 3517 abort: 3518 mxge_free_rings(sc); 3519 return err; 3520 3521 } 3522 3523 3524 static void 3525 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3526 { 3527 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3528 3529 if (bufsize < MCLBYTES) { 3530 /* easy, everything fits in a single buffer */ 3531 *big_buf_size = MCLBYTES; 3532 *cl_size = MCLBYTES; 3533 *nbufs = 1; 3534 return; 3535 } 3536 3537 if (bufsize < MJUMPAGESIZE) { 3538 /* still easy, everything still fits in a single buffer */ 3539 *big_buf_size = MJUMPAGESIZE; 3540 *cl_size = MJUMPAGESIZE; 3541 *nbufs = 1; 3542 return; 3543 } 3544 #if MXGE_VIRT_JUMBOS 3545 /* now we need to use virtually contiguous buffers */ 3546 *cl_size = MJUM9BYTES; 3547 *big_buf_size = 4096; 3548 *nbufs = mtu / 4096 + 1; 3549 /* needs to be a power of two, so round up */ 3550 if (*nbufs == 3) 3551 *nbufs = 4; 3552 #else 3553 *cl_size = MJUM9BYTES; 3554 *big_buf_size = MJUM9BYTES; 3555 *nbufs = 1; 3556 #endif 3557 } 3558 3559 static int 3560 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3561 { 3562 mxge_softc_t *sc; 3563 mxge_cmd_t cmd; 3564 bus_dmamap_t map; 3565 int err, i, slice; 3566 3567 3568 sc = ss->sc; 3569 slice = ss - sc->ss; 3570 3571 #if defined(INET) || defined(INET6) 3572 (void)tcp_lro_init(&ss->lc); 3573 #endif 3574 ss->lc.ifp = sc->ifp; 3575 3576 /* get the lanai pointers to the send and receive rings */ 3577 3578 err = 0; 3579 #ifndef IFNET_BUF_RING 3580 /* We currently only send from the first slice */ 3581 if (slice == 0) { 3582 #endif 3583 cmd.data0 = slice; 3584 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3585 ss->tx.lanai = 3586 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3587 ss->tx.send_go = (volatile uint32_t *) 3588 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3589 ss->tx.send_stop = (volatile uint32_t *) 3590 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3591 #ifndef IFNET_BUF_RING 3592 } 3593 #endif 3594 cmd.data0 = slice; 3595 err |= mxge_send_cmd(sc, 3596 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3597 ss->rx_small.lanai = 3598 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3599 cmd.data0 = slice; 3600 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3601 ss->rx_big.lanai = 3602 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3603 3604 if (err != 0) { 3605 device_printf(sc->dev, 3606 "failed to get ring sizes or locations\n"); 3607 return EIO; 3608 } 3609 3610 /* stock receive rings */ 3611 for (i = 0; i <= ss->rx_small.mask; i++) { 3612 map = ss->rx_small.info[i].map; 3613 err = mxge_get_buf_small(ss, map, i); 3614 if (err) { 3615 
device_printf(sc->dev, "alloced %d/%d smalls\n", 3616 i, ss->rx_small.mask + 1); 3617 return ENOMEM; 3618 } 3619 } 3620 for (i = 0; i <= ss->rx_big.mask; i++) { 3621 ss->rx_big.shadow[i].addr_low = 0xffffffff; 3622 ss->rx_big.shadow[i].addr_high = 0xffffffff; 3623 } 3624 ss->rx_big.nbufs = nbufs; 3625 ss->rx_big.cl_size = cl_size; 3626 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN + 3627 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3628 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { 3629 map = ss->rx_big.info[i].map; 3630 err = mxge_get_buf_big(ss, map, i); 3631 if (err) { 3632 device_printf(sc->dev, "alloced %d/%d bigs\n", 3633 i, ss->rx_big.mask + 1); 3634 return ENOMEM; 3635 } 3636 } 3637 return 0; 3638 } 3639 3640 static int 3641 mxge_open(mxge_softc_t *sc) 3642 { 3643 mxge_cmd_t cmd; 3644 int err, big_bytes, nbufs, slice, cl_size, i; 3645 bus_addr_t bus; 3646 volatile uint8_t *itable; 3647 struct mxge_slice_state *ss; 3648 3649 /* Copy the MAC address in case it was overridden */ 3650 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 3651 3652 err = mxge_reset(sc, 1); 3653 if (err != 0) { 3654 device_printf(sc->dev, "failed to reset\n"); 3655 return EIO; 3656 } 3657 3658 if (sc->num_slices > 1) { 3659 /* setup the indirection table */ 3660 cmd.data0 = sc->num_slices; 3661 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 3662 &cmd); 3663 3664 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 3665 &cmd); 3666 if (err != 0) { 3667 device_printf(sc->dev, 3668 "failed to setup rss tables\n"); 3669 return err; 3670 } 3671 3672 /* just enable an identity mapping */ 3673 itable = sc->sram + cmd.data0; 3674 for (i = 0; i < sc->num_slices; i++) 3675 itable[i] = (uint8_t)i; 3676 3677 cmd.data0 = 1; 3678 cmd.data1 = mxge_rss_hash_type; 3679 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3680 if (err != 0) { 3681 device_printf(sc->dev, "failed to enable slices\n"); 3682 return err; 3683 } 3684 } 3685 3686 3687 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); 3688 3689 cmd.data0 = nbufs; 3690 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 3691 &cmd); 3692 /* error is only meaningful if we're trying to set 3693 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 3694 if (err && nbufs > 1) { 3695 device_printf(sc->dev, 3696 "Failed to set alway-use-n to %d\n", 3697 nbufs); 3698 return EIO; 3699 } 3700 /* Give the firmware the mtu and the big and small buffer 3701 sizes. The firmware wants the big buf size to be a power 3702 of two. 
Luckily, FreeBSD's clusters are powers of two */ 3703 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3704 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3705 cmd.data0 = MHLEN - MXGEFW_PAD; 3706 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3707 &cmd); 3708 cmd.data0 = big_bytes; 3709 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3710 3711 if (err != 0) { 3712 device_printf(sc->dev, "failed to setup params\n"); 3713 goto abort; 3714 } 3715 3716 /* Now give him the pointer to the stats block */ 3717 for (slice = 0; 3718 #ifdef IFNET_BUF_RING 3719 slice < sc->num_slices; 3720 #else 3721 slice < 1; 3722 #endif 3723 slice++) { 3724 ss = &sc->ss[slice]; 3725 cmd.data0 = 3726 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3727 cmd.data1 = 3728 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3729 cmd.data2 = sizeof(struct mcp_irq_data); 3730 cmd.data2 |= (slice << 16); 3731 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3732 } 3733 3734 if (err != 0) { 3735 bus = sc->ss->fw_stats_dma.bus_addr; 3736 bus += offsetof(struct mcp_irq_data, send_done_count); 3737 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3738 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3739 err = mxge_send_cmd(sc, 3740 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3741 &cmd); 3742 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3743 sc->fw_multicast_support = 0; 3744 } else { 3745 sc->fw_multicast_support = 1; 3746 } 3747 3748 if (err != 0) { 3749 device_printf(sc->dev, "failed to setup params\n"); 3750 goto abort; 3751 } 3752 3753 for (slice = 0; slice < sc->num_slices; slice++) { 3754 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3755 if (err != 0) { 3756 device_printf(sc->dev, "couldn't open slice %d\n", 3757 slice); 3758 goto abort; 3759 } 3760 } 3761 3762 /* Finally, start the firmware running */ 3763 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3764 if (err) { 3765 device_printf(sc->dev, "Couldn't bring up link\n"); 3766 goto abort; 3767 } 3768 #ifdef IFNET_BUF_RING 3769 for (slice = 0; slice < sc->num_slices; slice++) { 3770 ss = &sc->ss[slice]; 3771 ss->if_drv_flags |= IFF_DRV_RUNNING; 3772 ss->if_drv_flags &= ~IFF_DRV_OACTIVE; 3773 } 3774 #endif 3775 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3776 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3777 3778 return 0; 3779 3780 3781 abort: 3782 mxge_free_mbufs(sc); 3783 3784 return err; 3785 } 3786 3787 static int 3788 mxge_close(mxge_softc_t *sc, int down) 3789 { 3790 mxge_cmd_t cmd; 3791 int err, old_down_cnt; 3792 #ifdef IFNET_BUF_RING 3793 struct mxge_slice_state *ss; 3794 int slice; 3795 #endif 3796 3797 #ifdef IFNET_BUF_RING 3798 for (slice = 0; slice < sc->num_slices; slice++) { 3799 ss = &sc->ss[slice]; 3800 ss->if_drv_flags &= ~IFF_DRV_RUNNING; 3801 } 3802 #endif 3803 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3804 if (!down) { 3805 old_down_cnt = sc->down_cnt; 3806 wmb(); 3807 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3808 if (err) { 3809 device_printf(sc->dev, 3810 "Couldn't bring down link\n"); 3811 } 3812 if (old_down_cnt == sc->down_cnt) { 3813 /* wait for down irq */ 3814 DELAY(10 * sc->intr_coal_delay); 3815 } 3816 wmb(); 3817 if (old_down_cnt == sc->down_cnt) { 3818 device_printf(sc->dev, "never got down irq\n"); 3819 } 3820 } 3821 mxge_free_mbufs(sc); 3822 3823 return 0; 3824 } 3825 3826 static void 3827 mxge_setup_cfg_space(mxge_softc_t *sc) 3828 { 3829 device_t dev = sc->dev; 3830 int reg; 3831 uint16_t lnk, pectl; 3832 3833 /* find the PCIe link width and set max read 
request to 4KB */ 3834 if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) { 3835 lnk = pci_read_config(dev, reg + 0x12, 2); 3836 sc->link_width = (lnk >> 4) & 0x3f; 3837 3838 if (sc->pectl == 0) { 3839 pectl = pci_read_config(dev, reg + 0x8, 2); 3840 pectl = (pectl & ~0x7000) | (5 << 12); 3841 pci_write_config(dev, reg + 0x8, pectl, 2); 3842 sc->pectl = pectl; 3843 } else { 3844 /* restore saved pectl after watchdog reset */ 3845 pci_write_config(dev, reg + 0x8, sc->pectl, 2); 3846 } 3847 } 3848 3849 /* Enable DMA and Memory space access */ 3850 pci_enable_busmaster(dev); 3851 } 3852 3853 static uint32_t 3854 mxge_read_reboot(mxge_softc_t *sc) 3855 { 3856 device_t dev = sc->dev; 3857 uint32_t vs; 3858 3859 /* find the vendor specific offset */ 3860 if (pci_find_cap(dev, PCIY_VENDOR, (int *)&vs) != 0) { 3861 device_printf(sc->dev, 3862 "could not find vendor specific offset\n"); 3863 return (uint32_t)-1; 3864 } 3865 /* enable read32 mode */ 3866 pci_write_config(dev, vs + 0x10, 0x3, 1); 3867 /* tell NIC which register to read */ 3868 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3869 return (pci_read_config(dev, vs + 0x14, 4)); 3870 } 3871 3872 static void 3873 mxge_watchdog_reset(mxge_softc_t *sc) 3874 { 3875 struct pci_devinfo *dinfo; 3876 struct mxge_slice_state *ss; 3877 int err, running, s, num_tx_slices = 1; 3878 uint32_t reboot; 3879 uint16_t cmd; 3880 3881 err = ENXIO; 3882 3883 device_printf(sc->dev, "Watchdog reset!\n"); 3884 3885 /* 3886 * check to see if the NIC rebooted. If it did, then all of 3887 * PCI config space has been reset, and things like the 3888 * busmaster bit will be zero. If this is the case, then we 3889 * must restore PCI config space before the NIC can be used 3890 * again. 3891 */ 3892 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3893 if (cmd == 0xffff) { 3894 /* 3895 * maybe the watchdog caught the NIC rebooting; wait 3896 * up to 100ms for it to finish.
If it does not come 3897 * back, then give up. 3898 */ 3899 DELAY(1000*100); 3900 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3901 if (cmd == 0xffff) { 3902 device_printf(sc->dev, "NIC disappeared!\n"); 3903 } 3904 } 3905 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3906 /* print the reboot status */ 3907 reboot = mxge_read_reboot(sc); 3908 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3909 reboot); 3910 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 3911 if (running) { 3912 3913 /* 3914 * quiesce NIC so that TX routines will not try to 3915 * xmit after restoration of BAR 3916 */ 3917 3918 /* Mark the link as down */ 3919 if (sc->link_state) { 3920 sc->link_state = 0; 3921 if_link_state_change(sc->ifp, 3922 LINK_STATE_DOWN); 3923 } 3924 #ifdef IFNET_BUF_RING 3925 num_tx_slices = sc->num_slices; 3926 #endif 3927 /* grab all TX locks to ensure no tx */ 3928 for (s = 0; s < num_tx_slices; s++) { 3929 ss = &sc->ss[s]; 3930 mtx_lock(&ss->tx.mtx); 3931 } 3932 mxge_close(sc, 1); 3933 } 3934 /* restore PCI configuration space */ 3935 dinfo = device_get_ivars(sc->dev); 3936 pci_cfg_restore(sc->dev, dinfo); 3937 3938 /* and redo any changes we made to our config space */ 3939 mxge_setup_cfg_space(sc); 3940 3941 /* reload f/w */ 3942 err = mxge_load_firmware(sc, 0); 3943 if (err) { 3944 device_printf(sc->dev, 3945 "Unable to re-load f/w\n"); 3946 } 3947 if (running) { 3948 if (!err) 3949 err = mxge_open(sc); 3950 /* release all TX locks */ 3951 for (s = 0; s < num_tx_slices; s++) { 3952 ss = &sc->ss[s]; 3953 #ifdef IFNET_BUF_RING 3954 mxge_start_locked(ss); 3955 #endif 3956 mtx_unlock(&ss->tx.mtx); 3957 } 3958 } 3959 sc->watchdog_resets++; 3960 } else { 3961 device_printf(sc->dev, 3962 "NIC did not reboot, not resetting\n"); 3963 err = 0; 3964 } 3965 if (err) { 3966 device_printf(sc->dev, "watchdog reset failed\n"); 3967 } else { 3968 if (sc->dying == 2) 3969 sc->dying = 0; 3970 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3971 } 3972 } 3973 3974 static void 3975 mxge_watchdog_task(void *arg, int pending) 3976 { 3977 mxge_softc_t *sc = arg; 3978 3979 3980 mtx_lock(&sc->driver_mtx); 3981 mxge_watchdog_reset(sc); 3982 mtx_unlock(&sc->driver_mtx); 3983 } 3984 3985 static void 3986 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice) 3987 { 3988 tx = &sc->ss[slice].tx; 3989 device_printf(sc->dev, "slice %d stuck?
ring state:\n", slice); 3990 device_printf(sc->dev, 3991 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3992 tx->req, tx->done, tx->queue_active); 3993 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3994 tx->activate, tx->deactivate); 3995 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3996 tx->pkt_done, 3997 be32toh(sc->ss->fw_stats->send_done_count)); 3998 } 3999 4000 static int 4001 mxge_watchdog(mxge_softc_t *sc) 4002 { 4003 mxge_tx_ring_t *tx; 4004 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 4005 int i, err = 0; 4006 4007 /* see if we have outstanding transmits, which 4008 have been pending for more than mxge_ticks */ 4009 for (i = 0; 4010 #ifdef IFNET_BUF_RING 4011 (i < sc->num_slices) && (err == 0); 4012 #else 4013 (i < 1) && (err == 0); 4014 #endif 4015 i++) { 4016 tx = &sc->ss[i].tx; 4017 if (tx->req != tx->done && 4018 tx->watchdog_req != tx->watchdog_done && 4019 tx->done == tx->watchdog_done) { 4020 /* check for pause blocking before resetting */ 4021 if (tx->watchdog_rx_pause == rx_pause) { 4022 mxge_warn_stuck(sc, tx, i); 4023 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 4024 return (ENXIO); 4025 } 4026 else 4027 device_printf(sc->dev, "Flow control blocking " 4028 "xmits, check link partner\n"); 4029 } 4030 4031 tx->watchdog_req = tx->req; 4032 tx->watchdog_done = tx->done; 4033 tx->watchdog_rx_pause = rx_pause; 4034 } 4035 4036 if (sc->need_media_probe) 4037 mxge_media_probe(sc); 4038 return (err); 4039 } 4040 4041 static u_long 4042 mxge_update_stats(mxge_softc_t *sc) 4043 { 4044 struct mxge_slice_state *ss; 4045 u_long pkts = 0; 4046 u_long ipackets = 0; 4047 u_long opackets = 0; 4048 #ifdef IFNET_BUF_RING 4049 u_long obytes = 0; 4050 u_long omcasts = 0; 4051 u_long odrops = 0; 4052 #endif 4053 u_long oerrors = 0; 4054 int slice; 4055 4056 for (slice = 0; slice < sc->num_slices; slice++) { 4057 ss = &sc->ss[slice]; 4058 ipackets += ss->ipackets; 4059 opackets += ss->opackets; 4060 #ifdef IFNET_BUF_RING 4061 obytes += ss->obytes; 4062 omcasts += ss->omcasts; 4063 odrops += ss->tx.br->br_drops; 4064 #endif 4065 oerrors += ss->oerrors; 4066 } 4067 pkts = (ipackets - sc->ifp->if_ipackets); 4068 pkts += (opackets - sc->ifp->if_opackets); 4069 sc->ifp->if_ipackets = ipackets; 4070 sc->ifp->if_opackets = opackets; 4071 #ifdef IFNET_BUF_RING 4072 sc->ifp->if_obytes = obytes; 4073 sc->ifp->if_omcasts = omcasts; 4074 sc->ifp->if_snd.ifq_drops = odrops; 4075 #endif 4076 sc->ifp->if_oerrors = oerrors; 4077 return pkts; 4078 } 4079 4080 static void 4081 mxge_tick(void *arg) 4082 { 4083 mxge_softc_t *sc = arg; 4084 u_long pkts = 0; 4085 int err = 0; 4086 int running, ticks; 4087 uint16_t cmd; 4088 4089 ticks = mxge_ticks; 4090 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 4091 if (running) { 4092 /* aggregate stats from different slices */ 4093 pkts = mxge_update_stats(sc); 4094 if (!sc->watchdog_countdown) { 4095 err = mxge_watchdog(sc); 4096 sc->watchdog_countdown = 4; 4097 } 4098 sc->watchdog_countdown--; 4099 } 4100 if (pkts == 0) { 4101 /* ensure NIC did not suffer h/w fault while idle */ 4102 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 4103 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 4104 sc->dying = 2; 4105 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 4106 err = ENXIO; 4107 } 4108 /* look less often if NIC is idle */ 4109 ticks *= 4; 4110 } 4111 4112 if (err == 0) 4113 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 4114 4115 } 4116 4117 static int 4118 mxge_media_change(struct ifnet *ifp) 4119 { 4120 return EINVAL; 4121 } 4122 4123 
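/*
 * Changing the MTU can change the receive buffer geometry, so if the
 * interface is running it is closed and re-opened around the change;
 * if the re-open with the new MTU fails, the old MTU is restored and
 * the interface is re-opened with it.
 */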
static int
mxge_change_mtu(mxge_softc_t *sc, int mtu)
{
	struct ifnet *ifp = sc->ifp;
	int real_mtu, old_mtu;
	int err = 0;

	real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	if ((real_mtu > sc->max_mtu) || real_mtu < 60)
		return EINVAL;
	mtx_lock(&sc->driver_mtx);
	old_mtu = ifp->if_mtu;
	ifp->if_mtu = mtu;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		mxge_close(sc, 0);
		err = mxge_open(sc);
		if (err != 0) {
			ifp->if_mtu = old_mtu;
			mxge_close(sc, 0);
			(void) mxge_open(sc);
		}
	}
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static void
mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	mxge_softc_t *sc = ifp->if_softc;

	if (sc == NULL)
		return;
	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
	ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
	ifmr->ifm_active |= sc->current_media;
}

static int
mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int err, mask;

	err = 0;
	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		err = ether_ioctl(ifp, command, data);
		break;

	case SIOCSIFMTU:
		err = mxge_change_mtu(sc, ifr->ifr_mtu);
		break;

	case SIOCSIFFLAGS:
		mtx_lock(&sc->driver_mtx);
		if (sc->dying) {
			mtx_unlock(&sc->driver_mtx);
			return EINVAL;
		}
		if (ifp->if_flags & IFF_UP) {
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				err = mxge_open(sc);
			} else {
				/* take care of promisc and allmulti
				   flag changes */
				mxge_change_promisc(sc,
						    ifp->if_flags & IFF_PROMISC);
				mxge_set_multicast_list(sc);
			}
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				mxge_close(sc, 0);
			}
		}
		mtx_unlock(&sc->driver_mtx);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		mtx_lock(&sc->driver_mtx);
		mxge_set_multicast_list(sc);
		mtx_unlock(&sc->driver_mtx);
		break;

	case SIOCSIFCAP:
		mtx_lock(&sc->driver_mtx);
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM;
				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
			}
		} else if (mask & IFCAP_RXCSUM) {
			if (IFCAP_RXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_RXCSUM;
			} else {
				ifp->if_capenable |= IFCAP_RXCSUM;
			}
		}
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO4;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				printf("mxge requires tx checksum offload"
				       " be enabled to use TSO\n");
				err = EINVAL;
			}
		}
#if IFCAP_TSO6
		if (mask & IFCAP_TXCSUM_IPV6) {
			if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
				ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6
						       | IFCAP_TSO6);
				ifp->if_hwassist &= ~(CSUM_TCP_IPV6
						      | CSUM_UDP_IPV6);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM_IPV6;
				ifp->if_hwassist |= (CSUM_TCP_IPV6
						     | CSUM_UDP_IPV6);
			}
		} else if (mask & IFCAP_RXCSUM_IPV6) {
			if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6;
			} else {
				ifp->if_capenable |= IFCAP_RXCSUM_IPV6;
			}
		}
		if (mask & IFCAP_TSO6) {
			if (IFCAP_TSO6 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO6;
			} else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO6;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				printf("mxge requires tx checksum offload"
				       " be enabled to use TSO\n");
				err = EINVAL;
			}
		}
#endif /* IFCAP_TSO6 */

		if (mask & IFCAP_LRO)
			ifp->if_capenable ^= IFCAP_LRO;
		if (mask & IFCAP_VLAN_HWTAGGING)
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
		if (mask & IFCAP_VLAN_HWTSO)
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;

		if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) ||
		    !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING))
			ifp->if_capenable &= ~IFCAP_VLAN_HWTSO;

		mtx_unlock(&sc->driver_mtx);
		VLAN_CAPABILITIES(ifp);

		break;

	case SIOCGIFMEDIA:
		mtx_lock(&sc->driver_mtx);
		mxge_media_probe(sc);
		mtx_unlock(&sc->driver_mtx);
		err = ifmedia_ioctl(ifp, (struct ifreq *)data,
				    &sc->media, command);
		break;

	default:
		err = ENOTTY;
	}
	return err;
}

static void
mxge_fetch_tunables(mxge_softc_t *sc)
{

	TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
	TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
			  &mxge_flow_control);
	TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
			  &mxge_intr_coal_delay);
	TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
			  &mxge_nvidia_ecrc_enable);
	TUNABLE_INT_FETCH("hw.mxge.force_firmware",
			  &mxge_force_firmware);
	TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
			  &mxge_deassert_wait);
	TUNABLE_INT_FETCH("hw.mxge.verbose",
			  &mxge_verbose);
	TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
	TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
	TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
	/* also accept the alternate spelling of the same tunable */
	TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
	TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
	TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);

	if (bootverbose)
		mxge_verbose = 1;
	if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
		mxge_intr_coal_delay = 30;
	if (mxge_ticks == 0)
		mxge_ticks = hz / 2;
	sc->pause = mxge_flow_control;
	if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 ||
	    mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
		mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
	}
	if (mxge_initial_mtu > ETHERMTU_JUMBO ||
	    mxge_initial_mtu < ETHER_MIN_LEN)
		mxge_initial_mtu = ETHERMTU_JUMBO;

	if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
		mxge_throttle = MXGE_MAX_THROTTLE;
	if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
		mxge_throttle = MXGE_MIN_THROTTLE;
	sc->throttle = mxge_throttle;
}
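/*
 * The tunables above are fetched from the kernel environment, so they
 * can be set at boot time in /boot/loader.conf, for example (values
 * shown are illustrative only):
 *
 *	hw.mxge.max_slices="4"
 *	hw.mxge.intr_coal_delay="30"
 */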
static void
mxge_free_slices(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int i;

	if (sc->ss == NULL)
		return;

	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];
		if (ss->fw_stats != NULL) {
			mxge_dma_free(&ss->fw_stats_dma);
			ss->fw_stats = NULL;
#ifdef IFNET_BUF_RING
			if (ss->tx.br != NULL) {
				drbr_free(ss->tx.br, M_DEVBUF);
				ss->tx.br = NULL;
			}
#endif
			mtx_destroy(&ss->tx.mtx);
		}
		if (ss->rx_done.entry != NULL) {
			mxge_dma_free(&ss->rx_done.dma);
			ss->rx_done.entry = NULL;
		}
	}
	free(sc->ss, M_DEVBUF);
	sc->ss = NULL;
}

static int
mxge_alloc_slices(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct mxge_slice_state *ss;
	size_t bytes;
	int err, i, max_intr_slots;

	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		return err;
	}
	sc->rx_ring_size = cmd.data0;
	max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));

	bytes = sizeof (*sc->ss) * sc->num_slices;
	sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sc->ss == NULL)
		return (ENOMEM);
	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];

		ss->sc = sc;

		/* allocate per-slice rx interrupt queues */

		bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
		err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
		if (err != 0)
			goto abort;
		ss->rx_done.entry = ss->rx_done.dma.addr;
		bzero(ss->rx_done.entry, bytes);

		/*
		 * allocate the per-slice firmware stats; stats
		 * (including tx) are used only on the first
		 * slice for now
		 */
#ifndef IFNET_BUF_RING
		if (i > 0)
			continue;
#endif

		bytes = sizeof (*ss->fw_stats);
		err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
				     sizeof (*ss->fw_stats), 64);
		if (err != 0)
			goto abort;
		ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
		snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
			 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
		mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
#ifdef IFNET_BUF_RING
		ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
					   &ss->tx.mtx);
#endif
	}

	return (0);

abort:
	mxge_free_slices(sc);
	return (ENOMEM);
}

static void
mxge_slice_probe(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	char *old_fw;
	int msix_cnt, status, max_intr_slots;

	sc->num_slices = 1;
	/*
	 * don't enable multiple slices if they are disabled by the
	 * tunable, or if this is not an SMP system
	 */
	if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
		return;

	/* see how many MSI-X interrupts are available */
	msix_cnt = pci_msix_count(sc->dev);
	if (msix_cnt < 2)
		return;

	/* now load the slice aware firmware to see what it supports */
	old_fw = sc->fw_name;
	if (old_fw == mxge_fw_aligned)
		sc->fw_name = mxge_fw_rss_aligned;
	else
		sc->fw_name = mxge_fw_rss_unaligned;
	status = mxge_load_firmware(sc, 0);
	if (status != 0) {
		device_printf(sc->dev, "Falling back to a single slice\n");
		return;
	}

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		goto abort_with_fw;
	}

	/* get rx ring size */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		goto abort_with_fw;
	}
	max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
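	/*
	 * Note: the factor of two above presumably allows for both the
	 * small- and big-buffer receive rings posting completions into
	 * the same per-slice interrupt queue.
	 */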
	/* tell it the size of the interrupt queues */
	cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
		goto abort_with_fw;
	}

	/* ask the maximum number of slices it supports */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
	if (status != 0) {
		device_printf(sc->dev,
			      "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
		goto abort_with_fw;
	}
	sc->num_slices = cmd.data0;
	if (sc->num_slices > msix_cnt)
		sc->num_slices = msix_cnt;

	if (mxge_max_slices == -1) {
		/* cap to number of CPUs in system */
		if (sc->num_slices > mp_ncpus)
			sc->num_slices = mp_ncpus;
	} else {
		if (sc->num_slices > mxge_max_slices)
			sc->num_slices = mxge_max_slices;
	}
	/* make sure it is a power of two */
	while (sc->num_slices & (sc->num_slices - 1))
		sc->num_slices--;

	if (mxge_verbose)
		device_printf(sc->dev, "using %d slices\n",
			      sc->num_slices);

	return;

abort_with_fw:
	sc->fw_name = old_fw;
	(void) mxge_load_firmware(sc, 0);
}

static int
mxge_add_msix_irqs(mxge_softc_t *sc)
{
	size_t bytes;
	int count, err, i, rid;

	rid = PCIR_BAR(2);
	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
						    &rid, RF_ACTIVE);

	if (sc->msix_table_res == NULL) {
		device_printf(sc->dev, "couldn't alloc MSIX table res\n");
		return ENXIO;
	}

	count = sc->num_slices;
	err = pci_alloc_msix(sc->dev, &count);
	if (err != 0) {
		device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d, "
			      "err = %d\n", sc->num_slices, err);
		goto abort_with_msix_table;
	}
	if (count < sc->num_slices) {
		device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
			      sc->num_slices, count);
		device_printf(sc->dev,
			      "Try setting hw.mxge.max_slices to %d\n",
			      count);
		err = ENOSPC;
		goto abort_with_msix;
	}
	bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
	sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_irq_res == NULL) {
		err = ENOMEM;
		goto abort_with_msix;
	}

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
							     SYS_RES_IRQ,
							     &rid, RF_ACTIVE);
		if (sc->msix_irq_res[i] == NULL) {
			device_printf(sc->dev, "couldn't allocate IRQ res"
				      " for message %d\n", i);
			err = ENXIO;
			goto abort_with_res;
		}
	}

	bytes = sizeof (*sc->msix_ih) * sc->num_slices;
	sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);

	for (i = 0; i < sc->num_slices; i++) {
		err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
				     INTR_TYPE_NET | INTR_MPSAFE,
#if __FreeBSD_version > 700030
				     NULL,
#endif
				     mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
		if (err != 0) {
			device_printf(sc->dev, "couldn't setup intr for "
				      "message %d\n", i);
			goto abort_with_intr;
		}
		bus_describe_intr(sc->dev, sc->msix_irq_res[i],
				  sc->msix_ih[i], "s%d", i);
	}

	if (mxge_verbose) {
		device_printf(sc->dev, "using %d msix IRQs:",
			      sc->num_slices);
		for (i = 0; i < sc->num_slices; i++)
			printf(" %ld", rman_get_start(sc->msix_irq_res[i]));
		printf("\n");
	}
	return (0);
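/*
 * Error unwind: each label below frees what was acquired after the
 * corresponding failure point, then falls through to free everything
 * acquired earlier.
 */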
abort_with_intr:
	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	free(sc->msix_ih, M_DEVBUF);

abort_with_res:
	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	free(sc->msix_irq_res, M_DEVBUF);

abort_with_msix:
	pci_release_msi(sc->dev);

abort_with_msix_table:
	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	return err;
}

static int
mxge_add_single_irq(mxge_softc_t *sc)
{
	int count, err, rid;

	count = pci_msi_count(sc->dev);
	if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
		rid = 1;
	} else {
		rid = 0;
		sc->legacy_irq = 1;
	}
	sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0,
					 1, RF_SHAREABLE | RF_ACTIVE);
	if (sc->irq_res == NULL) {
		device_printf(sc->dev, "could not alloc interrupt\n");
		return ENXIO;
	}
	if (mxge_verbose)
		device_printf(sc->dev, "using %s irq %ld\n",
			      sc->legacy_irq ? "INTx" : "MSI",
			      rman_get_start(sc->irq_res));
	err = bus_setup_intr(sc->dev, sc->irq_res,
			     INTR_TYPE_NET | INTR_MPSAFE,
#if __FreeBSD_version > 700030
			     NULL,
#endif
			     mxge_intr, &sc->ss[0], &sc->ih);
	if (err != 0) {
		bus_release_resource(sc->dev, SYS_RES_IRQ,
				     sc->legacy_irq ? 0 : 1, sc->irq_res);
		if (!sc->legacy_irq)
			pci_release_msi(sc->dev);
	}
	return err;
}

static void
mxge_rem_msix_irqs(mxge_softc_t *sc)
{
	int i, rid;

	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	free(sc->msix_ih, M_DEVBUF);

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	free(sc->msix_irq_res, M_DEVBUF);

	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	pci_release_msi(sc->dev);
	return;
}
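/*
 * Note the interrupt rid convention used above and below: rid 0 is
 * the legacy INTx interrupt, a single MSI message is rid 1, and MSI-X
 * messages are numbered rid 1 through rid num_slices.
 */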
static void
mxge_rem_single_irq(mxge_softc_t *sc)
{
	bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
	bus_release_resource(sc->dev, SYS_RES_IRQ,
			     sc->legacy_irq ? 0 : 1, sc->irq_res);
	if (!sc->legacy_irq)
		pci_release_msi(sc->dev);
}

static void
mxge_rem_irq(mxge_softc_t *sc)
{
	if (sc->num_slices > 1)
		mxge_rem_msix_irqs(sc);
	else
		mxge_rem_single_irq(sc);
}

static int
mxge_add_irq(mxge_softc_t *sc)
{
	int err;

	if (sc->num_slices > 1)
		err = mxge_add_msix_irqs(sc);
	else
		err = mxge_add_single_irq(sc);

	/* dead code: the re-probe below is disabled by the "0 &&" */
	if (0 && err == 0 && sc->num_slices > 1) {
		mxge_rem_msix_irqs(sc);
		err = mxge_add_msix_irqs(sc);
	}
	return err;
}

static int
mxge_attach(device_t dev)
{
	mxge_cmd_t cmd;
	mxge_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp;
	int err, rid;

	sc->dev = dev;
	mxge_fetch_tunables(sc);

	TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
	sc->tq = taskqueue_create("mxge_taskq", M_WAITOK,
				  taskqueue_thread_enqueue, &sc->tq);
	if (sc->tq == NULL) {
		err = ENOMEM;
		goto abort_with_nothing;
	}

	err = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
				 1,			/* alignment */
				 0,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 MXGE_MAX_SEND_DESC,	/* num segs */
				 65536,			/* maxsegsize */
				 0,			/* flags */
				 NULL, NULL,		/* lock */
				 &sc->parent_dmat);	/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating parent dmat\n",
			      err);
		goto abort_with_tq;
	}

	ifp = sc->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "can not if_alloc()\n");
		err = ENOSPC;
		goto abort_with_parent_dmat;
	}
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
		 device_get_nameunit(dev));
	mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
	snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
		 "%s:drv", device_get_nameunit(dev));
	mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
		 MTX_NETWORK_LOCK, MTX_DEF);

	callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);

	mxge_setup_cfg_space(sc);

	/* Map the board into the kernel */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
					 ~0, 1, RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto abort_with_lock;
	}
	sc->sram = rman_get_virtual(sc->mem_res);
	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
	if (sc->sram_size > rman_get_size(sc->mem_res)) {
		device_printf(dev, "impossible memory region size %ld\n",
			      rman_get_size(sc->mem_res));
		err = ENXIO;
		goto abort_with_mem_res;
	}

	/* make NULL terminated copy of the EEPROM strings section of
	   lanai SRAM */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
				sc->eeprom_strings,
				MXGE_EEPROM_STRINGS_SIZE - 2);
	err = mxge_parse_strings(sc);
	if (err != 0)
		goto abort_with_mem_res;

	/* Enable write combining for efficient use of PCIe bus */
	mxge_enable_wc(sc);
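	/*
	 * Three small out-of-band DMA regions follow: the command/
	 * response block, a 64-byte region apparently used as a source
	 * of zeroes when short frames are padded, and a page-sized,
	 * page-aligned buffer used for the firmware's DMA benchmark.
	 */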
	/* Allocate the out of band dma memory */
	err = mxge_dma_alloc(sc, &sc->cmd_dma,
			     sizeof (mxge_cmd_t), 64);
	if (err != 0)
		goto abort_with_mem_res;
	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0)
		goto abort_with_cmd_dma;

	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0)
		goto abort_with_zeropad_dma;

	/* select & load the firmware */
	err = mxge_select_firmware(sc);
	if (err != 0)
		goto abort_with_dmabench;
	sc->intr_coal_delay = mxge_intr_coal_delay;

	mxge_slice_probe(sc);
	err = mxge_alloc_slices(sc);
	if (err != 0)
		goto abort_with_dmabench;

	err = mxge_reset(sc, 0);
	if (err != 0)
		goto abort_with_slices;

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to allocate rings\n");
		goto abort_with_slices;
	}

	err = mxge_add_irq(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to add irq\n");
		goto abort_with_rings;
	}

	if_initbaudrate(ifp, IF_Gbps(10));
	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
		IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 |
		IFCAP_RXCSUM_IPV6;
#if defined(INET) || defined(INET6)
	ifp->if_capabilities |= IFCAP_LRO;
#endif

#ifdef MXGE_NEW_VLAN_API
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;

	/* Only FW 1.4.32 and newer can do TSO over vlans */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 32)
		ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
#endif
	sc->max_mtu = mxge_max_mtu(sc);
	if (sc->max_mtu >= 9000)
		ifp->if_capabilities |= IFCAP_JUMBO_MTU;
	else
		device_printf(dev, "MTU limited to %d.  Install "
			      "latest firmware for 9000 byte jumbo support\n",
			      sc->max_mtu - ETHER_HDR_LEN);
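	/*
	 * Default hardware checksum assistance.  The SIOCSIFCAP handler
	 * in mxge_ioctl() keeps if_hwassist in sync with if_capenable
	 * when these capabilities are toggled at run time.
	 */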
	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
	ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6;
	/* check to see if f/w supports TSO for IPv6 */
	if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) {
		if (CSUM_TCP_IPV6)
			ifp->if_capabilities |= IFCAP_TSO6;
		sc->max_tso6_hlen = min(cmd.data0,
					sizeof (sc->ss[0].scratch));
	}
	ifp->if_capenable = ifp->if_capabilities;
	if (sc->lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	ifp->if_init = mxge_init;
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = mxge_ioctl;
	ifp->if_start = mxge_start;
	/* Initialise the ifmedia structure */
	ifmedia_init(&sc->media, 0, mxge_media_change,
		     mxge_media_status);
	mxge_media_init(sc);
	mxge_media_probe(sc);
	sc->dying = 0;
	ether_ifattach(ifp, sc->mac_addr);
	/* ether_ifattach sets mtu to ETHERMTU */
	if (mxge_initial_mtu != ETHERMTU)
		mxge_change_mtu(sc, mxge_initial_mtu);

	mxge_add_sysctls(sc);
#ifdef IFNET_BUF_RING
	ifp->if_transmit = mxge_transmit;
	ifp->if_qflush = mxge_qflush;
#endif
	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
				device_get_nameunit(sc->dev));
	callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	return 0;

abort_with_rings:
	mxge_free_rings(sc);
abort_with_slices:
	mxge_free_slices(sc);
abort_with_dmabench:
	mxge_dma_free(&sc->dmabench_dma);
abort_with_zeropad_dma:
	mxge_dma_free(&sc->zeropad_dma);
abort_with_cmd_dma:
	mxge_dma_free(&sc->cmd_dma);
abort_with_mem_res:
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
abort_with_lock:
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(ifp);
abort_with_parent_dmat:
	bus_dma_tag_destroy(sc->parent_dmat);
abort_with_tq:
	if (sc->tq != NULL) {
		taskqueue_drain(sc->tq, &sc->watchdog_task);
		taskqueue_free(sc->tq);
		sc->tq = NULL;
	}
abort_with_nothing:
	return err;
}

static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	if (mxge_vlans_active(sc)) {
		device_printf(sc->dev,
			      "Detach vlans before removing module\n");
		return EBUSY;
	}
	mtx_lock(&sc->driver_mtx);
	sc->dying = 1;
	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
		mxge_close(sc, 0);
	mtx_unlock(&sc->driver_mtx);
	ether_ifdetach(sc->ifp);
	if (sc->tq != NULL) {
		taskqueue_drain(sc->tq, &sc->watchdog_task);
		taskqueue_free(sc->tq);
		sc->tq = NULL;
	}
	callout_drain(&sc->co_hdl);
	ifmedia_removeall(&sc->media);
	mxge_dummy_rdma(sc, 0);
	mxge_rem_sysctls(sc);
	mxge_rem_irq(sc);
	mxge_free_rings(sc);
	mxge_free_slices(sc);
	mxge_dma_free(&sc->dmabench_dma);
	mxge_dma_free(&sc->zeropad_dma);
	mxge_dma_free(&sc->cmd_dma);
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(sc->ifp);
	bus_dma_tag_destroy(sc->parent_dmat);
	return 0;
}

static int
mxge_shutdown(device_t dev)
{
	return 0;
}
/*
  This file uses Myri10GE driver indentation.

  Local Variables:
  c-file-style:"linux"
  tab-width:8
  End:
*/