1 /****************************************************************************** 2 SPDX-License-Identifier: BSD-2-Clause 3 4 Copyright (c) 2006-2013, Myricom Inc. 5 All rights reserved. 6 7 Redistribution and use in source and binary forms, with or without 8 modification, are permitted provided that the following conditions are met: 9 10 1. Redistributions of source code must retain the above copyright notice, 11 this list of conditions and the following disclaimer. 12 13 2. Neither the name of the Myricom Inc, nor the names of its 14 contributors may be used to endorse or promote products derived from 15 this software without specific prior written permission. 16 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 POSSIBILITY OF SUCH DAMAGE. 28 29 ***************************************************************************/ 30 31 #include <sys/cdefs.h> 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/linker.h> 35 #include <sys/firmware.h> 36 #include <sys/endian.h> 37 #include <sys/sockio.h> 38 #include <sys/mbuf.h> 39 #include <sys/malloc.h> 40 #include <sys/kdb.h> 41 #include <sys/kernel.h> 42 #include <sys/lock.h> 43 #include <sys/module.h> 44 #include <sys/socket.h> 45 #include <sys/sysctl.h> 46 #include <sys/sx.h> 47 #include <sys/taskqueue.h> 48 #include <contrib/zlib/zlib.h> 49 #include <dev/zlib/zcalloc.h> 50 51 #include <net/if.h> 52 #include <net/if_var.h> 53 #include <net/if_arp.h> 54 #include <net/ethernet.h> 55 #include <net/if_dl.h> 56 #include <net/if_media.h> 57 58 #include <net/bpf.h> 59 60 #include <net/if_types.h> 61 #include <net/if_vlan_var.h> 62 63 #include <netinet/in_systm.h> 64 #include <netinet/in.h> 65 #include <netinet/ip.h> 66 #include <netinet/ip6.h> 67 #include <netinet/tcp.h> 68 #include <netinet/tcp_lro.h> 69 #include <netinet6/ip6_var.h> 70 71 #include <machine/bus.h> 72 #include <machine/in_cksum.h> 73 #include <machine/resource.h> 74 #include <sys/bus.h> 75 #include <sys/rman.h> 76 #include <sys/smp.h> 77 78 #include <dev/pci/pcireg.h> 79 #include <dev/pci/pcivar.h> 80 #include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */ 81 82 #include <vm/vm.h> /* for pmap_mapdev() */ 83 #include <vm/pmap.h> 84 85 #if defined(__i386) || defined(__amd64) 86 #include <machine/specialreg.h> 87 #endif 88 89 #include <dev/mxge/mxge_mcp.h> 90 #include <dev/mxge/mcp_gen_header.h> 91 /*#define MXGE_FAKE_IFP*/ 92 #include <dev/mxge/if_mxge_var.h> 93 #include <sys/buf_ring.h> 94 95 #include "opt_inet.h" 96 #include "opt_inet6.h" 97 98 /* tunable params */ 99 static int mxge_nvidia_ecrc_enable = 1; 100 static int mxge_force_firmware = 0; 101 static int mxge_intr_coal_delay = 30; 102 static int mxge_deassert_wait = 1; 103 static int mxge_flow_control = 1; 104 static int mxge_verbose = 0; 105 static int mxge_ticks; 106 static int mxge_max_slices = 1; 107 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 108 static int mxge_always_promisc = 0; 109 static int mxge_initial_mtu = ETHERMTU_JUMBO; 110 static int mxge_throttle = 0; 111 static char *mxge_fw_unaligned = "mxge_ethp_z8e"; 112 static char *mxge_fw_aligned = "mxge_eth_z8e"; 113 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; 114 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; 115 116 static int mxge_probe(device_t dev); 117 static int mxge_attach(device_t dev); 118 static int mxge_detach(device_t dev); 119 static int mxge_shutdown(device_t dev); 120 static void mxge_intr(void *arg); 121 122 static device_method_t mxge_methods[] = 123 { 124 /* Device interface */ 125 DEVMETHOD(device_probe, mxge_probe), 126 DEVMETHOD(device_attach, mxge_attach), 127 DEVMETHOD(device_detach, mxge_detach), 128 DEVMETHOD(device_shutdown, mxge_shutdown), 129 130 DEVMETHOD_END 131 }; 132 133 static driver_t mxge_driver = 134 { 135 "mxge", 136 mxge_methods, 137 sizeof(mxge_softc_t), 138 }; 139 140 /* Declare ourselves to be a child of the PCI bus.*/ 141 DRIVER_MODULE(mxge, pci, mxge_driver, 0, 0); 142 MODULE_DEPEND(mxge, firmware, 1, 1, 1); 143 MODULE_DEPEND(mxge, zlib, 1, 1, 1); 144 145 static int mxge_load_firmware(mxge_softc_t *sc, int adopt); 146 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 147 static int mxge_close(mxge_softc_t *sc, int down); 148 static int mxge_open(mxge_softc_t *sc); 149 static void mxge_tick(void *arg); 150 151 static int 152 mxge_probe(device_t dev) 153 { 154 int rev; 155 156 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && 157 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) || 158 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) { 159 rev = pci_get_revid(dev); 160 switch (rev) { 161 case MXGE_PCI_REV_Z8E: 162 device_set_desc(dev, "Myri10G-PCIE-8A"); 163 break; 164 case MXGE_PCI_REV_Z8ES: 165 device_set_desc(dev, "Myri10G-PCIE-8B"); 166 break; 167 default: 168 device_set_desc(dev, "Myri10G-PCIE-8??"); 169 device_printf(dev, "Unrecognized rev %d NIC\n", 170 rev); 171 break; 172 } 173 return 0; 174 } 175 return ENXIO; 176 } 177 178 static void 179 mxge_enable_wc(mxge_softc_t *sc) 180 { 181 #if defined(__i386) || defined(__amd64) 182 vm_offset_t len; 183 int err; 184 185 sc->wc = 1; 186 len = rman_get_size(sc->mem_res); 187 err = pmap_change_attr((vm_offset_t) sc->sram, 188 len, PAT_WRITE_COMBINING); 189 if (err != 0) { 190 device_printf(sc->dev, "pmap_change_attr failed, %d\n", 191 err); 192 sc->wc = 0; 193 } 194 #endif 195 } 196 197 /* callback to get our DMA address */ 198 static void 199 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, 200 int error) 201 { 202 if (error == 0) { 203 *(bus_addr_t *) arg = segs->ds_addr; 204 } 205 } 206 207 static int 208 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, 209 bus_size_t alignment) 210 { 211 int err; 212 device_t dev = sc->dev; 213 bus_size_t boundary, maxsegsize; 214 215 if (bytes > 4096 && alignment == 4096) { 216 boundary = 0; 217 maxsegsize = bytes; 218 } else { 219 boundary = 4096; 220 maxsegsize = 4096; 221 } 222 223 /* allocate DMAable memory tags */ 224 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 225 alignment, /* alignment */ 226 boundary, /* boundary */ 227 BUS_SPACE_MAXADDR, /* low */ 228 BUS_SPACE_MAXADDR, /* high */ 229 NULL, NULL, /* filter */ 230 bytes, /* maxsize */ 231 1, /* num segs */ 232 maxsegsize, /* maxsegsize */ 233 BUS_DMA_COHERENT, /* flags */ 234 NULL, NULL, /* lock */ 235 &dma->dmat); /* tag */ 236 if (err != 0) { 237 device_printf(dev, "couldn't alloc tag (err = %d)\n", err); 238 return err; 239 } 240 241 /* allocate DMAable memory & map */ 242 err = bus_dmamem_alloc(dma->dmat, &dma->addr, 243 (BUS_DMA_WAITOK | BUS_DMA_COHERENT 244 | BUS_DMA_ZERO), &dma->map); 245 if (err != 0) { 246 device_printf(dev, "couldn't alloc mem (err = %d)\n", err); 247 goto abort_with_dmat; 248 } 249 250 /* load the memory */ 251 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, 252 mxge_dmamap_callback, 253 (void *)&dma->bus_addr, 0); 254 if (err != 0) { 255 device_printf(dev, "couldn't load map (err = %d)\n", err); 256 goto abort_with_mem; 257 } 258 return 0; 259 260 abort_with_mem: 261 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 262 abort_with_dmat: 263 (void)bus_dma_tag_destroy(dma->dmat); 264 return err; 265 } 266 267 static void 268 mxge_dma_free(mxge_dma_t *dma) 269 { 270 bus_dmamap_unload(dma->dmat, dma->map); 271 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 272 (void)bus_dma_tag_destroy(dma->dmat); 273 } 274 275 /* 276 * The eeprom strings on the lanaiX have the format 277 * SN=x\0 278 * MAC=x:x:x:x:x:x\0 279 * PC=text\0 280 */ 281 282 static int 283 mxge_parse_strings(mxge_softc_t *sc) 284 { 285 char *ptr; 286 int i, found_mac, found_sn2; 287 char *endptr; 288 289 ptr = sc->eeprom_strings; 290 found_mac = 0; 291 found_sn2 = 0; 292 while (*ptr != '\0') { 293 if (strncmp(ptr, "MAC=", 4) == 0) { 294 ptr += 4; 295 for (i = 0;;) { 296 sc->mac_addr[i] = strtoul(ptr, &endptr, 16); 297 if (endptr - ptr != 2) 298 goto abort; 299 ptr = endptr; 300 if (++i == 6) 301 break; 302 if (*ptr++ != ':') 303 goto abort; 304 } 305 found_mac = 1; 306 } else if (strncmp(ptr, "PC=", 3) == 0) { 307 ptr += 3; 308 strlcpy(sc->product_code_string, ptr, 309 sizeof(sc->product_code_string)); 310 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) { 311 ptr += 3; 312 strlcpy(sc->serial_number_string, ptr, 313 sizeof(sc->serial_number_string)); 314 } else if (strncmp(ptr, "SN2=", 4) == 0) { 315 /* SN2 takes precedence over SN */ 316 ptr += 4; 317 found_sn2 = 1; 318 strlcpy(sc->serial_number_string, ptr, 319 sizeof(sc->serial_number_string)); 320 } 321 while (*ptr++ != '\0') {} 322 } 323 324 if (found_mac) 325 return 0; 326 327 abort: 328 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 329 330 return ENXIO; 331 } 332 333 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 334 static void 335 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 336 { 337 uint32_t val; 338 unsigned long base, off; 339 char *va, *cfgptr; 340 device_t pdev, mcp55; 341 uint16_t vendor_id, device_id, word; 342 uintptr_t bus, slot, func, ivend, idev; 343 uint32_t *ptr32; 344 345 if (!mxge_nvidia_ecrc_enable) 346 return; 347 348 pdev = device_get_parent(device_get_parent(sc->dev)); 349 if (pdev == NULL) { 350 device_printf(sc->dev, "could not find parent?\n"); 351 return; 352 } 353 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 354 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 355 356 if (vendor_id != 0x10de) 357 return; 358 359 base = 0; 360 361 if (device_id == 0x005d) { 362 /* ck804, base address is magic */ 363 base = 0xe0000000UL; 364 } else if (device_id >= 0x0374 && device_id <= 0x378) { 365 /* mcp55, base address stored in chipset */ 366 mcp55 = pci_find_bsf(0, 0, 0); 367 if (mcp55 && 368 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 369 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 370 word = pci_read_config(mcp55, 0x90, 2); 371 base = ((unsigned long)word & 0x7ffeU) << 25; 372 } 373 } 374 if (!base) 375 return; 376 377 /* XXXX 378 Test below is commented because it is believed that doing 379 config read/write beyond 0xff will access the config space 380 for the next larger function. Uncomment this and remove 381 the hacky pmap_mapdev() way of accessing config space when 382 FreeBSD grows support for extended pcie config space access 383 */ 384 #if 0 385 /* See if we can, by some miracle, access the extended 386 config space */ 387 val = pci_read_config(pdev, 0x178, 4); 388 if (val != 0xffffffff) { 389 val |= 0x40; 390 pci_write_config(pdev, 0x178, val, 4); 391 return; 392 } 393 #endif 394 /* Rather than using normal pci config space writes, we must 395 * map the Nvidia config space ourselves. This is because on 396 * opteron/nvidia class machine the 0xe000000 mapping is 397 * handled by the nvidia chipset, that means the internal PCI 398 * device (the on-chip northbridge), or the amd-8131 bridge 399 * and things behind them are not visible by this method. 400 */ 401 402 BUS_READ_IVAR(device_get_parent(pdev), pdev, 403 PCI_IVAR_BUS, &bus); 404 BUS_READ_IVAR(device_get_parent(pdev), pdev, 405 PCI_IVAR_SLOT, &slot); 406 BUS_READ_IVAR(device_get_parent(pdev), pdev, 407 PCI_IVAR_FUNCTION, &func); 408 BUS_READ_IVAR(device_get_parent(pdev), pdev, 409 PCI_IVAR_VENDOR, &ivend); 410 BUS_READ_IVAR(device_get_parent(pdev), pdev, 411 PCI_IVAR_DEVICE, &idev); 412 413 off = base 414 + 0x00100000UL * (unsigned long)bus 415 + 0x00001000UL * (unsigned long)(func 416 + 8 * slot); 417 418 /* map it into the kernel */ 419 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 420 421 if (va == NULL) { 422 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 423 return; 424 } 425 /* get a pointer to the config space mapped into the kernel */ 426 cfgptr = va + (off & PAGE_MASK); 427 428 /* make sure that we can really access it */ 429 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 430 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 431 if (! (vendor_id == ivend && device_id == idev)) { 432 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 433 vendor_id, device_id); 434 pmap_unmapdev(va, PAGE_SIZE); 435 return; 436 } 437 438 ptr32 = (uint32_t*)(cfgptr + 0x178); 439 val = *ptr32; 440 441 if (val == 0xffffffff) { 442 device_printf(sc->dev, "extended mapping failed\n"); 443 pmap_unmapdev(va, PAGE_SIZE); 444 return; 445 } 446 *ptr32 = val | 0x40; 447 pmap_unmapdev(va, PAGE_SIZE); 448 if (mxge_verbose) 449 device_printf(sc->dev, 450 "Enabled ECRC on upstream Nvidia bridge " 451 "at %d:%d:%d\n", 452 (int)bus, (int)slot, (int)func); 453 return; 454 } 455 #else 456 static void 457 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 458 { 459 device_printf(sc->dev, 460 "Nforce 4 chipset on non-x86/amd64!?!?!\n"); 461 return; 462 } 463 #endif 464 465 static int 466 mxge_dma_test(mxge_softc_t *sc, int test_type) 467 { 468 mxge_cmd_t cmd; 469 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr; 470 int status; 471 uint32_t len; 472 char *test = " "; 473 474 /* Run a small DMA test. 475 * The magic multipliers to the length tell the firmware 476 * to do DMA read, write, or read+write tests. The 477 * results are returned in cmd.data0. The upper 16 478 * bits of the return is the number of transfers completed. 479 * The lower 16 bits is the time in 0.5us ticks that the 480 * transfers took to complete. 481 */ 482 483 len = sc->tx_boundary; 484 485 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 486 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 487 cmd.data2 = len * 0x10000; 488 status = mxge_send_cmd(sc, test_type, &cmd); 489 if (status != 0) { 490 test = "read"; 491 goto abort; 492 } 493 sc->read_dma = ((cmd.data0>>16) * len * 2) / 494 (cmd.data0 & 0xffff); 495 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 496 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 497 cmd.data2 = len * 0x1; 498 status = mxge_send_cmd(sc, test_type, &cmd); 499 if (status != 0) { 500 test = "write"; 501 goto abort; 502 } 503 sc->write_dma = ((cmd.data0>>16) * len * 2) / 504 (cmd.data0 & 0xffff); 505 506 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 507 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 508 cmd.data2 = len * 0x10001; 509 status = mxge_send_cmd(sc, test_type, &cmd); 510 if (status != 0) { 511 test = "read/write"; 512 goto abort; 513 } 514 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 515 (cmd.data0 & 0xffff); 516 517 abort: 518 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 519 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 520 test, status); 521 522 return status; 523 } 524 525 /* 526 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 527 * when the PCI-E Completion packets are aligned on an 8-byte 528 * boundary. Some PCI-E chip sets always align Completion packets; on 529 * the ones that do not, the alignment can be enforced by enabling 530 * ECRC generation (if supported). 531 * 532 * When PCI-E Completion packets are not aligned, it is actually more 533 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 534 * 535 * If the driver can neither enable ECRC nor verify that it has 536 * already been enabled, then it must use a firmware image which works 537 * around unaligned completion packets (ethp_z8e.dat), and it should 538 * also ensure that it never gives the device a Read-DMA which is 539 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 540 * enabled, then the driver should use the aligned (eth_z8e.dat) 541 * firmware image, and set tx_boundary to 4KB. 542 */ 543 544 static int 545 mxge_firmware_probe(mxge_softc_t *sc) 546 { 547 device_t dev = sc->dev; 548 int reg, status; 549 uint16_t pectl; 550 551 sc->tx_boundary = 4096; 552 /* 553 * Verify the max read request size was set to 4KB 554 * before trying the test with 4KB. 555 */ 556 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { 557 pectl = pci_read_config(dev, reg + 0x8, 2); 558 if ((pectl & (5 << 12)) != (5 << 12)) { 559 device_printf(dev, "Max Read Req. size != 4k (0x%x\n", 560 pectl); 561 sc->tx_boundary = 2048; 562 } 563 } 564 565 /* 566 * load the optimized firmware (which assumes aligned PCIe 567 * completions) in order to see if it works on this host. 568 */ 569 sc->fw_name = mxge_fw_aligned; 570 status = mxge_load_firmware(sc, 1); 571 if (status != 0) { 572 return status; 573 } 574 575 /* 576 * Enable ECRC if possible 577 */ 578 mxge_enable_nvidia_ecrc(sc); 579 580 /* 581 * Run a DMA test which watches for unaligned completions and 582 * aborts on the first one seen. Not required on Z8ES or newer. 583 */ 584 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES) 585 return 0; 586 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 587 if (status == 0) 588 return 0; /* keep the aligned firmware */ 589 590 if (status != E2BIG) 591 device_printf(dev, "DMA test failed: %d\n", status); 592 if (status == ENOSYS) 593 device_printf(dev, "Falling back to ethp! " 594 "Please install up to date fw\n"); 595 return status; 596 } 597 598 static int 599 mxge_select_firmware(mxge_softc_t *sc) 600 { 601 int aligned = 0; 602 int force_firmware = mxge_force_firmware; 603 604 if (sc->throttle) 605 force_firmware = sc->throttle; 606 607 if (force_firmware != 0) { 608 if (force_firmware == 1) 609 aligned = 1; 610 else 611 aligned = 0; 612 if (mxge_verbose) 613 device_printf(sc->dev, 614 "Assuming %s completions (forced)\n", 615 aligned ? "aligned" : "unaligned"); 616 goto abort; 617 } 618 619 /* if the PCIe link width is 4 or less, we can use the aligned 620 firmware and skip any checks */ 621 if (sc->link_width != 0 && sc->link_width <= 4) { 622 device_printf(sc->dev, 623 "PCIe x%d Link, expect reduced performance\n", 624 sc->link_width); 625 aligned = 1; 626 goto abort; 627 } 628 629 if (0 == mxge_firmware_probe(sc)) 630 return 0; 631 632 abort: 633 if (aligned) { 634 sc->fw_name = mxge_fw_aligned; 635 sc->tx_boundary = 4096; 636 } else { 637 sc->fw_name = mxge_fw_unaligned; 638 sc->tx_boundary = 2048; 639 } 640 return (mxge_load_firmware(sc, 0)); 641 } 642 643 static int 644 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 645 { 646 647 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 648 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 649 be32toh(hdr->mcp_type)); 650 return EIO; 651 } 652 653 /* save firmware version for sysctl */ 654 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version)); 655 if (mxge_verbose) 656 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 657 658 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 659 &sc->fw_ver_minor, &sc->fw_ver_tiny); 660 661 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 662 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 663 device_printf(sc->dev, "Found firmware version %s\n", 664 sc->fw_version); 665 device_printf(sc->dev, "Driver needs %d.%d\n", 666 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 667 return EINVAL; 668 } 669 return 0; 670 671 } 672 673 static int 674 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 675 { 676 z_stream zs; 677 char *inflate_buffer; 678 const struct firmware *fw; 679 const mcp_gen_header_t *hdr; 680 unsigned hdr_offset; 681 int status; 682 unsigned int i; 683 size_t fw_len; 684 685 fw = firmware_get(sc->fw_name); 686 if (fw == NULL) { 687 device_printf(sc->dev, "Could not find firmware image %s\n", 688 sc->fw_name); 689 return ENOENT; 690 } 691 692 /* setup zlib and decompress f/w */ 693 bzero(&zs, sizeof (zs)); 694 zs.zalloc = zcalloc_nowait; 695 zs.zfree = zcfree; 696 status = inflateInit(&zs); 697 if (status != Z_OK) { 698 status = EIO; 699 goto abort_with_fw; 700 } 701 702 /* the uncompressed size is stored as the firmware version, 703 which would otherwise go unused */ 704 fw_len = (size_t) fw->version; 705 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); 706 if (inflate_buffer == NULL) 707 goto abort_with_zs; 708 zs.avail_in = fw->datasize; 709 zs.next_in = __DECONST(char *, fw->data); 710 zs.avail_out = fw_len; 711 zs.next_out = inflate_buffer; 712 status = inflate(&zs, Z_FINISH); 713 if (status != Z_STREAM_END) { 714 device_printf(sc->dev, "zlib %d\n", status); 715 status = EIO; 716 goto abort_with_buffer; 717 } 718 719 /* check id */ 720 hdr_offset = htobe32(*(const uint32_t *) 721 (inflate_buffer + MCP_HEADER_PTR_OFFSET)); 722 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 723 device_printf(sc->dev, "Bad firmware file"); 724 status = EIO; 725 goto abort_with_buffer; 726 } 727 hdr = (const void*)(inflate_buffer + hdr_offset); 728 729 status = mxge_validate_firmware(sc, hdr); 730 if (status != 0) 731 goto abort_with_buffer; 732 733 /* Copy the inflated firmware to NIC SRAM. */ 734 for (i = 0; i < fw_len; i += 256) { 735 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 736 inflate_buffer + i, 737 min(256U, (unsigned)(fw_len - i))); 738 wmb(); 739 (void)*sc->sram; 740 wmb(); 741 } 742 743 *limit = fw_len; 744 status = 0; 745 abort_with_buffer: 746 free(inflate_buffer, M_TEMP); 747 abort_with_zs: 748 inflateEnd(&zs); 749 abort_with_fw: 750 firmware_put(fw, FIRMWARE_UNLOAD); 751 return status; 752 } 753 754 /* 755 * Enable or disable periodic RDMAs from the host to make certain 756 * chipsets resend dropped PCIe messages 757 */ 758 759 static void 760 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 761 { 762 char buf_bytes[72]; 763 volatile uint32_t *confirm; 764 volatile char *submit; 765 uint32_t *buf, dma_low, dma_high; 766 int i; 767 768 buf = (uint32_t *)((uintptr_t)(buf_bytes + 7) & ~7UL); 769 770 /* clear confirmation addr */ 771 confirm = (volatile uint32_t *)sc->cmd; 772 *confirm = 0; 773 wmb(); 774 775 /* send an rdma command to the PCIe engine, and wait for the 776 response in the confirmation address. The firmware should 777 write a -1 there to indicate it is alive and well 778 */ 779 780 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 781 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 782 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 783 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 784 buf[2] = htobe32(0xffffffff); /* confirm data */ 785 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 786 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 787 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 788 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 789 buf[5] = htobe32(enable); /* enable? */ 790 791 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 792 793 mxge_pio_copy(submit, buf, 64); 794 wmb(); 795 DELAY(1000); 796 wmb(); 797 i = 0; 798 while (*confirm != 0xffffffff && i < 20) { 799 DELAY(1000); 800 i++; 801 } 802 if (*confirm != 0xffffffff) { 803 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 804 (enable ? "enable" : "disable"), confirm, 805 *confirm); 806 } 807 return; 808 } 809 810 static int 811 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 812 { 813 mcp_cmd_t *buf; 814 char buf_bytes[sizeof(*buf) + 8]; 815 volatile mcp_cmd_response_t *response = sc->cmd; 816 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 817 uint32_t dma_low, dma_high; 818 int err, sleep_total = 0; 819 820 /* ensure buf is aligned to 8 bytes */ 821 buf = (mcp_cmd_t *)((uintptr_t)(buf_bytes + 7) & ~7UL); 822 823 buf->data0 = htobe32(data->data0); 824 buf->data1 = htobe32(data->data1); 825 buf->data2 = htobe32(data->data2); 826 buf->cmd = htobe32(cmd); 827 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 828 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 829 830 buf->response_addr.low = htobe32(dma_low); 831 buf->response_addr.high = htobe32(dma_high); 832 mtx_lock(&sc->cmd_mtx); 833 response->result = 0xffffffff; 834 wmb(); 835 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 836 837 /* wait up to 20ms */ 838 err = EAGAIN; 839 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 840 bus_dmamap_sync(sc->cmd_dma.dmat, 841 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 842 wmb(); 843 switch (be32toh(response->result)) { 844 case 0: 845 data->data0 = be32toh(response->data); 846 err = 0; 847 break; 848 case 0xffffffff: 849 DELAY(1000); 850 break; 851 case MXGEFW_CMD_UNKNOWN: 852 err = ENOSYS; 853 break; 854 case MXGEFW_CMD_ERROR_UNALIGNED: 855 err = E2BIG; 856 break; 857 case MXGEFW_CMD_ERROR_BUSY: 858 err = EBUSY; 859 break; 860 case MXGEFW_CMD_ERROR_I2C_ABSENT: 861 err = ENXIO; 862 break; 863 default: 864 device_printf(sc->dev, 865 "mxge: command %d " 866 "failed, result = %d\n", 867 cmd, be32toh(response->result)); 868 err = ENXIO; 869 break; 870 } 871 if (err != EAGAIN) 872 break; 873 } 874 if (err == EAGAIN) 875 device_printf(sc->dev, "mxge: command %d timed out" 876 "result = %d\n", 877 cmd, be32toh(response->result)); 878 mtx_unlock(&sc->cmd_mtx); 879 return err; 880 } 881 882 static int 883 mxge_adopt_running_firmware(mxge_softc_t *sc) 884 { 885 struct mcp_gen_header *hdr; 886 const size_t bytes = sizeof (struct mcp_gen_header); 887 size_t hdr_offset; 888 int status; 889 890 /* find running firmware header */ 891 hdr_offset = htobe32(*(volatile uint32_t *) 892 (sc->sram + MCP_HEADER_PTR_OFFSET)); 893 894 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 895 device_printf(sc->dev, 896 "Running firmware has bad header offset (%d)\n", 897 (int)hdr_offset); 898 return EIO; 899 } 900 901 /* copy header of running firmware from SRAM to host memory to 902 * validate firmware */ 903 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 904 if (hdr == NULL) { 905 device_printf(sc->dev, "could not malloc firmware hdr\n"); 906 return ENOMEM; 907 } 908 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 909 rman_get_bushandle(sc->mem_res), 910 hdr_offset, (char *)hdr, bytes); 911 status = mxge_validate_firmware(sc, hdr); 912 free(hdr, M_DEVBUF); 913 914 /* 915 * check to see if adopted firmware has bug where adopting 916 * it will cause broadcasts to be filtered unless the NIC 917 * is kept in ALLMULTI mode 918 */ 919 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 920 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 921 sc->adopted_rx_filter_bug = 1; 922 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 923 "working around rx filter bug\n", 924 sc->fw_ver_major, sc->fw_ver_minor, 925 sc->fw_ver_tiny); 926 } 927 928 return status; 929 } 930 931 static int 932 mxge_load_firmware(mxge_softc_t *sc, int adopt) 933 { 934 volatile uint32_t *confirm; 935 volatile char *submit; 936 char buf_bytes[72]; 937 uint32_t *buf, size, dma_low, dma_high; 938 int status, i; 939 940 buf = (uint32_t *)((uintptr_t)(buf_bytes + 7) & ~7UL); 941 942 size = sc->sram_size; 943 status = mxge_load_firmware_helper(sc, &size); 944 if (status) { 945 if (!adopt) 946 return status; 947 /* Try to use the currently running firmware, if 948 it is new enough */ 949 status = mxge_adopt_running_firmware(sc); 950 if (status) { 951 device_printf(sc->dev, 952 "failed to adopt running firmware\n"); 953 return status; 954 } 955 device_printf(sc->dev, 956 "Successfully adopted running firmware\n"); 957 if (sc->tx_boundary == 4096) { 958 device_printf(sc->dev, 959 "Using firmware currently running on NIC" 960 ". For optimal\n"); 961 device_printf(sc->dev, 962 "performance consider loading optimized " 963 "firmware\n"); 964 } 965 sc->fw_name = mxge_fw_unaligned; 966 sc->tx_boundary = 2048; 967 return 0; 968 } 969 /* clear confirmation addr */ 970 confirm = (volatile uint32_t *)sc->cmd; 971 *confirm = 0; 972 wmb(); 973 /* send a reload command to the bootstrap MCP, and wait for the 974 response in the confirmation address. The firmware should 975 write a -1 there to indicate it is alive and well 976 */ 977 978 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 979 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 980 981 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 982 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 983 buf[2] = htobe32(0xffffffff); /* confirm data */ 984 985 /* FIX: All newest firmware should un-protect the bottom of 986 the sram before handoff. However, the very first interfaces 987 do not. Therefore the handoff copy must skip the first 8 bytes 988 */ 989 /* where the code starts*/ 990 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 991 buf[4] = htobe32(size - 8); /* length of code */ 992 buf[5] = htobe32(8); /* where to copy to */ 993 buf[6] = htobe32(0); /* where to jump to */ 994 995 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 996 mxge_pio_copy(submit, buf, 64); 997 wmb(); 998 DELAY(1000); 999 wmb(); 1000 i = 0; 1001 while (*confirm != 0xffffffff && i < 20) { 1002 DELAY(1000*10); 1003 i++; 1004 bus_dmamap_sync(sc->cmd_dma.dmat, 1005 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 1006 } 1007 if (*confirm != 0xffffffff) { 1008 device_printf(sc->dev,"handoff failed (%p = 0x%x)", 1009 confirm, *confirm); 1010 1011 return ENXIO; 1012 } 1013 return 0; 1014 } 1015 1016 static int 1017 mxge_update_mac_address(mxge_softc_t *sc) 1018 { 1019 mxge_cmd_t cmd; 1020 uint8_t *addr = sc->mac_addr; 1021 int status; 1022 1023 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 1024 | (addr[2] << 8) | addr[3]); 1025 1026 cmd.data1 = ((addr[4] << 8) | (addr[5])); 1027 1028 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 1029 return status; 1030 } 1031 1032 static int 1033 mxge_change_pause(mxge_softc_t *sc, int pause) 1034 { 1035 mxge_cmd_t cmd; 1036 int status; 1037 1038 if (pause) 1039 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, 1040 &cmd); 1041 else 1042 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, 1043 &cmd); 1044 1045 if (status) { 1046 device_printf(sc->dev, "Failed to set flow control mode\n"); 1047 return ENXIO; 1048 } 1049 sc->pause = pause; 1050 return 0; 1051 } 1052 1053 static void 1054 mxge_change_promisc(mxge_softc_t *sc, int promisc) 1055 { 1056 mxge_cmd_t cmd; 1057 int status; 1058 1059 if (mxge_always_promisc) 1060 promisc = 1; 1061 1062 if (promisc) 1063 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, 1064 &cmd); 1065 else 1066 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, 1067 &cmd); 1068 1069 if (status) { 1070 device_printf(sc->dev, "Failed to set promisc mode\n"); 1071 } 1072 } 1073 1074 struct mxge_add_maddr_ctx { 1075 mxge_softc_t *sc; 1076 int error; 1077 }; 1078 1079 static u_int 1080 mxge_add_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt) 1081 { 1082 struct mxge_add_maddr_ctx *ctx = arg; 1083 mxge_cmd_t cmd; 1084 1085 if (ctx->error != 0) 1086 return (0); 1087 bcopy(LLADDR(sdl), &cmd.data0, 4); 1088 bcopy(LLADDR(sdl) + 4, &cmd.data1, 2); 1089 cmd.data0 = htonl(cmd.data0); 1090 cmd.data1 = htonl(cmd.data1); 1091 1092 ctx->error = mxge_send_cmd(ctx->sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1093 1094 return (1); 1095 } 1096 1097 static void 1098 mxge_set_multicast_list(mxge_softc_t *sc) 1099 { 1100 struct mxge_add_maddr_ctx ctx; 1101 if_t ifp = sc->ifp; 1102 mxge_cmd_t cmd; 1103 int err; 1104 1105 /* This firmware is known to not support multicast */ 1106 if (!sc->fw_multicast_support) 1107 return; 1108 1109 /* Disable multicast filtering while we play with the lists*/ 1110 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1111 if (err != 0) { 1112 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," 1113 " error status: %d\n", err); 1114 return; 1115 } 1116 1117 if (sc->adopted_rx_filter_bug) 1118 return; 1119 1120 if (if_getflags(ifp) & IFF_ALLMULTI) 1121 /* request to disable multicast filtering, so quit here */ 1122 return; 1123 1124 /* Flush all the filters */ 1125 1126 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1127 if (err != 0) { 1128 device_printf(sc->dev, 1129 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" 1130 ", error status: %d\n", err); 1131 return; 1132 } 1133 1134 /* Walk the multicast list, and add each address */ 1135 ctx.sc = sc; 1136 ctx.error = 0; 1137 if_foreach_llmaddr(ifp, mxge_add_maddr, &ctx); 1138 if (ctx.error != 0) { 1139 device_printf(sc->dev, "Failed MXGEFW_JOIN_MULTICAST_GROUP, " 1140 "error status:" "%d\t", ctx.error); 1141 /* abort, leaving multicast filtering off */ 1142 return; 1143 } 1144 1145 /* Enable multicast filtering */ 1146 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1147 if (err != 0) { 1148 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI" 1149 ", error status: %d\n", err); 1150 } 1151 } 1152 1153 static int 1154 mxge_max_mtu(mxge_softc_t *sc) 1155 { 1156 mxge_cmd_t cmd; 1157 int status; 1158 1159 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) 1160 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1161 1162 /* try to set nbufs to see if it we can 1163 use virtually contiguous jumbos */ 1164 cmd.data0 = 0; 1165 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 1166 &cmd); 1167 if (status == 0) 1168 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1169 1170 /* otherwise, we're limited to MJUMPAGESIZE */ 1171 return MJUMPAGESIZE - MXGEFW_PAD; 1172 } 1173 1174 static int 1175 mxge_reset(mxge_softc_t *sc, int interrupts_setup) 1176 { 1177 struct mxge_slice_state *ss; 1178 mxge_rx_done_t *rx_done; 1179 volatile uint32_t *irq_claim; 1180 mxge_cmd_t cmd; 1181 int slice, status; 1182 1183 /* try to send a reset command to the card to see if it 1184 is alive */ 1185 memset(&cmd, 0, sizeof (cmd)); 1186 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1187 if (status != 0) { 1188 device_printf(sc->dev, "failed reset\n"); 1189 return ENXIO; 1190 } 1191 1192 mxge_dummy_rdma(sc, 1); 1193 1194 /* set the intrq size */ 1195 cmd.data0 = sc->rx_ring_size; 1196 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1197 1198 /* 1199 * Even though we already know how many slices are supported 1200 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES 1201 * has magic side effects, and must be called after a reset. 1202 * It must be called prior to calling any RSS related cmds, 1203 * including assigning an interrupt queue for anything but 1204 * slice 0. It must also be called *after* 1205 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 1206 * the firmware to compute offsets. 1207 */ 1208 1209 if (sc->num_slices > 1) { 1210 /* ask the maximum number of slices it supports */ 1211 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 1212 &cmd); 1213 if (status != 0) { 1214 device_printf(sc->dev, 1215 "failed to get number of slices\n"); 1216 return status; 1217 } 1218 /* 1219 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 1220 * to setting up the interrupt queue DMA 1221 */ 1222 cmd.data0 = sc->num_slices; 1223 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 1224 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 1225 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, 1226 &cmd); 1227 if (status != 0) { 1228 device_printf(sc->dev, 1229 "failed to set number of slices\n"); 1230 return status; 1231 } 1232 } 1233 1234 if (interrupts_setup) { 1235 /* Now exchange information about interrupts */ 1236 for (slice = 0; slice < sc->num_slices; slice++) { 1237 rx_done = &sc->ss[slice].rx_done; 1238 memset(rx_done->entry, 0, sc->rx_ring_size); 1239 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr); 1240 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr); 1241 cmd.data2 = slice; 1242 status |= mxge_send_cmd(sc, 1243 MXGEFW_CMD_SET_INTRQ_DMA, 1244 &cmd); 1245 } 1246 } 1247 1248 status |= mxge_send_cmd(sc, 1249 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); 1250 1251 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1252 1253 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1254 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1255 1256 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1257 &cmd); 1258 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1259 if (status != 0) { 1260 device_printf(sc->dev, "failed set interrupt parameters\n"); 1261 return status; 1262 } 1263 1264 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1265 1266 /* run a DMA benchmark */ 1267 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST); 1268 1269 for (slice = 0; slice < sc->num_slices; slice++) { 1270 ss = &sc->ss[slice]; 1271 1272 ss->irq_claim = irq_claim + (2 * slice); 1273 /* reset mcp/driver shared state back to 0 */ 1274 ss->rx_done.idx = 0; 1275 ss->rx_done.cnt = 0; 1276 ss->tx.req = 0; 1277 ss->tx.done = 0; 1278 ss->tx.pkt_done = 0; 1279 ss->tx.queue_active = 0; 1280 ss->tx.activate = 0; 1281 ss->tx.deactivate = 0; 1282 ss->tx.wake = 0; 1283 ss->tx.defrag = 0; 1284 ss->tx.stall = 0; 1285 ss->rx_big.cnt = 0; 1286 ss->rx_small.cnt = 0; 1287 ss->lc.lro_bad_csum = 0; 1288 ss->lc.lro_queued = 0; 1289 ss->lc.lro_flushed = 0; 1290 if (ss->fw_stats != NULL) { 1291 bzero(ss->fw_stats, sizeof *ss->fw_stats); 1292 } 1293 } 1294 sc->rdma_tags_available = 15; 1295 status = mxge_update_mac_address(sc); 1296 mxge_change_promisc(sc, if_getflags(sc->ifp) & IFF_PROMISC); 1297 mxge_change_pause(sc, sc->pause); 1298 mxge_set_multicast_list(sc); 1299 if (sc->throttle) { 1300 cmd.data0 = sc->throttle; 1301 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, 1302 &cmd)) { 1303 device_printf(sc->dev, 1304 "can't enable throttle\n"); 1305 } 1306 } 1307 return status; 1308 } 1309 1310 static int 1311 mxge_change_throttle(SYSCTL_HANDLER_ARGS) 1312 { 1313 mxge_cmd_t cmd; 1314 mxge_softc_t *sc; 1315 int err; 1316 unsigned int throttle; 1317 1318 sc = arg1; 1319 throttle = sc->throttle; 1320 err = sysctl_handle_int(oidp, &throttle, arg2, req); 1321 if (err != 0) { 1322 return err; 1323 } 1324 1325 if (throttle == sc->throttle) 1326 return 0; 1327 1328 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE) 1329 return EINVAL; 1330 1331 mtx_lock(&sc->driver_mtx); 1332 cmd.data0 = throttle; 1333 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd); 1334 if (err == 0) 1335 sc->throttle = throttle; 1336 mtx_unlock(&sc->driver_mtx); 1337 return err; 1338 } 1339 1340 static int 1341 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1342 { 1343 mxge_softc_t *sc; 1344 unsigned int intr_coal_delay; 1345 int err; 1346 1347 sc = arg1; 1348 intr_coal_delay = sc->intr_coal_delay; 1349 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1350 if (err != 0) { 1351 return err; 1352 } 1353 if (intr_coal_delay == sc->intr_coal_delay) 1354 return 0; 1355 1356 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1357 return EINVAL; 1358 1359 mtx_lock(&sc->driver_mtx); 1360 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1361 sc->intr_coal_delay = intr_coal_delay; 1362 1363 mtx_unlock(&sc->driver_mtx); 1364 return err; 1365 } 1366 1367 static int 1368 mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1369 { 1370 mxge_softc_t *sc; 1371 unsigned int enabled; 1372 int err; 1373 1374 sc = arg1; 1375 enabled = sc->pause; 1376 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1377 if (err != 0) { 1378 return err; 1379 } 1380 if (enabled == sc->pause) 1381 return 0; 1382 1383 mtx_lock(&sc->driver_mtx); 1384 err = mxge_change_pause(sc, enabled); 1385 mtx_unlock(&sc->driver_mtx); 1386 return err; 1387 } 1388 1389 static int 1390 mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1391 { 1392 int err; 1393 1394 if (arg1 == NULL) 1395 return EFAULT; 1396 arg2 = be32toh(*(int *)arg1); 1397 arg1 = NULL; 1398 err = sysctl_handle_int(oidp, arg1, arg2, req); 1399 1400 return err; 1401 } 1402 1403 static void 1404 mxge_rem_sysctls(mxge_softc_t *sc) 1405 { 1406 struct mxge_slice_state *ss; 1407 int slice; 1408 1409 if (sc->slice_sysctl_tree == NULL) 1410 return; 1411 1412 for (slice = 0; slice < sc->num_slices; slice++) { 1413 ss = &sc->ss[slice]; 1414 if (ss == NULL || ss->sysctl_tree == NULL) 1415 continue; 1416 sysctl_ctx_free(&ss->sysctl_ctx); 1417 ss->sysctl_tree = NULL; 1418 } 1419 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1420 sc->slice_sysctl_tree = NULL; 1421 } 1422 1423 static void 1424 mxge_add_sysctls(mxge_softc_t *sc) 1425 { 1426 struct sysctl_ctx_list *ctx; 1427 struct sysctl_oid_list *children; 1428 mcp_irq_data_t *fw; 1429 struct mxge_slice_state *ss; 1430 int slice; 1431 char slice_num[8]; 1432 1433 ctx = device_get_sysctl_ctx(sc->dev); 1434 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1435 fw = sc->ss[0].fw_stats; 1436 1437 /* random information */ 1438 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1439 "firmware_version", 1440 CTLFLAG_RD, sc->fw_version, 1441 0, "firmware version"); 1442 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1443 "serial_number", 1444 CTLFLAG_RD, sc->serial_number_string, 1445 0, "serial number"); 1446 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1447 "product_code", 1448 CTLFLAG_RD, sc->product_code_string, 1449 0, "product_code"); 1450 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1451 "pcie_link_width", 1452 CTLFLAG_RD, &sc->link_width, 1453 0, "tx_boundary"); 1454 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1455 "tx_boundary", 1456 CTLFLAG_RD, &sc->tx_boundary, 1457 0, "tx_boundary"); 1458 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1459 "write_combine", 1460 CTLFLAG_RD, &sc->wc, 1461 0, "write combining PIO?"); 1462 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1463 "read_dma_MBs", 1464 CTLFLAG_RD, &sc->read_dma, 1465 0, "DMA Read speed in MB/s"); 1466 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1467 "write_dma_MBs", 1468 CTLFLAG_RD, &sc->write_dma, 1469 0, "DMA Write speed in MB/s"); 1470 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1471 "read_write_dma_MBs", 1472 CTLFLAG_RD, &sc->read_write_dma, 1473 0, "DMA concurrent Read/Write speed in MB/s"); 1474 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1475 "watchdog_resets", 1476 CTLFLAG_RD, &sc->watchdog_resets, 1477 0, "Number of times NIC was reset"); 1478 1479 /* performance related tunables */ 1480 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1481 "intr_coal_delay", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 1482 sc, 0, mxge_change_intr_coal, "I", 1483 "interrupt coalescing delay in usecs"); 1484 1485 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1486 "throttle", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 1487 mxge_change_throttle, "I", "transmit throttling"); 1488 1489 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1490 "flow_control_enabled", 1491 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 1492 mxge_change_flow_control, "I", 1493 "interrupt coalescing delay in usecs"); 1494 1495 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1496 "deassert_wait", 1497 CTLFLAG_RW, &mxge_deassert_wait, 1498 0, "Wait for IRQ line to go low in ihandler"); 1499 1500 /* stats block from firmware is in network byte order. 1501 Need to swap it */ 1502 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1503 "link_up", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1504 &fw->link_up, 0, mxge_handle_be32, "I", "link up"); 1505 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1506 "rdma_tags_available", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1507 &fw->rdma_tags_available, 0, mxge_handle_be32, "I", 1508 "rdma_tags_available"); 1509 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1510 "dropped_bad_crc32", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1511 &fw->dropped_bad_crc32, 0, mxge_handle_be32, "I", 1512 "dropped_bad_crc32"); 1513 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1514 "dropped_bad_phy", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1515 &fw->dropped_bad_phy, 0, mxge_handle_be32, "I", "dropped_bad_phy"); 1516 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1517 "dropped_link_error_or_filtered", 1518 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1519 &fw->dropped_link_error_or_filtered, 0, mxge_handle_be32, "I", 1520 "dropped_link_error_or_filtered"); 1521 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1522 "dropped_link_overflow", 1523 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1524 &fw->dropped_link_overflow, 0, mxge_handle_be32, "I", 1525 "dropped_link_overflow"); 1526 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1527 "dropped_multicast_filtered", 1528 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1529 &fw->dropped_multicast_filtered, 0, mxge_handle_be32, "I", 1530 "dropped_multicast_filtered"); 1531 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1532 "dropped_no_big_buffer", 1533 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1534 &fw->dropped_no_big_buffer, 0, mxge_handle_be32, "I", 1535 "dropped_no_big_buffer"); 1536 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1537 "dropped_no_small_buffer", 1538 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1539 &fw->dropped_no_small_buffer, 0, mxge_handle_be32, "I", 1540 "dropped_no_small_buffer"); 1541 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1542 "dropped_overrun", 1543 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1544 &fw->dropped_overrun, 0, mxge_handle_be32, "I", 1545 "dropped_overrun"); 1546 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1547 "dropped_pause", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1548 &fw->dropped_pause, 0, mxge_handle_be32, "I", "dropped_pause"); 1549 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1550 "dropped_runt", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1551 &fw->dropped_runt, 0, mxge_handle_be32, "I", "dropped_runt"); 1552 1553 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1554 "dropped_unicast_filtered", 1555 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1556 &fw->dropped_unicast_filtered, 0, mxge_handle_be32, "I", 1557 "dropped_unicast_filtered"); 1558 1559 /* verbose printing? */ 1560 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1561 "verbose", 1562 CTLFLAG_RW, &mxge_verbose, 1563 0, "verbose printing"); 1564 1565 /* add counters exported for debugging from all slices */ 1566 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1567 sc->slice_sysctl_tree = 1568 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO, 1569 "slice", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 1570 1571 for (slice = 0; slice < sc->num_slices; slice++) { 1572 ss = &sc->ss[slice]; 1573 sysctl_ctx_init(&ss->sysctl_ctx); 1574 ctx = &ss->sysctl_ctx; 1575 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1576 sprintf(slice_num, "%d", slice); 1577 ss->sysctl_tree = 1578 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num, 1579 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 1580 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1581 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1582 "rx_small_cnt", 1583 CTLFLAG_RD, &ss->rx_small.cnt, 1584 0, "rx_small_cnt"); 1585 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1586 "rx_big_cnt", 1587 CTLFLAG_RD, &ss->rx_big.cnt, 1588 0, "rx_small_cnt"); 1589 SYSCTL_ADD_U64(ctx, children, OID_AUTO, 1590 "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed, 1591 0, "number of lro merge queues flushed"); 1592 1593 SYSCTL_ADD_U64(ctx, children, OID_AUTO, 1594 "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum, 1595 0, "number of bad csums preventing LRO"); 1596 1597 SYSCTL_ADD_U64(ctx, children, OID_AUTO, 1598 "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued, 1599 0, "number of frames appended to lro merge" 1600 "queues"); 1601 1602 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1603 "tx_req", 1604 CTLFLAG_RD, &ss->tx.req, 1605 0, "tx_req"); 1606 1607 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1608 "tx_done", 1609 CTLFLAG_RD, &ss->tx.done, 1610 0, "tx_done"); 1611 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1612 "tx_pkt_done", 1613 CTLFLAG_RD, &ss->tx.pkt_done, 1614 0, "tx_done"); 1615 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1616 "tx_stall", 1617 CTLFLAG_RD, &ss->tx.stall, 1618 0, "tx_stall"); 1619 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1620 "tx_wake", 1621 CTLFLAG_RD, &ss->tx.wake, 1622 0, "tx_wake"); 1623 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1624 "tx_defrag", 1625 CTLFLAG_RD, &ss->tx.defrag, 1626 0, "tx_defrag"); 1627 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1628 "tx_queue_active", 1629 CTLFLAG_RD, &ss->tx.queue_active, 1630 0, "tx_queue_active"); 1631 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1632 "tx_activate", 1633 CTLFLAG_RD, &ss->tx.activate, 1634 0, "tx_activate"); 1635 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1636 "tx_deactivate", 1637 CTLFLAG_RD, &ss->tx.deactivate, 1638 0, "tx_deactivate"); 1639 } 1640 } 1641 1642 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1643 backwards one at a time and handle ring wraps */ 1644 1645 static inline void 1646 mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1647 mcp_kreq_ether_send_t *src, int cnt) 1648 { 1649 int idx, starting_slot; 1650 starting_slot = tx->req; 1651 while (cnt > 1) { 1652 cnt--; 1653 idx = (starting_slot + cnt) & tx->mask; 1654 mxge_pio_copy(&tx->lanai[idx], 1655 &src[cnt], sizeof(*src)); 1656 wmb(); 1657 } 1658 } 1659 1660 /* 1661 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1662 * at most 32 bytes at a time, so as to avoid involving the software 1663 * pio handler in the nic. We re-write the first segment's flags 1664 * to mark them valid only after writing the entire chain 1665 */ 1666 1667 static inline void 1668 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 1669 int cnt) 1670 { 1671 int idx, i; 1672 uint32_t *src_ints; 1673 volatile uint32_t *dst_ints; 1674 mcp_kreq_ether_send_t *srcp; 1675 volatile mcp_kreq_ether_send_t *dstp, *dst; 1676 uint8_t last_flags; 1677 1678 idx = tx->req & tx->mask; 1679 1680 last_flags = src->flags; 1681 src->flags = 0; 1682 wmb(); 1683 dst = dstp = &tx->lanai[idx]; 1684 srcp = src; 1685 1686 if ((idx + cnt) < tx->mask) { 1687 for (i = 0; i < (cnt - 1); i += 2) { 1688 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1689 wmb(); /* force write every 32 bytes */ 1690 srcp += 2; 1691 dstp += 2; 1692 } 1693 } else { 1694 /* submit all but the first request, and ensure 1695 that it is submitted below */ 1696 mxge_submit_req_backwards(tx, src, cnt); 1697 i = 0; 1698 } 1699 if (i < cnt) { 1700 /* submit the first request */ 1701 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1702 wmb(); /* barrier before setting valid flag */ 1703 } 1704 1705 /* re-write the last 32-bits with the valid flags */ 1706 src->flags = last_flags; 1707 src_ints = (uint32_t *)src; 1708 src_ints+=3; 1709 dst_ints = (volatile uint32_t *)dst; 1710 dst_ints+=3; 1711 *dst_ints = *src_ints; 1712 tx->req += cnt; 1713 wmb(); 1714 } 1715 1716 static int 1717 mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m, 1718 struct mxge_pkt_info *pi) 1719 { 1720 struct ether_vlan_header *eh; 1721 uint16_t etype; 1722 int tso = m->m_pkthdr.csum_flags & (CSUM_TSO); 1723 #if IFCAP_TSO6 && defined(INET6) 1724 int nxt; 1725 #endif 1726 1727 eh = mtod(m, struct ether_vlan_header *); 1728 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 1729 etype = ntohs(eh->evl_proto); 1730 pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 1731 } else { 1732 etype = ntohs(eh->evl_encap_proto); 1733 pi->ip_off = ETHER_HDR_LEN; 1734 } 1735 1736 switch (etype) { 1737 case ETHERTYPE_IP: 1738 /* 1739 * ensure ip header is in first mbuf, copy it to a 1740 * scratch buffer if not 1741 */ 1742 pi->ip = (struct ip *)(m->m_data + pi->ip_off); 1743 pi->ip6 = NULL; 1744 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) { 1745 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip), 1746 ss->scratch); 1747 pi->ip = (struct ip *)(ss->scratch + pi->ip_off); 1748 } 1749 pi->ip_hlen = pi->ip->ip_hl << 2; 1750 if (!tso) 1751 return 0; 1752 1753 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen + 1754 sizeof(struct tcphdr))) { 1755 m_copydata(m, 0, pi->ip_off + pi->ip_hlen + 1756 sizeof(struct tcphdr), ss->scratch); 1757 pi->ip = (struct ip *)(ss->scratch + pi->ip_off); 1758 } 1759 pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen); 1760 break; 1761 #if IFCAP_TSO6 && defined(INET6) 1762 case ETHERTYPE_IPV6: 1763 pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off); 1764 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) { 1765 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6), 1766 ss->scratch); 1767 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off); 1768 } 1769 nxt = 0; 1770 pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt); 1771 pi->ip_hlen -= pi->ip_off; 1772 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) 1773 return EINVAL; 1774 1775 if (!tso) 1776 return 0; 1777 1778 if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen) 1779 return EINVAL; 1780 1781 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen + 1782 sizeof(struct tcphdr))) { 1783 m_copydata(m, 0, pi->ip_off + pi->ip_hlen + 1784 sizeof(struct tcphdr), ss->scratch); 1785 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off); 1786 } 1787 pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen); 1788 break; 1789 #endif 1790 default: 1791 return EINVAL; 1792 } 1793 return 0; 1794 } 1795 1796 #if IFCAP_TSO4 1797 1798 static void 1799 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m, 1800 int busdma_seg_cnt, struct mxge_pkt_info *pi) 1801 { 1802 mxge_tx_ring_t *tx; 1803 mcp_kreq_ether_send_t *req; 1804 bus_dma_segment_t *seg; 1805 uint32_t low, high_swapped; 1806 int len, seglen, cum_len, cum_len_next; 1807 int next_is_first, chop, cnt, rdma_count, small; 1808 uint16_t pseudo_hdr_offset, cksum_offset, mss, sum; 1809 uint8_t flags, flags_next; 1810 static int once; 1811 1812 mss = m->m_pkthdr.tso_segsz; 1813 1814 /* negative cum_len signifies to the 1815 * send loop that we are still in the 1816 * header portion of the TSO packet. 1817 */ 1818 1819 cksum_offset = pi->ip_off + pi->ip_hlen; 1820 cum_len = -(cksum_offset + (pi->tcp->th_off << 2)); 1821 1822 /* TSO implies checksum offload on this hardware */ 1823 if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) { 1824 /* 1825 * If packet has full TCP csum, replace it with pseudo hdr 1826 * sum that the NIC expects, otherwise the NIC will emit 1827 * packets with bad TCP checksums. 1828 */ 1829 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); 1830 if (pi->ip6) { 1831 #if (CSUM_TCP_IPV6 != 0) && defined(INET6) 1832 m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6; 1833 sum = in6_cksum_pseudo(pi->ip6, 1834 m->m_pkthdr.len - cksum_offset, 1835 IPPROTO_TCP, 0); 1836 #endif 1837 } else { 1838 #ifdef INET 1839 m->m_pkthdr.csum_flags |= CSUM_TCP; 1840 sum = in_pseudo(pi->ip->ip_src.s_addr, 1841 pi->ip->ip_dst.s_addr, 1842 htons(IPPROTO_TCP + (m->m_pkthdr.len - 1843 cksum_offset))); 1844 #endif 1845 } 1846 m_copyback(m, offsetof(struct tcphdr, th_sum) + 1847 cksum_offset, sizeof(sum), (caddr_t)&sum); 1848 } 1849 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1850 1851 /* for TSO, pseudo_hdr_offset holds mss. 1852 * The firmware figures out where to put 1853 * the checksum by parsing the header. */ 1854 pseudo_hdr_offset = htobe16(mss); 1855 1856 if (pi->ip6) { 1857 /* 1858 * for IPv6 TSO, the "checksum offset" is re-purposed 1859 * to store the TCP header len 1860 */ 1861 cksum_offset = (pi->tcp->th_off << 2); 1862 } 1863 1864 tx = &ss->tx; 1865 req = tx->req_list; 1866 seg = tx->seg_list; 1867 cnt = 0; 1868 rdma_count = 0; 1869 /* "rdma_count" is the number of RDMAs belonging to the 1870 * current packet BEFORE the current send request. For 1871 * non-TSO packets, this is equal to "count". 1872 * For TSO packets, rdma_count needs to be reset 1873 * to 0 after a segment cut. 1874 * 1875 * The rdma_count field of the send request is 1876 * the number of RDMAs of the packet starting at 1877 * that request. For TSO send requests with one ore more cuts 1878 * in the middle, this is the number of RDMAs starting 1879 * after the last cut in the request. All previous 1880 * segments before the last cut implicitly have 1 RDMA. 1881 * 1882 * Since the number of RDMAs is not known beforehand, 1883 * it must be filled-in retroactively - after each 1884 * segmentation cut or at the end of the entire packet. 1885 */ 1886 1887 while (busdma_seg_cnt) { 1888 /* Break the busdma segment up into pieces*/ 1889 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1890 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1891 len = seg->ds_len; 1892 1893 while (len) { 1894 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1895 seglen = len; 1896 cum_len_next = cum_len + seglen; 1897 (req-rdma_count)->rdma_count = rdma_count + 1; 1898 if (__predict_true(cum_len >= 0)) { 1899 /* payload */ 1900 chop = (cum_len_next > mss); 1901 cum_len_next = cum_len_next % mss; 1902 next_is_first = (cum_len_next == 0); 1903 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1904 flags_next |= next_is_first * 1905 MXGEFW_FLAGS_FIRST; 1906 rdma_count |= -(chop | next_is_first); 1907 rdma_count += chop & !next_is_first; 1908 } else if (cum_len_next >= 0) { 1909 /* header ends */ 1910 rdma_count = -1; 1911 cum_len_next = 0; 1912 seglen = -cum_len; 1913 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1914 flags_next = MXGEFW_FLAGS_TSO_PLD | 1915 MXGEFW_FLAGS_FIRST | 1916 (small * MXGEFW_FLAGS_SMALL); 1917 } 1918 1919 req->addr_high = high_swapped; 1920 req->addr_low = htobe32(low); 1921 req->pseudo_hdr_offset = pseudo_hdr_offset; 1922 req->pad = 0; 1923 req->rdma_count = 1; 1924 req->length = htobe16(seglen); 1925 req->cksum_offset = cksum_offset; 1926 req->flags = flags | ((cum_len & 1) * 1927 MXGEFW_FLAGS_ALIGN_ODD); 1928 low += seglen; 1929 len -= seglen; 1930 cum_len = cum_len_next; 1931 flags = flags_next; 1932 req++; 1933 cnt++; 1934 rdma_count++; 1935 if (cksum_offset != 0 && !pi->ip6) { 1936 if (__predict_false(cksum_offset > seglen)) 1937 cksum_offset -= seglen; 1938 else 1939 cksum_offset = 0; 1940 } 1941 if (__predict_false(cnt > tx->max_desc)) 1942 goto drop; 1943 } 1944 busdma_seg_cnt--; 1945 seg++; 1946 } 1947 (req-rdma_count)->rdma_count = rdma_count; 1948 1949 do { 1950 req--; 1951 req->flags |= MXGEFW_FLAGS_TSO_LAST; 1952 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 1953 1954 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1955 mxge_submit_req(tx, tx->req_list, cnt); 1956 1957 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 1958 /* tell the NIC to start polling this slice */ 1959 *tx->send_go = 1; 1960 tx->queue_active = 1; 1961 tx->activate++; 1962 wmb(); 1963 } 1964 1965 return; 1966 1967 drop: 1968 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 1969 m_freem(m); 1970 ss->oerrors++; 1971 if (!once) { 1972 printf("tx->max_desc exceeded via TSO!\n"); 1973 printf("mss = %d, %ld, %d!\n", mss, 1974 (long)seg - (long)tx->seg_list, tx->max_desc); 1975 once = 1; 1976 } 1977 return; 1978 1979 } 1980 1981 #endif /* IFCAP_TSO4 */ 1982 1983 #ifdef MXGE_NEW_VLAN_API 1984 /* 1985 * We reproduce the software vlan tag insertion from 1986 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" 1987 * vlan tag insertion. We need to advertise this in order to have the 1988 * vlan interface respect our csum offload flags. 1989 */ 1990 static struct mbuf * 1991 mxge_vlan_tag_insert(struct mbuf *m) 1992 { 1993 struct ether_vlan_header *evl; 1994 1995 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT); 1996 if (__predict_false(m == NULL)) 1997 return NULL; 1998 if (m->m_len < sizeof(*evl)) { 1999 m = m_pullup(m, sizeof(*evl)); 2000 if (__predict_false(m == NULL)) 2001 return NULL; 2002 } 2003 /* 2004 * Transform the Ethernet header into an Ethernet header 2005 * with 802.1Q encapsulation. 2006 */ 2007 evl = mtod(m, struct ether_vlan_header *); 2008 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 2009 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 2010 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 2011 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 2012 m->m_flags &= ~M_VLANTAG; 2013 return m; 2014 } 2015 #endif /* MXGE_NEW_VLAN_API */ 2016 2017 static void 2018 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 2019 { 2020 struct mxge_pkt_info pi = {0,0,0,0}; 2021 mxge_softc_t *sc; 2022 mcp_kreq_ether_send_t *req; 2023 bus_dma_segment_t *seg; 2024 struct mbuf *m_tmp; 2025 mxge_tx_ring_t *tx; 2026 int cnt, cum_len, err, i, idx, odd_flag; 2027 uint16_t pseudo_hdr_offset; 2028 uint8_t flags, cksum_offset; 2029 2030 sc = ss->sc; 2031 tx = &ss->tx; 2032 2033 #ifdef MXGE_NEW_VLAN_API 2034 if (m->m_flags & M_VLANTAG) { 2035 m = mxge_vlan_tag_insert(m); 2036 if (__predict_false(m == NULL)) 2037 goto drop_without_m; 2038 } 2039 #endif 2040 if (m->m_pkthdr.csum_flags & 2041 (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { 2042 if (mxge_parse_tx(ss, m, &pi)) 2043 goto drop; 2044 } 2045 2046 /* (try to) map the frame for DMA */ 2047 idx = tx->req & tx->mask; 2048 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 2049 m, tx->seg_list, &cnt, 2050 BUS_DMA_NOWAIT); 2051 if (__predict_false(err == EFBIG)) { 2052 /* Too many segments in the chain. Try 2053 to defrag */ 2054 m_tmp = m_defrag(m, M_NOWAIT); 2055 if (m_tmp == NULL) { 2056 goto drop; 2057 } 2058 ss->tx.defrag++; 2059 m = m_tmp; 2060 err = bus_dmamap_load_mbuf_sg(tx->dmat, 2061 tx->info[idx].map, 2062 m, tx->seg_list, &cnt, 2063 BUS_DMA_NOWAIT); 2064 } 2065 if (__predict_false(err != 0)) { 2066 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 2067 " packet len = %d\n", err, m->m_pkthdr.len); 2068 goto drop; 2069 } 2070 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 2071 BUS_DMASYNC_PREWRITE); 2072 tx->info[idx].m = m; 2073 2074 #if IFCAP_TSO4 2075 /* TSO is different enough, we handle it in another routine */ 2076 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 2077 mxge_encap_tso(ss, m, cnt, &pi); 2078 return; 2079 } 2080 #endif 2081 2082 req = tx->req_list; 2083 cksum_offset = 0; 2084 pseudo_hdr_offset = 0; 2085 flags = MXGEFW_FLAGS_NO_TSO; 2086 2087 /* checksum offloading? */ 2088 if (m->m_pkthdr.csum_flags & 2089 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { 2090 /* ensure ip header is in first mbuf, copy 2091 it to a scratch buffer if not */ 2092 cksum_offset = pi.ip_off + pi.ip_hlen; 2093 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2094 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2095 req->cksum_offset = cksum_offset; 2096 flags |= MXGEFW_FLAGS_CKSUM; 2097 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2098 } else { 2099 odd_flag = 0; 2100 } 2101 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2102 flags |= MXGEFW_FLAGS_SMALL; 2103 2104 /* convert segments into a request list */ 2105 cum_len = 0; 2106 seg = tx->seg_list; 2107 req->flags = MXGEFW_FLAGS_FIRST; 2108 for (i = 0; i < cnt; i++) { 2109 req->addr_low = 2110 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2111 req->addr_high = 2112 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2113 req->length = htobe16(seg->ds_len); 2114 req->cksum_offset = cksum_offset; 2115 if (cksum_offset > seg->ds_len) 2116 cksum_offset -= seg->ds_len; 2117 else 2118 cksum_offset = 0; 2119 req->pseudo_hdr_offset = pseudo_hdr_offset; 2120 req->pad = 0; /* complete solid 16-byte block */ 2121 req->rdma_count = 1; 2122 req->flags |= flags | ((cum_len & 1) * odd_flag); 2123 cum_len += seg->ds_len; 2124 seg++; 2125 req++; 2126 req->flags = 0; 2127 } 2128 req--; 2129 /* pad runts to 60 bytes */ 2130 if (cum_len < 60) { 2131 req++; 2132 req->addr_low = 2133 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2134 req->addr_high = 2135 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2136 req->length = htobe16(60 - cum_len); 2137 req->cksum_offset = 0; 2138 req->pseudo_hdr_offset = pseudo_hdr_offset; 2139 req->pad = 0; /* complete solid 16-byte block */ 2140 req->rdma_count = 1; 2141 req->flags |= flags | ((cum_len & 1) * odd_flag); 2142 cnt++; 2143 } 2144 2145 tx->req_list[0].rdma_count = cnt; 2146 #if 0 2147 /* print what the firmware will see */ 2148 for (i = 0; i < cnt; i++) { 2149 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2150 "cso:%d, flags:0x%x, rdma:%d\n", 2151 i, (int)ntohl(tx->req_list[i].addr_high), 2152 (int)ntohl(tx->req_list[i].addr_low), 2153 (int)ntohs(tx->req_list[i].length), 2154 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2155 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2156 tx->req_list[i].rdma_count); 2157 } 2158 printf("--------------\n"); 2159 #endif 2160 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2161 mxge_submit_req(tx, tx->req_list, cnt); 2162 2163 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2164 /* tell the NIC to start polling this slice */ 2165 *tx->send_go = 1; 2166 tx->queue_active = 1; 2167 tx->activate++; 2168 wmb(); 2169 } 2170 2171 return; 2172 2173 drop: 2174 m_freem(m); 2175 drop_without_m: 2176 ss->oerrors++; 2177 return; 2178 } 2179 2180 static void 2181 mxge_qflush(if_t ifp) 2182 { 2183 mxge_softc_t *sc = if_getsoftc(ifp); 2184 mxge_tx_ring_t *tx; 2185 struct mbuf *m; 2186 int slice; 2187 2188 for (slice = 0; slice < sc->num_slices; slice++) { 2189 tx = &sc->ss[slice].tx; 2190 mtx_lock(&tx->mtx); 2191 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2192 m_freem(m); 2193 mtx_unlock(&tx->mtx); 2194 } 2195 if_qflush(ifp); 2196 } 2197 2198 static inline void 2199 mxge_start_locked(struct mxge_slice_state *ss) 2200 { 2201 mxge_softc_t *sc; 2202 struct mbuf *m; 2203 if_t ifp; 2204 mxge_tx_ring_t *tx; 2205 2206 sc = ss->sc; 2207 ifp = sc->ifp; 2208 tx = &ss->tx; 2209 2210 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2211 m = drbr_dequeue(ifp, tx->br); 2212 if (m == NULL) { 2213 return; 2214 } 2215 /* let BPF see it */ 2216 BPF_MTAP(ifp, m); 2217 2218 /* give it to the nic */ 2219 mxge_encap(ss, m); 2220 } 2221 /* ran out of transmit slots */ 2222 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) 2223 && (!drbr_empty(ifp, tx->br))) { 2224 ss->if_drv_flags |= IFF_DRV_OACTIVE; 2225 tx->stall++; 2226 } 2227 } 2228 2229 static int 2230 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2231 { 2232 mxge_softc_t *sc; 2233 if_t ifp; 2234 mxge_tx_ring_t *tx; 2235 int err; 2236 2237 sc = ss->sc; 2238 ifp = sc->ifp; 2239 tx = &ss->tx; 2240 2241 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 2242 IFF_DRV_RUNNING) { 2243 err = drbr_enqueue(ifp, tx->br, m); 2244 return (err); 2245 } 2246 2247 if (!drbr_needs_enqueue(ifp, tx->br) && 2248 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2249 /* let BPF see it */ 2250 BPF_MTAP(ifp, m); 2251 /* give it to the nic */ 2252 mxge_encap(ss, m); 2253 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2254 return (err); 2255 } 2256 if (!drbr_empty(ifp, tx->br)) 2257 mxge_start_locked(ss); 2258 return (0); 2259 } 2260 2261 static int 2262 mxge_transmit(if_t ifp, struct mbuf *m) 2263 { 2264 mxge_softc_t *sc = if_getsoftc(ifp); 2265 struct mxge_slice_state *ss; 2266 mxge_tx_ring_t *tx; 2267 int err = 0; 2268 int slice; 2269 2270 slice = m->m_pkthdr.flowid; 2271 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ 2272 2273 ss = &sc->ss[slice]; 2274 tx = &ss->tx; 2275 2276 if (mtx_trylock(&tx->mtx)) { 2277 err = mxge_transmit_locked(ss, m); 2278 mtx_unlock(&tx->mtx); 2279 } else { 2280 err = drbr_enqueue(ifp, tx->br, m); 2281 } 2282 2283 return (err); 2284 } 2285 2286 static void 2287 mxge_start(if_t ifp) 2288 { 2289 mxge_softc_t *sc = if_getsoftc(ifp); 2290 struct mxge_slice_state *ss; 2291 2292 /* only use the first slice for now */ 2293 ss = &sc->ss[0]; 2294 mtx_lock(&ss->tx.mtx); 2295 mxge_start_locked(ss); 2296 mtx_unlock(&ss->tx.mtx); 2297 } 2298 2299 /* 2300 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2301 * at most 32 bytes at a time, so as to avoid involving the software 2302 * pio handler in the nic. We re-write the first segment's low 2303 * DMA address to mark it valid only after we write the entire chunk 2304 * in a burst 2305 */ 2306 static inline void 2307 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2308 mcp_kreq_ether_recv_t *src) 2309 { 2310 uint32_t low; 2311 2312 low = src->addr_low; 2313 src->addr_low = 0xffffffff; 2314 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2315 wmb(); 2316 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2317 wmb(); 2318 src->addr_low = low; 2319 dst->addr_low = low; 2320 wmb(); 2321 } 2322 2323 static int 2324 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2325 { 2326 bus_dma_segment_t seg; 2327 struct mbuf *m; 2328 mxge_rx_ring_t *rx = &ss->rx_small; 2329 int cnt, err; 2330 2331 m = m_gethdr(M_NOWAIT, MT_DATA); 2332 if (m == NULL) { 2333 rx->alloc_fail++; 2334 err = ENOBUFS; 2335 goto done; 2336 } 2337 m->m_len = MHLEN; 2338 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2339 &seg, &cnt, BUS_DMA_NOWAIT); 2340 if (err != 0) { 2341 m_free(m); 2342 goto done; 2343 } 2344 rx->info[idx].m = m; 2345 rx->shadow[idx].addr_low = 2346 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2347 rx->shadow[idx].addr_high = 2348 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2349 2350 done: 2351 if ((idx & 7) == 7) 2352 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2353 return err; 2354 } 2355 2356 static int 2357 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2358 { 2359 bus_dma_segment_t seg[3]; 2360 struct mbuf *m; 2361 mxge_rx_ring_t *rx = &ss->rx_big; 2362 int cnt, err, i; 2363 2364 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2365 if (m == NULL) { 2366 rx->alloc_fail++; 2367 err = ENOBUFS; 2368 goto done; 2369 } 2370 m->m_len = rx->mlen; 2371 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2372 seg, &cnt, BUS_DMA_NOWAIT); 2373 if (err != 0) { 2374 m_free(m); 2375 goto done; 2376 } 2377 rx->info[idx].m = m; 2378 rx->shadow[idx].addr_low = 2379 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2380 rx->shadow[idx].addr_high = 2381 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2382 2383 done: 2384 for (i = 0; i < rx->nbufs; i++) { 2385 if ((idx & 7) == 7) { 2386 mxge_submit_8rx(&rx->lanai[idx - 7], 2387 &rx->shadow[idx - 7]); 2388 } 2389 idx++; 2390 } 2391 return err; 2392 } 2393 2394 #ifdef INET6 2395 2396 static uint16_t 2397 mxge_csum_generic(uint16_t *raw, int len) 2398 { 2399 uint32_t csum; 2400 2401 csum = 0; 2402 while (len > 0) { 2403 csum += *raw; 2404 raw++; 2405 len -= 2; 2406 } 2407 csum = (csum >> 16) + (csum & 0xffff); 2408 csum = (csum >> 16) + (csum & 0xffff); 2409 return (uint16_t)csum; 2410 } 2411 2412 static inline uint16_t 2413 mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum) 2414 { 2415 uint32_t partial; 2416 int nxt, cksum_offset; 2417 struct ip6_hdr *ip6 = p; 2418 uint16_t c; 2419 2420 nxt = ip6->ip6_nxt; 2421 cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN; 2422 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) { 2423 cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN, 2424 IPPROTO_IPV6, &nxt); 2425 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) 2426 return (1); 2427 } 2428 2429 /* 2430 * IPv6 headers do not contain a checksum, and hence 2431 * do not checksum to zero, so they don't "fall out" 2432 * of the partial checksum calculation like IPv4 2433 * headers do. We need to fix the partial checksum by 2434 * subtracting the checksum of the IPv6 header. 2435 */ 2436 2437 partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset - 2438 ETHER_HDR_LEN); 2439 csum += ~partial; 2440 csum += (csum < ~partial); 2441 csum = (csum >> 16) + (csum & 0xFFFF); 2442 csum = (csum >> 16) + (csum & 0xFFFF); 2443 c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt, 2444 csum); 2445 c ^= 0xffff; 2446 return (c); 2447 } 2448 #endif /* INET6 */ 2449 /* 2450 * Myri10GE hardware checksums are not valid if the sender 2451 * padded the frame with non-zero padding. This is because 2452 * the firmware just does a simple 16-bit 1s complement 2453 * checksum across the entire frame, excluding the first 14 2454 * bytes. It is best to simply to check the checksum and 2455 * tell the stack about it only if the checksum is good 2456 */ 2457 2458 static inline uint16_t 2459 mxge_rx_csum(struct mbuf *m, int csum) 2460 { 2461 struct ether_header *eh; 2462 #ifdef INET 2463 struct ip *ip; 2464 #endif 2465 #if defined(INET) || defined(INET6) 2466 int cap = if_getcapenable(m->m_pkthdr.rcvif); 2467 #endif 2468 uint16_t c, etype; 2469 2470 eh = mtod(m, struct ether_header *); 2471 etype = ntohs(eh->ether_type); 2472 switch (etype) { 2473 #ifdef INET 2474 case ETHERTYPE_IP: 2475 if ((cap & IFCAP_RXCSUM) == 0) 2476 return (1); 2477 ip = (struct ip *)(eh + 1); 2478 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP) 2479 return (1); 2480 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2481 htonl(ntohs(csum) + ntohs(ip->ip_len) - 2482 (ip->ip_hl << 2) + ip->ip_p)); 2483 c ^= 0xffff; 2484 break; 2485 #endif 2486 #ifdef INET6 2487 case ETHERTYPE_IPV6: 2488 if ((cap & IFCAP_RXCSUM_IPV6) == 0) 2489 return (1); 2490 c = mxge_rx_csum6((eh + 1), m, csum); 2491 break; 2492 #endif 2493 default: 2494 c = 1; 2495 } 2496 return (c); 2497 } 2498 2499 static void 2500 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2501 { 2502 struct ether_vlan_header *evl; 2503 uint32_t partial; 2504 2505 evl = mtod(m, struct ether_vlan_header *); 2506 2507 /* 2508 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes 2509 * after what the firmware thought was the end of the ethernet 2510 * header. 2511 */ 2512 2513 /* put checksum into host byte order */ 2514 *csum = ntohs(*csum); 2515 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2516 (*csum) += ~partial; 2517 (*csum) += ((*csum) < ~partial); 2518 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2519 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2520 2521 /* restore checksum to network byte order; 2522 later consumers expect this */ 2523 *csum = htons(*csum); 2524 2525 /* save the tag */ 2526 #ifdef MXGE_NEW_VLAN_API 2527 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); 2528 #else 2529 { 2530 struct m_tag *mtag; 2531 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), 2532 M_NOWAIT); 2533 if (mtag == NULL) 2534 return; 2535 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); 2536 m_tag_prepend(m, mtag); 2537 } 2538 2539 #endif 2540 m->m_flags |= M_VLANTAG; 2541 2542 /* 2543 * Remove the 802.1q header by copying the Ethernet 2544 * addresses over it and adjusting the beginning of 2545 * the data in the mbuf. The encapsulated Ethernet 2546 * type field is already in place. 2547 */ 2548 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2549 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2550 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2551 } 2552 2553 static inline void 2554 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, 2555 uint32_t csum, int lro) 2556 { 2557 mxge_softc_t *sc; 2558 if_t ifp; 2559 struct mbuf *m; 2560 struct ether_header *eh; 2561 mxge_rx_ring_t *rx; 2562 bus_dmamap_t old_map; 2563 int idx; 2564 2565 sc = ss->sc; 2566 ifp = sc->ifp; 2567 rx = &ss->rx_big; 2568 idx = rx->cnt & rx->mask; 2569 rx->cnt += rx->nbufs; 2570 /* save a pointer to the received mbuf */ 2571 m = rx->info[idx].m; 2572 /* try to replace the received mbuf */ 2573 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2574 /* drop the frame -- the old mbuf is re-cycled */ 2575 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 2576 return; 2577 } 2578 2579 /* unmap the received buffer */ 2580 old_map = rx->info[idx].map; 2581 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2582 bus_dmamap_unload(rx->dmat, old_map); 2583 2584 /* swap the bus_dmamap_t's */ 2585 rx->info[idx].map = rx->extra_map; 2586 rx->extra_map = old_map; 2587 2588 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2589 * aligned */ 2590 m->m_data += MXGEFW_PAD; 2591 2592 m->m_pkthdr.rcvif = ifp; 2593 m->m_len = m->m_pkthdr.len = len; 2594 ss->ipackets++; 2595 eh = mtod(m, struct ether_header *); 2596 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2597 mxge_vlan_tag_remove(m, &csum); 2598 } 2599 /* flowid only valid if RSS hashing is enabled */ 2600 if (sc->num_slices > 1) { 2601 m->m_pkthdr.flowid = (ss - sc->ss); 2602 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 2603 } 2604 /* if the checksum is valid, mark it in the mbuf header */ 2605 if ((if_getcapenable(ifp) & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && 2606 (0 == mxge_rx_csum(m, csum))) { 2607 /* Tell the stack that the checksum is good */ 2608 m->m_pkthdr.csum_data = 0xffff; 2609 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2610 CSUM_DATA_VALID; 2611 2612 #if defined(INET) || defined (INET6) 2613 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0))) 2614 return; 2615 #endif 2616 } 2617 /* pass the frame up the stack */ 2618 if_input(ifp, m); 2619 } 2620 2621 static inline void 2622 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, 2623 uint32_t csum, int lro) 2624 { 2625 mxge_softc_t *sc; 2626 if_t ifp; 2627 struct ether_header *eh; 2628 struct mbuf *m; 2629 mxge_rx_ring_t *rx; 2630 bus_dmamap_t old_map; 2631 int idx; 2632 2633 sc = ss->sc; 2634 ifp = sc->ifp; 2635 rx = &ss->rx_small; 2636 idx = rx->cnt & rx->mask; 2637 rx->cnt++; 2638 /* save a pointer to the received mbuf */ 2639 m = rx->info[idx].m; 2640 /* try to replace the received mbuf */ 2641 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2642 /* drop the frame -- the old mbuf is re-cycled */ 2643 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 2644 return; 2645 } 2646 2647 /* unmap the received buffer */ 2648 old_map = rx->info[idx].map; 2649 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2650 bus_dmamap_unload(rx->dmat, old_map); 2651 2652 /* swap the bus_dmamap_t's */ 2653 rx->info[idx].map = rx->extra_map; 2654 rx->extra_map = old_map; 2655 2656 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2657 * aligned */ 2658 m->m_data += MXGEFW_PAD; 2659 2660 m->m_pkthdr.rcvif = ifp; 2661 m->m_len = m->m_pkthdr.len = len; 2662 ss->ipackets++; 2663 eh = mtod(m, struct ether_header *); 2664 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2665 mxge_vlan_tag_remove(m, &csum); 2666 } 2667 /* flowid only valid if RSS hashing is enabled */ 2668 if (sc->num_slices > 1) { 2669 m->m_pkthdr.flowid = (ss - sc->ss); 2670 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 2671 } 2672 /* if the checksum is valid, mark it in the mbuf header */ 2673 if ((if_getcapenable(ifp) & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && 2674 (0 == mxge_rx_csum(m, csum))) { 2675 /* Tell the stack that the checksum is good */ 2676 m->m_pkthdr.csum_data = 0xffff; 2677 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2678 CSUM_DATA_VALID; 2679 2680 #if defined(INET) || defined (INET6) 2681 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum))) 2682 return; 2683 #endif 2684 } 2685 /* pass the frame up the stack */ 2686 if_input(ifp, m); 2687 } 2688 2689 static inline void 2690 mxge_clean_rx_done(struct mxge_slice_state *ss) 2691 { 2692 mxge_rx_done_t *rx_done = &ss->rx_done; 2693 int limit = 0; 2694 uint16_t length; 2695 uint16_t checksum; 2696 int lro; 2697 2698 lro = if_getcapenable(ss->sc->ifp) & IFCAP_LRO; 2699 while (rx_done->entry[rx_done->idx].length != 0) { 2700 length = ntohs(rx_done->entry[rx_done->idx].length); 2701 rx_done->entry[rx_done->idx].length = 0; 2702 checksum = rx_done->entry[rx_done->idx].checksum; 2703 if (length <= (MHLEN - MXGEFW_PAD)) 2704 mxge_rx_done_small(ss, length, checksum, lro); 2705 else 2706 mxge_rx_done_big(ss, length, checksum, lro); 2707 rx_done->cnt++; 2708 rx_done->idx = rx_done->cnt & rx_done->mask; 2709 2710 /* limit potential for livelock */ 2711 if (__predict_false(++limit > rx_done->mask / 2)) 2712 break; 2713 } 2714 #if defined(INET) || defined (INET6) 2715 tcp_lro_flush_all(&ss->lc); 2716 #endif 2717 } 2718 2719 static inline void 2720 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2721 { 2722 if_t ifp __unused; 2723 mxge_tx_ring_t *tx; 2724 struct mbuf *m; 2725 bus_dmamap_t map; 2726 int idx; 2727 int *flags; 2728 2729 tx = &ss->tx; 2730 ifp = ss->sc->ifp; 2731 while (tx->pkt_done != mcp_idx) { 2732 idx = tx->done & tx->mask; 2733 tx->done++; 2734 m = tx->info[idx].m; 2735 /* mbuf and DMA map only attached to the first 2736 segment per-mbuf */ 2737 if (m != NULL) { 2738 ss->obytes += m->m_pkthdr.len; 2739 if (m->m_flags & M_MCAST) 2740 ss->omcasts++; 2741 ss->opackets++; 2742 tx->info[idx].m = NULL; 2743 map = tx->info[idx].map; 2744 bus_dmamap_unload(tx->dmat, map); 2745 m_freem(m); 2746 } 2747 if (tx->info[idx].flag) { 2748 tx->info[idx].flag = 0; 2749 tx->pkt_done++; 2750 } 2751 } 2752 2753 /* If we have space, clear IFF_OACTIVE to tell the stack that 2754 its OK to send packets */ 2755 flags = &ss->if_drv_flags; 2756 mtx_lock(&ss->tx.mtx); 2757 if ((*flags) & IFF_DRV_OACTIVE && 2758 tx->req - tx->done < (tx->mask + 1)/4) { 2759 *(flags) &= ~IFF_DRV_OACTIVE; 2760 ss->tx.wake++; 2761 mxge_start_locked(ss); 2762 } 2763 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { 2764 /* let the NIC stop polling this queue, since there 2765 * are no more transmits pending */ 2766 if (tx->req == tx->done) { 2767 *tx->send_stop = 1; 2768 tx->queue_active = 0; 2769 tx->deactivate++; 2770 wmb(); 2771 } 2772 } 2773 mtx_unlock(&ss->tx.mtx); 2774 } 2775 2776 static struct mxge_media_type mxge_xfp_media_types[] = 2777 { 2778 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2779 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2780 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2781 {0, (1 << 5), "10GBASE-ER"}, 2782 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2783 {0, (1 << 3), "10GBASE-SW"}, 2784 {0, (1 << 2), "10GBASE-LW"}, 2785 {0, (1 << 1), "10GBASE-EW"}, 2786 {0, (1 << 0), "Reserved"} 2787 }; 2788 static struct mxge_media_type mxge_sfp_media_types[] = 2789 { 2790 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"}, 2791 {0, (1 << 7), "Reserved"}, 2792 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2793 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2794 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}, 2795 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"} 2796 }; 2797 2798 static void 2799 mxge_media_set(mxge_softc_t *sc, int media_type) 2800 { 2801 2802 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type, 2803 0, NULL); 2804 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type); 2805 sc->current_media = media_type; 2806 sc->media.ifm_media = sc->media.ifm_cur->ifm_media; 2807 } 2808 2809 static void 2810 mxge_media_init(mxge_softc_t *sc) 2811 { 2812 char *ptr; 2813 int i; 2814 2815 ifmedia_removeall(&sc->media); 2816 mxge_media_set(sc, IFM_AUTO); 2817 2818 /* 2819 * parse the product code to deterimine the interface type 2820 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2821 * after the 3rd dash in the driver's cached copy of the 2822 * EEPROM's product code string. 2823 */ 2824 ptr = sc->product_code_string; 2825 if (ptr == NULL) { 2826 device_printf(sc->dev, "Missing product code\n"); 2827 return; 2828 } 2829 2830 for (i = 0; i < 3; i++, ptr++) { 2831 ptr = strchr(ptr, '-'); 2832 if (ptr == NULL) { 2833 device_printf(sc->dev, 2834 "only %d dashes in PC?!?\n", i); 2835 return; 2836 } 2837 } 2838 if (*ptr == 'C' || *(ptr +1) == 'C') { 2839 /* -C is CX4 */ 2840 sc->connector = MXGE_CX4; 2841 mxge_media_set(sc, IFM_10G_CX4); 2842 } else if (*ptr == 'Q') { 2843 /* -Q is Quad Ribbon Fiber */ 2844 sc->connector = MXGE_QRF; 2845 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2846 /* FreeBSD has no media type for Quad ribbon fiber */ 2847 } else if (*ptr == 'R') { 2848 /* -R is XFP */ 2849 sc->connector = MXGE_XFP; 2850 } else if (*ptr == 'S' || *(ptr +1) == 'S') { 2851 /* -S or -2S is SFP+ */ 2852 sc->connector = MXGE_SFP; 2853 } else { 2854 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2855 } 2856 } 2857 2858 /* 2859 * Determine the media type for a NIC. Some XFPs will identify 2860 * themselves only when their link is up, so this is initiated via a 2861 * link up interrupt. However, this can potentially take up to 2862 * several milliseconds, so it is run via the watchdog routine, rather 2863 * than in the interrupt handler itself. 2864 */ 2865 static void 2866 mxge_media_probe(mxge_softc_t *sc) 2867 { 2868 mxge_cmd_t cmd; 2869 char *cage_type; 2870 2871 struct mxge_media_type *mxge_media_types = NULL; 2872 int i, err, ms, mxge_media_type_entries; 2873 uint32_t byte; 2874 2875 sc->need_media_probe = 0; 2876 2877 if (sc->connector == MXGE_XFP) { 2878 /* -R is XFP */ 2879 mxge_media_types = mxge_xfp_media_types; 2880 mxge_media_type_entries = 2881 nitems(mxge_xfp_media_types); 2882 byte = MXGE_XFP_COMPLIANCE_BYTE; 2883 cage_type = "XFP"; 2884 } else if (sc->connector == MXGE_SFP) { 2885 /* -S or -2S is SFP+ */ 2886 mxge_media_types = mxge_sfp_media_types; 2887 mxge_media_type_entries = 2888 nitems(mxge_sfp_media_types); 2889 cage_type = "SFP+"; 2890 byte = 3; 2891 } else { 2892 /* nothing to do; media type cannot change */ 2893 return; 2894 } 2895 2896 /* 2897 * At this point we know the NIC has an XFP cage, so now we 2898 * try to determine what is in the cage by using the 2899 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2900 * register. We read just one byte, which may take over 2901 * a millisecond 2902 */ 2903 2904 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2905 cmd.data1 = byte; 2906 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2907 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 2908 device_printf(sc->dev, "failed to read XFP\n"); 2909 } 2910 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 2911 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 2912 } 2913 if (err != MXGEFW_CMD_OK) { 2914 return; 2915 } 2916 2917 /* now we wait for the data to be cached */ 2918 cmd.data0 = byte; 2919 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2920 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 2921 DELAY(1000); 2922 cmd.data0 = byte; 2923 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2924 } 2925 if (err != MXGEFW_CMD_OK) { 2926 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 2927 cage_type, err, ms); 2928 return; 2929 } 2930 2931 if (cmd.data0 == mxge_media_types[0].bitmask) { 2932 if (mxge_verbose) 2933 device_printf(sc->dev, "%s:%s\n", cage_type, 2934 mxge_media_types[0].name); 2935 if (sc->current_media != mxge_media_types[0].flag) { 2936 mxge_media_init(sc); 2937 mxge_media_set(sc, mxge_media_types[0].flag); 2938 } 2939 return; 2940 } 2941 for (i = 1; i < mxge_media_type_entries; i++) { 2942 if (cmd.data0 & mxge_media_types[i].bitmask) { 2943 if (mxge_verbose) 2944 device_printf(sc->dev, "%s:%s\n", 2945 cage_type, 2946 mxge_media_types[i].name); 2947 2948 if (sc->current_media != mxge_media_types[i].flag) { 2949 mxge_media_init(sc); 2950 mxge_media_set(sc, mxge_media_types[i].flag); 2951 } 2952 return; 2953 } 2954 } 2955 if (mxge_verbose) 2956 device_printf(sc->dev, "%s media 0x%x unknown\n", 2957 cage_type, cmd.data0); 2958 2959 return; 2960 } 2961 2962 static void 2963 mxge_intr(void *arg) 2964 { 2965 struct mxge_slice_state *ss = arg; 2966 mxge_softc_t *sc = ss->sc; 2967 mcp_irq_data_t *stats = ss->fw_stats; 2968 mxge_tx_ring_t *tx = &ss->tx; 2969 mxge_rx_done_t *rx_done = &ss->rx_done; 2970 uint32_t send_done_count; 2971 uint8_t valid; 2972 2973 /* make sure the DMA has finished */ 2974 if (!stats->valid) { 2975 return; 2976 } 2977 valid = stats->valid; 2978 2979 if (sc->legacy_irq) { 2980 /* lower legacy IRQ */ 2981 *sc->irq_deassert = 0; 2982 if (!mxge_deassert_wait) 2983 /* don't wait for conf. that irq is low */ 2984 stats->valid = 0; 2985 } else { 2986 stats->valid = 0; 2987 } 2988 2989 /* loop while waiting for legacy irq deassertion */ 2990 do { 2991 /* check for transmit completes and receives */ 2992 send_done_count = be32toh(stats->send_done_count); 2993 while ((send_done_count != tx->pkt_done) || 2994 (rx_done->entry[rx_done->idx].length != 0)) { 2995 if (send_done_count != tx->pkt_done) 2996 mxge_tx_done(ss, (int)send_done_count); 2997 mxge_clean_rx_done(ss); 2998 send_done_count = be32toh(stats->send_done_count); 2999 } 3000 if (sc->legacy_irq && mxge_deassert_wait) 3001 wmb(); 3002 } while (*((volatile uint8_t *) &stats->valid)); 3003 3004 /* fw link & error stats meaningful only on the first slice */ 3005 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 3006 if (sc->link_state != stats->link_up) { 3007 sc->link_state = stats->link_up; 3008 if (sc->link_state) { 3009 if_link_state_change(sc->ifp, LINK_STATE_UP); 3010 if (mxge_verbose) 3011 device_printf(sc->dev, "link up\n"); 3012 } else { 3013 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3014 if (mxge_verbose) 3015 device_printf(sc->dev, "link down\n"); 3016 } 3017 sc->need_media_probe = 1; 3018 } 3019 if (sc->rdma_tags_available != 3020 be32toh(stats->rdma_tags_available)) { 3021 sc->rdma_tags_available = 3022 be32toh(stats->rdma_tags_available); 3023 device_printf(sc->dev, "RDMA timed out! %d tags " 3024 "left\n", sc->rdma_tags_available); 3025 } 3026 3027 if (stats->link_down) { 3028 sc->down_cnt += stats->link_down; 3029 sc->link_state = 0; 3030 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3031 } 3032 } 3033 3034 /* check to see if we have rx token to pass back */ 3035 if (valid & 0x1) 3036 *ss->irq_claim = be32toh(3); 3037 *(ss->irq_claim + 1) = be32toh(3); 3038 } 3039 3040 static void 3041 mxge_init(void *arg) 3042 { 3043 mxge_softc_t *sc = arg; 3044 if_t ifp = sc->ifp; 3045 3046 mtx_lock(&sc->driver_mtx); 3047 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) 3048 (void) mxge_open(sc); 3049 mtx_unlock(&sc->driver_mtx); 3050 } 3051 3052 static void 3053 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 3054 { 3055 int i; 3056 3057 #if defined(INET) || defined(INET6) 3058 tcp_lro_free(&ss->lc); 3059 #endif 3060 for (i = 0; i <= ss->rx_big.mask; i++) { 3061 if (ss->rx_big.info[i].m == NULL) 3062 continue; 3063 bus_dmamap_unload(ss->rx_big.dmat, 3064 ss->rx_big.info[i].map); 3065 m_freem(ss->rx_big.info[i].m); 3066 ss->rx_big.info[i].m = NULL; 3067 } 3068 3069 for (i = 0; i <= ss->rx_small.mask; i++) { 3070 if (ss->rx_small.info[i].m == NULL) 3071 continue; 3072 bus_dmamap_unload(ss->rx_small.dmat, 3073 ss->rx_small.info[i].map); 3074 m_freem(ss->rx_small.info[i].m); 3075 ss->rx_small.info[i].m = NULL; 3076 } 3077 3078 /* transmit ring used only on the first slice */ 3079 if (ss->tx.info == NULL) 3080 return; 3081 3082 for (i = 0; i <= ss->tx.mask; i++) { 3083 ss->tx.info[i].flag = 0; 3084 if (ss->tx.info[i].m == NULL) 3085 continue; 3086 bus_dmamap_unload(ss->tx.dmat, 3087 ss->tx.info[i].map); 3088 m_freem(ss->tx.info[i].m); 3089 ss->tx.info[i].m = NULL; 3090 } 3091 } 3092 3093 static void 3094 mxge_free_mbufs(mxge_softc_t *sc) 3095 { 3096 int slice; 3097 3098 for (slice = 0; slice < sc->num_slices; slice++) 3099 mxge_free_slice_mbufs(&sc->ss[slice]); 3100 } 3101 3102 static void 3103 mxge_free_slice_rings(struct mxge_slice_state *ss) 3104 { 3105 int i; 3106 3107 if (ss->rx_done.entry != NULL) 3108 mxge_dma_free(&ss->rx_done.dma); 3109 ss->rx_done.entry = NULL; 3110 3111 if (ss->tx.req_bytes != NULL) 3112 free(ss->tx.req_bytes, M_DEVBUF); 3113 ss->tx.req_bytes = NULL; 3114 3115 if (ss->tx.seg_list != NULL) 3116 free(ss->tx.seg_list, M_DEVBUF); 3117 ss->tx.seg_list = NULL; 3118 3119 if (ss->rx_small.shadow != NULL) 3120 free(ss->rx_small.shadow, M_DEVBUF); 3121 ss->rx_small.shadow = NULL; 3122 3123 if (ss->rx_big.shadow != NULL) 3124 free(ss->rx_big.shadow, M_DEVBUF); 3125 ss->rx_big.shadow = NULL; 3126 3127 if (ss->tx.info != NULL) { 3128 if (ss->tx.dmat != NULL) { 3129 for (i = 0; i <= ss->tx.mask; i++) { 3130 bus_dmamap_destroy(ss->tx.dmat, 3131 ss->tx.info[i].map); 3132 } 3133 bus_dma_tag_destroy(ss->tx.dmat); 3134 } 3135 free(ss->tx.info, M_DEVBUF); 3136 } 3137 ss->tx.info = NULL; 3138 3139 if (ss->rx_small.info != NULL) { 3140 if (ss->rx_small.dmat != NULL) { 3141 for (i = 0; i <= ss->rx_small.mask; i++) { 3142 bus_dmamap_destroy(ss->rx_small.dmat, 3143 ss->rx_small.info[i].map); 3144 } 3145 bus_dmamap_destroy(ss->rx_small.dmat, 3146 ss->rx_small.extra_map); 3147 bus_dma_tag_destroy(ss->rx_small.dmat); 3148 } 3149 free(ss->rx_small.info, M_DEVBUF); 3150 } 3151 ss->rx_small.info = NULL; 3152 3153 if (ss->rx_big.info != NULL) { 3154 if (ss->rx_big.dmat != NULL) { 3155 for (i = 0; i <= ss->rx_big.mask; i++) { 3156 bus_dmamap_destroy(ss->rx_big.dmat, 3157 ss->rx_big.info[i].map); 3158 } 3159 bus_dmamap_destroy(ss->rx_big.dmat, 3160 ss->rx_big.extra_map); 3161 bus_dma_tag_destroy(ss->rx_big.dmat); 3162 } 3163 free(ss->rx_big.info, M_DEVBUF); 3164 } 3165 ss->rx_big.info = NULL; 3166 } 3167 3168 static void 3169 mxge_free_rings(mxge_softc_t *sc) 3170 { 3171 int slice; 3172 3173 for (slice = 0; slice < sc->num_slices; slice++) 3174 mxge_free_slice_rings(&sc->ss[slice]); 3175 } 3176 3177 static int 3178 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3179 int tx_ring_entries) 3180 { 3181 mxge_softc_t *sc = ss->sc; 3182 size_t bytes; 3183 int err, i; 3184 3185 /* allocate per-slice receive resources */ 3186 3187 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 3188 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 3189 3190 /* allocate the rx shadow rings */ 3191 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 3192 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3193 3194 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 3195 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3196 3197 /* allocate the rx host info rings */ 3198 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 3199 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3200 3201 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 3202 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3203 3204 /* allocate the rx busdma resources */ 3205 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3206 1, /* alignment */ 3207 4096, /* boundary */ 3208 BUS_SPACE_MAXADDR, /* low */ 3209 BUS_SPACE_MAXADDR, /* high */ 3210 NULL, NULL, /* filter */ 3211 MHLEN, /* maxsize */ 3212 1, /* num segs */ 3213 MHLEN, /* maxsegsize */ 3214 BUS_DMA_ALLOCNOW, /* flags */ 3215 NULL, NULL, /* lock */ 3216 &ss->rx_small.dmat); /* tag */ 3217 if (err != 0) { 3218 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3219 err); 3220 return err; 3221 } 3222 3223 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3224 1, /* alignment */ 3225 0, /* boundary */ 3226 BUS_SPACE_MAXADDR, /* low */ 3227 BUS_SPACE_MAXADDR, /* high */ 3228 NULL, NULL, /* filter */ 3229 3*4096, /* maxsize */ 3230 1, /* num segs */ 3231 MJUM9BYTES, /* maxsegsize*/ 3232 BUS_DMA_ALLOCNOW, /* flags */ 3233 NULL, NULL, /* lock */ 3234 &ss->rx_big.dmat); /* tag */ 3235 if (err != 0) { 3236 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3237 err); 3238 return err; 3239 } 3240 for (i = 0; i <= ss->rx_small.mask; i++) { 3241 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3242 &ss->rx_small.info[i].map); 3243 if (err != 0) { 3244 device_printf(sc->dev, "Err %d rx_small dmamap\n", 3245 err); 3246 return err; 3247 } 3248 } 3249 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3250 &ss->rx_small.extra_map); 3251 if (err != 0) { 3252 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 3253 err); 3254 return err; 3255 } 3256 3257 for (i = 0; i <= ss->rx_big.mask; i++) { 3258 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3259 &ss->rx_big.info[i].map); 3260 if (err != 0) { 3261 device_printf(sc->dev, "Err %d rx_big dmamap\n", 3262 err); 3263 return err; 3264 } 3265 } 3266 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3267 &ss->rx_big.extra_map); 3268 if (err != 0) { 3269 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 3270 err); 3271 return err; 3272 } 3273 3274 /* now allocate TX resources */ 3275 3276 ss->tx.mask = tx_ring_entries - 1; 3277 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3278 3279 /* allocate the tx request copy block */ 3280 bytes = 8 + 3281 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3282 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 3283 /* ensure req_list entries are aligned to 8 bytes */ 3284 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3285 ((uintptr_t)(ss->tx.req_bytes + 7) & ~7UL); 3286 3287 /* allocate the tx busdma segment list */ 3288 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3289 ss->tx.seg_list = (bus_dma_segment_t *) 3290 malloc(bytes, M_DEVBUF, M_WAITOK); 3291 3292 /* allocate the tx host info ring */ 3293 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3294 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3295 3296 /* allocate the tx busdma resources */ 3297 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3298 1, /* alignment */ 3299 sc->tx_boundary, /* boundary */ 3300 BUS_SPACE_MAXADDR, /* low */ 3301 BUS_SPACE_MAXADDR, /* high */ 3302 NULL, NULL, /* filter */ 3303 65536 + 256, /* maxsize */ 3304 ss->tx.max_desc - 2, /* num segs */ 3305 sc->tx_boundary, /* maxsegsz */ 3306 BUS_DMA_ALLOCNOW, /* flags */ 3307 NULL, NULL, /* lock */ 3308 &ss->tx.dmat); /* tag */ 3309 3310 if (err != 0) { 3311 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3312 err); 3313 return err; 3314 } 3315 3316 /* now use these tags to setup dmamaps for each slot 3317 in the ring */ 3318 for (i = 0; i <= ss->tx.mask; i++) { 3319 err = bus_dmamap_create(ss->tx.dmat, 0, 3320 &ss->tx.info[i].map); 3321 if (err != 0) { 3322 device_printf(sc->dev, "Err %d tx dmamap\n", 3323 err); 3324 return err; 3325 } 3326 } 3327 return 0; 3328 3329 } 3330 3331 static int 3332 mxge_alloc_rings(mxge_softc_t *sc) 3333 { 3334 mxge_cmd_t cmd; 3335 int tx_ring_size; 3336 int tx_ring_entries, rx_ring_entries; 3337 int err, slice; 3338 3339 /* get ring sizes */ 3340 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3341 tx_ring_size = cmd.data0; 3342 if (err != 0) { 3343 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3344 goto abort; 3345 } 3346 3347 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3348 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3349 if_setsendqlen(sc->ifp, tx_ring_entries - 1); 3350 if_setsendqready(sc->ifp); 3351 3352 for (slice = 0; slice < sc->num_slices; slice++) { 3353 err = mxge_alloc_slice_rings(&sc->ss[slice], 3354 rx_ring_entries, 3355 tx_ring_entries); 3356 if (err != 0) 3357 goto abort; 3358 } 3359 return 0; 3360 3361 abort: 3362 mxge_free_rings(sc); 3363 return err; 3364 3365 } 3366 3367 static void 3368 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3369 { 3370 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3371 3372 if (bufsize < MCLBYTES) { 3373 /* easy, everything fits in a single buffer */ 3374 *big_buf_size = MCLBYTES; 3375 *cl_size = MCLBYTES; 3376 *nbufs = 1; 3377 return; 3378 } 3379 3380 if (bufsize < MJUMPAGESIZE) { 3381 /* still easy, everything still fits in a single buffer */ 3382 *big_buf_size = MJUMPAGESIZE; 3383 *cl_size = MJUMPAGESIZE; 3384 *nbufs = 1; 3385 return; 3386 } 3387 *cl_size = MJUM9BYTES; 3388 *big_buf_size = MJUM9BYTES; 3389 *nbufs = 1; 3390 } 3391 3392 static int 3393 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3394 { 3395 mxge_softc_t *sc; 3396 mxge_cmd_t cmd; 3397 bus_dmamap_t map; 3398 int err, i, slice; 3399 3400 sc = ss->sc; 3401 slice = ss - sc->ss; 3402 3403 #if defined(INET) || defined(INET6) 3404 (void)tcp_lro_init(&ss->lc); 3405 #endif 3406 ss->lc.ifp = sc->ifp; 3407 3408 /* get the lanai pointers to the send and receive rings */ 3409 3410 err = 0; 3411 3412 cmd.data0 = slice; 3413 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3414 ss->tx.lanai = 3415 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3416 ss->tx.send_go = (volatile uint32_t *) 3417 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3418 ss->tx.send_stop = (volatile uint32_t *) 3419 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3420 3421 cmd.data0 = slice; 3422 err |= mxge_send_cmd(sc, 3423 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3424 ss->rx_small.lanai = 3425 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3426 cmd.data0 = slice; 3427 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3428 ss->rx_big.lanai = 3429 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3430 3431 if (err != 0) { 3432 device_printf(sc->dev, 3433 "failed to get ring sizes or locations\n"); 3434 return EIO; 3435 } 3436 3437 /* stock receive rings */ 3438 for (i = 0; i <= ss->rx_small.mask; i++) { 3439 map = ss->rx_small.info[i].map; 3440 err = mxge_get_buf_small(ss, map, i); 3441 if (err) { 3442 device_printf(sc->dev, "alloced %d/%d smalls\n", 3443 i, ss->rx_small.mask + 1); 3444 return ENOMEM; 3445 } 3446 } 3447 for (i = 0; i <= ss->rx_big.mask; i++) { 3448 ss->rx_big.shadow[i].addr_low = 0xffffffff; 3449 ss->rx_big.shadow[i].addr_high = 0xffffffff; 3450 } 3451 ss->rx_big.nbufs = nbufs; 3452 ss->rx_big.cl_size = cl_size; 3453 ss->rx_big.mlen = if_getmtu(ss->sc->ifp) + ETHER_HDR_LEN + 3454 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3455 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { 3456 map = ss->rx_big.info[i].map; 3457 err = mxge_get_buf_big(ss, map, i); 3458 if (err) { 3459 device_printf(sc->dev, "alloced %d/%d bigs\n", 3460 i, ss->rx_big.mask + 1); 3461 return ENOMEM; 3462 } 3463 } 3464 return 0; 3465 } 3466 3467 static int 3468 mxge_open(mxge_softc_t *sc) 3469 { 3470 mxge_cmd_t cmd; 3471 int err, big_bytes, nbufs, slice, cl_size, i; 3472 bus_addr_t bus; 3473 volatile uint8_t *itable; 3474 struct mxge_slice_state *ss; 3475 3476 /* Copy the MAC address in case it was overridden */ 3477 bcopy(if_getlladdr(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 3478 3479 err = mxge_reset(sc, 1); 3480 if (err != 0) { 3481 device_printf(sc->dev, "failed to reset\n"); 3482 return EIO; 3483 } 3484 3485 if (sc->num_slices > 1) { 3486 /* setup the indirection table */ 3487 cmd.data0 = sc->num_slices; 3488 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 3489 &cmd); 3490 3491 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 3492 &cmd); 3493 if (err != 0) { 3494 device_printf(sc->dev, 3495 "failed to setup rss tables\n"); 3496 return err; 3497 } 3498 3499 /* just enable an identity mapping */ 3500 itable = sc->sram + cmd.data0; 3501 for (i = 0; i < sc->num_slices; i++) 3502 itable[i] = (uint8_t)i; 3503 3504 cmd.data0 = 1; 3505 cmd.data1 = mxge_rss_hash_type; 3506 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3507 if (err != 0) { 3508 device_printf(sc->dev, "failed to enable slices\n"); 3509 return err; 3510 } 3511 } 3512 3513 mxge_choose_params(if_getmtu(sc->ifp), &big_bytes, &cl_size, &nbufs); 3514 3515 cmd.data0 = nbufs; 3516 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 3517 &cmd); 3518 /* error is only meaningful if we're trying to set 3519 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 3520 if (err && nbufs > 1) { 3521 device_printf(sc->dev, 3522 "Failed to set alway-use-n to %d\n", 3523 nbufs); 3524 return EIO; 3525 } 3526 /* Give the firmware the mtu and the big and small buffer 3527 sizes. The firmware wants the big buf size to be a power 3528 of two. Luckily, FreeBSD's clusters are powers of two */ 3529 cmd.data0 = if_getmtu(sc->ifp) + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3530 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3531 cmd.data0 = MHLEN - MXGEFW_PAD; 3532 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3533 &cmd); 3534 cmd.data0 = big_bytes; 3535 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3536 3537 if (err != 0) { 3538 device_printf(sc->dev, "failed to setup params\n"); 3539 goto abort; 3540 } 3541 3542 /* Now give him the pointer to the stats block */ 3543 for (slice = 0; slice < sc->num_slices; slice++) { 3544 ss = &sc->ss[slice]; 3545 cmd.data0 = 3546 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3547 cmd.data1 = 3548 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3549 cmd.data2 = sizeof(struct mcp_irq_data); 3550 cmd.data2 |= (slice << 16); 3551 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3552 } 3553 3554 if (err != 0) { 3555 bus = sc->ss->fw_stats_dma.bus_addr; 3556 bus += offsetof(struct mcp_irq_data, send_done_count); 3557 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3558 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3559 err = mxge_send_cmd(sc, 3560 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3561 &cmd); 3562 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3563 sc->fw_multicast_support = 0; 3564 } else { 3565 sc->fw_multicast_support = 1; 3566 } 3567 3568 if (err != 0) { 3569 device_printf(sc->dev, "failed to setup params\n"); 3570 goto abort; 3571 } 3572 3573 for (slice = 0; slice < sc->num_slices; slice++) { 3574 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3575 if (err != 0) { 3576 device_printf(sc->dev, "couldn't open slice %d\n", 3577 slice); 3578 goto abort; 3579 } 3580 } 3581 3582 /* Finally, start the firmware running */ 3583 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3584 if (err) { 3585 device_printf(sc->dev, "Couldn't bring up link\n"); 3586 goto abort; 3587 } 3588 for (slice = 0; slice < sc->num_slices; slice++) { 3589 ss = &sc->ss[slice]; 3590 ss->if_drv_flags |= IFF_DRV_RUNNING; 3591 ss->if_drv_flags &= ~IFF_DRV_OACTIVE; 3592 } 3593 if_setdrvflagbits(sc->ifp, IFF_DRV_RUNNING, 0); 3594 if_setdrvflagbits(sc->ifp, 0, IFF_DRV_OACTIVE); 3595 3596 return 0; 3597 3598 abort: 3599 mxge_free_mbufs(sc); 3600 3601 return err; 3602 } 3603 3604 static int 3605 mxge_close(mxge_softc_t *sc, int down) 3606 { 3607 mxge_cmd_t cmd; 3608 int err, old_down_cnt; 3609 struct mxge_slice_state *ss; 3610 int slice; 3611 3612 for (slice = 0; slice < sc->num_slices; slice++) { 3613 ss = &sc->ss[slice]; 3614 ss->if_drv_flags &= ~IFF_DRV_RUNNING; 3615 } 3616 if_setdrvflagbits(sc->ifp, 0, IFF_DRV_RUNNING); 3617 if (!down) { 3618 old_down_cnt = sc->down_cnt; 3619 wmb(); 3620 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3621 if (err) { 3622 device_printf(sc->dev, 3623 "Couldn't bring down link\n"); 3624 } 3625 if (old_down_cnt == sc->down_cnt) { 3626 /* wait for down irq */ 3627 DELAY(10 * sc->intr_coal_delay); 3628 } 3629 wmb(); 3630 if (old_down_cnt == sc->down_cnt) { 3631 device_printf(sc->dev, "never got down irq\n"); 3632 } 3633 } 3634 mxge_free_mbufs(sc); 3635 3636 return 0; 3637 } 3638 3639 static void 3640 mxge_setup_cfg_space(mxge_softc_t *sc) 3641 { 3642 device_t dev = sc->dev; 3643 int reg; 3644 uint16_t lnk, pectl; 3645 3646 /* find the PCIe link width and set max read request to 4KB*/ 3647 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { 3648 lnk = pci_read_config(dev, reg + 0x12, 2); 3649 sc->link_width = (lnk >> 4) & 0x3f; 3650 3651 if (sc->pectl == 0) { 3652 pectl = pci_read_config(dev, reg + 0x8, 2); 3653 pectl = (pectl & ~0x7000) | (5 << 12); 3654 pci_write_config(dev, reg + 0x8, pectl, 2); 3655 sc->pectl = pectl; 3656 } else { 3657 /* restore saved pectl after watchdog reset */ 3658 pci_write_config(dev, reg + 0x8, sc->pectl, 2); 3659 } 3660 } 3661 3662 /* Enable DMA and Memory space access */ 3663 pci_enable_busmaster(dev); 3664 } 3665 3666 static uint32_t 3667 mxge_read_reboot(mxge_softc_t *sc) 3668 { 3669 device_t dev = sc->dev; 3670 uint32_t vs; 3671 3672 /* find the vendor specific offset */ 3673 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) { 3674 device_printf(sc->dev, 3675 "could not find vendor specific offset\n"); 3676 return (uint32_t)-1; 3677 } 3678 /* enable read32 mode */ 3679 pci_write_config(dev, vs + 0x10, 0x3, 1); 3680 /* tell NIC which register to read */ 3681 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3682 return (pci_read_config(dev, vs + 0x14, 4)); 3683 } 3684 3685 static void 3686 mxge_watchdog_reset(mxge_softc_t *sc) 3687 { 3688 struct pci_devinfo *dinfo; 3689 struct mxge_slice_state *ss; 3690 int err, running, s, num_tx_slices = 1; 3691 uint32_t reboot; 3692 uint16_t cmd; 3693 3694 err = ENXIO; 3695 3696 device_printf(sc->dev, "Watchdog reset!\n"); 3697 3698 /* 3699 * check to see if the NIC rebooted. If it did, then all of 3700 * PCI config space has been reset, and things like the 3701 * busmaster bit will be zero. If this is the case, then we 3702 * must restore PCI config space before the NIC can be used 3703 * again 3704 */ 3705 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3706 if (cmd == 0xffff) { 3707 /* 3708 * maybe the watchdog caught the NIC rebooting; wait 3709 * up to 100ms for it to finish. If it does not come 3710 * back, then give up 3711 */ 3712 DELAY(1000*100); 3713 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3714 if (cmd == 0xffff) { 3715 device_printf(sc->dev, "NIC disappeared!\n"); 3716 } 3717 } 3718 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3719 /* print the reboot status */ 3720 reboot = mxge_read_reboot(sc); 3721 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3722 reboot); 3723 running = if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING; 3724 if (running) { 3725 /* 3726 * quiesce NIC so that TX routines will not try to 3727 * xmit after restoration of BAR 3728 */ 3729 3730 /* Mark the link as down */ 3731 if (sc->link_state) { 3732 sc->link_state = 0; 3733 if_link_state_change(sc->ifp, 3734 LINK_STATE_DOWN); 3735 } 3736 3737 num_tx_slices = sc->num_slices; 3738 3739 /* grab all TX locks to ensure no tx */ 3740 for (s = 0; s < num_tx_slices; s++) { 3741 ss = &sc->ss[s]; 3742 mtx_lock(&ss->tx.mtx); 3743 } 3744 mxge_close(sc, 1); 3745 } 3746 /* restore PCI configuration space */ 3747 dinfo = device_get_ivars(sc->dev); 3748 pci_cfg_restore(sc->dev, dinfo); 3749 3750 /* and redo any changes we made to our config space */ 3751 mxge_setup_cfg_space(sc); 3752 3753 /* reload f/w */ 3754 err = mxge_load_firmware(sc, 0); 3755 if (err) { 3756 device_printf(sc->dev, 3757 "Unable to re-load f/w\n"); 3758 } 3759 if (running) { 3760 if (!err) 3761 err = mxge_open(sc); 3762 /* release all TX locks */ 3763 for (s = 0; s < num_tx_slices; s++) { 3764 ss = &sc->ss[s]; 3765 mxge_start_locked(ss); 3766 mtx_unlock(&ss->tx.mtx); 3767 } 3768 } 3769 sc->watchdog_resets++; 3770 } else { 3771 device_printf(sc->dev, 3772 "NIC did not reboot, not resetting\n"); 3773 err = 0; 3774 } 3775 if (err) { 3776 device_printf(sc->dev, "watchdog reset failed\n"); 3777 } else { 3778 if (sc->dying == 2) 3779 sc->dying = 0; 3780 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3781 } 3782 } 3783 3784 static void 3785 mxge_watchdog_task(void *arg, int pending) 3786 { 3787 mxge_softc_t *sc = arg; 3788 3789 mtx_lock(&sc->driver_mtx); 3790 mxge_watchdog_reset(sc); 3791 mtx_unlock(&sc->driver_mtx); 3792 } 3793 3794 static void 3795 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice) 3796 { 3797 tx = &sc->ss[slice].tx; 3798 device_printf(sc->dev, "slice %d struck? ring state:\n", slice); 3799 device_printf(sc->dev, 3800 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3801 tx->req, tx->done, tx->queue_active); 3802 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3803 tx->activate, tx->deactivate); 3804 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3805 tx->pkt_done, 3806 be32toh(sc->ss->fw_stats->send_done_count)); 3807 } 3808 3809 static int 3810 mxge_watchdog(mxge_softc_t *sc) 3811 { 3812 mxge_tx_ring_t *tx; 3813 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3814 int i, err = 0; 3815 3816 /* see if we have outstanding transmits, which 3817 have been pending for more than mxge_ticks */ 3818 for (i = 0; (i < sc->num_slices) && (err == 0); i++) { 3819 tx = &sc->ss[i].tx; 3820 if (tx->req != tx->done && 3821 tx->watchdog_req != tx->watchdog_done && 3822 tx->done == tx->watchdog_done) { 3823 /* check for pause blocking before resetting */ 3824 if (tx->watchdog_rx_pause == rx_pause) { 3825 mxge_warn_stuck(sc, tx, i); 3826 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 3827 return (ENXIO); 3828 } 3829 else 3830 device_printf(sc->dev, "Flow control blocking " 3831 "xmits, check link partner\n"); 3832 } 3833 3834 tx->watchdog_req = tx->req; 3835 tx->watchdog_done = tx->done; 3836 tx->watchdog_rx_pause = rx_pause; 3837 } 3838 3839 if (sc->need_media_probe) 3840 mxge_media_probe(sc); 3841 return (err); 3842 } 3843 3844 static uint64_t 3845 mxge_get_counter(if_t ifp, ift_counter cnt) 3846 { 3847 struct mxge_softc *sc; 3848 uint64_t rv; 3849 3850 sc = if_getsoftc(ifp); 3851 rv = 0; 3852 3853 switch (cnt) { 3854 case IFCOUNTER_IPACKETS: 3855 for (int s = 0; s < sc->num_slices; s++) 3856 rv += sc->ss[s].ipackets; 3857 return (rv); 3858 case IFCOUNTER_OPACKETS: 3859 for (int s = 0; s < sc->num_slices; s++) 3860 rv += sc->ss[s].opackets; 3861 return (rv); 3862 case IFCOUNTER_OERRORS: 3863 for (int s = 0; s < sc->num_slices; s++) 3864 rv += sc->ss[s].oerrors; 3865 return (rv); 3866 case IFCOUNTER_OBYTES: 3867 for (int s = 0; s < sc->num_slices; s++) 3868 rv += sc->ss[s].obytes; 3869 return (rv); 3870 case IFCOUNTER_OMCASTS: 3871 for (int s = 0; s < sc->num_slices; s++) 3872 rv += sc->ss[s].omcasts; 3873 return (rv); 3874 case IFCOUNTER_OQDROPS: 3875 for (int s = 0; s < sc->num_slices; s++) 3876 rv += sc->ss[s].tx.br->br_drops; 3877 return (rv); 3878 default: 3879 return (if_get_counter_default(ifp, cnt)); 3880 } 3881 } 3882 3883 static void 3884 mxge_tick(void *arg) 3885 { 3886 mxge_softc_t *sc = arg; 3887 u_long pkts = 0; 3888 int err = 0; 3889 int running, ticks; 3890 uint16_t cmd; 3891 3892 ticks = mxge_ticks; 3893 running = if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING; 3894 if (running) { 3895 if (!sc->watchdog_countdown) { 3896 err = mxge_watchdog(sc); 3897 sc->watchdog_countdown = 4; 3898 } 3899 sc->watchdog_countdown--; 3900 } 3901 if (pkts == 0) { 3902 /* ensure NIC did not suffer h/w fault while idle */ 3903 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3904 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3905 sc->dying = 2; 3906 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 3907 err = ENXIO; 3908 } 3909 /* look less often if NIC is idle */ 3910 ticks *= 4; 3911 } 3912 3913 if (err == 0) 3914 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 3915 3916 } 3917 3918 static int 3919 mxge_media_change(if_t ifp) 3920 { 3921 return EINVAL; 3922 } 3923 3924 static int 3925 mxge_change_mtu(mxge_softc_t *sc, int mtu) 3926 { 3927 if_t ifp = sc->ifp; 3928 int real_mtu, old_mtu; 3929 int err = 0; 3930 3931 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3932 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 3933 return EINVAL; 3934 mtx_lock(&sc->driver_mtx); 3935 old_mtu = if_getmtu(ifp); 3936 if_setmtu(ifp, mtu); 3937 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { 3938 mxge_close(sc, 0); 3939 err = mxge_open(sc); 3940 if (err != 0) { 3941 if_setmtu(ifp, old_mtu); 3942 mxge_close(sc, 0); 3943 (void) mxge_open(sc); 3944 } 3945 } 3946 mtx_unlock(&sc->driver_mtx); 3947 return err; 3948 } 3949 3950 static void 3951 mxge_media_status(if_t ifp, struct ifmediareq *ifmr) 3952 { 3953 mxge_softc_t *sc = if_getsoftc(ifp); 3954 3955 if (sc == NULL) 3956 return; 3957 ifmr->ifm_status = IFM_AVALID; 3958 ifmr->ifm_active = IFM_ETHER | IFM_FDX; 3959 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 3960 ifmr->ifm_active |= sc->current_media; 3961 } 3962 3963 static int 3964 mxge_fetch_i2c(mxge_softc_t *sc, struct ifi2creq *i2c) 3965 { 3966 mxge_cmd_t cmd; 3967 uint32_t i2c_args; 3968 int i, ms, err; 3969 3970 if (i2c->dev_addr != 0xA0 && 3971 i2c->dev_addr != 0xA2) 3972 return (EINVAL); 3973 if (i2c->len > sizeof(i2c->data)) 3974 return (EINVAL); 3975 3976 for (i = 0; i < i2c->len; i++) { 3977 i2c_args = i2c->dev_addr << 0x8; 3978 i2c_args |= i2c->offset + i; 3979 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 3980 cmd.data1 = i2c_args; 3981 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 3982 3983 if (err != MXGEFW_CMD_OK) 3984 return (EIO); 3985 /* now we wait for the data to be cached */ 3986 cmd.data0 = i2c_args & 0xff; 3987 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 3988 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 3989 cmd.data0 = i2c_args & 0xff; 3990 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 3991 if (err == EBUSY) 3992 DELAY(1000); 3993 } 3994 if (err != MXGEFW_CMD_OK) 3995 return (EIO); 3996 i2c->data[i] = cmd.data0; 3997 } 3998 return (0); 3999 } 4000 4001 static int 4002 mxge_ioctl(if_t ifp, u_long command, caddr_t data) 4003 { 4004 mxge_softc_t *sc = if_getsoftc(ifp); 4005 struct ifreq *ifr = (struct ifreq *)data; 4006 struct ifi2creq i2c; 4007 int err, mask; 4008 4009 err = 0; 4010 switch (command) { 4011 case SIOCSIFMTU: 4012 err = mxge_change_mtu(sc, ifr->ifr_mtu); 4013 break; 4014 4015 case SIOCSIFFLAGS: 4016 mtx_lock(&sc->driver_mtx); 4017 if (sc->dying) { 4018 mtx_unlock(&sc->driver_mtx); 4019 return EINVAL; 4020 } 4021 if (if_getflags(ifp) & IFF_UP) { 4022 if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) { 4023 err = mxge_open(sc); 4024 } else { 4025 /* take care of promis can allmulti 4026 flag chages */ 4027 mxge_change_promisc(sc, 4028 if_getflags(ifp) & IFF_PROMISC); 4029 mxge_set_multicast_list(sc); 4030 } 4031 } else { 4032 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { 4033 mxge_close(sc, 0); 4034 } 4035 } 4036 mtx_unlock(&sc->driver_mtx); 4037 break; 4038 4039 case SIOCADDMULTI: 4040 case SIOCDELMULTI: 4041 mtx_lock(&sc->driver_mtx); 4042 if (sc->dying) { 4043 mtx_unlock(&sc->driver_mtx); 4044 return (EINVAL); 4045 } 4046 mxge_set_multicast_list(sc); 4047 mtx_unlock(&sc->driver_mtx); 4048 break; 4049 4050 case SIOCSIFCAP: 4051 mtx_lock(&sc->driver_mtx); 4052 mask = ifr->ifr_reqcap ^ if_getcapenable(ifp); 4053 if (mask & IFCAP_TXCSUM) { 4054 if (IFCAP_TXCSUM & if_getcapenable(ifp)) { 4055 mask &= ~IFCAP_TSO4; 4056 if_setcapenablebit(ifp, 0, (IFCAP_TXCSUM|IFCAP_TSO4)); 4057 if_sethwassistbits(ifp, 0, (CSUM_TCP | CSUM_UDP)); 4058 } else { 4059 if_setcapenablebit(ifp, IFCAP_TXCSUM, 0); 4060 if_sethwassistbits(ifp, (CSUM_TCP | CSUM_UDP), 0); 4061 } 4062 } 4063 if (mask & IFCAP_RXCSUM) { 4064 if (IFCAP_RXCSUM & if_getcapenable(ifp)) { 4065 if_setcapenablebit(ifp, 0, IFCAP_RXCSUM); 4066 } else { 4067 if_setcapenablebit(ifp, IFCAP_RXCSUM, 0); 4068 } 4069 } 4070 if (mask & IFCAP_TSO4) { 4071 if (IFCAP_TSO4 & if_getcapenable(ifp)) { 4072 if_setcapenablebit(ifp, 0, IFCAP_TSO4); 4073 } else if (IFCAP_TXCSUM & if_getcapenable(ifp)) { 4074 if_setcapenablebit(ifp, IFCAP_TSO4, 0); 4075 if_sethwassistbits(ifp, CSUM_TSO, 0); 4076 } else { 4077 printf("mxge requires tx checksum offload" 4078 " be enabled to use TSO\n"); 4079 err = EINVAL; 4080 } 4081 } 4082 #if IFCAP_TSO6 4083 if (mask & IFCAP_TXCSUM_IPV6) { 4084 if (IFCAP_TXCSUM_IPV6 & if_getcapenable(ifp)) { 4085 mask &= ~IFCAP_TSO6; 4086 if_setcapenablebit(ifp, 0, 4087 IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); 4088 if_sethwassistbits(ifp, 0, 4089 CSUM_TCP_IPV6 | CSUM_UDP); 4090 } else { 4091 if_setcapenablebit(ifp, IFCAP_TXCSUM_IPV6, 0); 4092 if_sethwassistbits(ifp, 4093 CSUM_TCP_IPV6 | CSUM_UDP_IPV6, 0); 4094 } 4095 } 4096 if (mask & IFCAP_RXCSUM_IPV6) { 4097 if (IFCAP_RXCSUM_IPV6 & if_getcapenable(ifp)) { 4098 if_setcapenablebit(ifp, 0, IFCAP_RXCSUM_IPV6); 4099 } else { 4100 if_setcapenablebit(ifp, IFCAP_RXCSUM_IPV6, 0); 4101 } 4102 } 4103 if (mask & IFCAP_TSO6) { 4104 if (IFCAP_TSO6 & if_getcapenable(ifp)) { 4105 if_setcapenablebit(ifp, 0, IFCAP_TSO6); 4106 } else if (IFCAP_TXCSUM_IPV6 & if_getcapenable(ifp)) { 4107 if_setcapenablebit(ifp, IFCAP_TSO6, 0); 4108 if_sethwassistbits(ifp, CSUM_TSO, 0); 4109 } else { 4110 printf("mxge requires tx checksum offload" 4111 " be enabled to use TSO\n"); 4112 err = EINVAL; 4113 } 4114 } 4115 #endif /*IFCAP_TSO6 */ 4116 4117 if (mask & IFCAP_LRO) 4118 if_togglecapenable(ifp, IFCAP_LRO); 4119 if (mask & IFCAP_VLAN_HWTAGGING) 4120 if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING); 4121 if (mask & IFCAP_VLAN_HWTSO) 4122 if_togglecapenable(ifp, IFCAP_VLAN_HWTSO); 4123 4124 if (!(if_getcapabilities(ifp) & IFCAP_VLAN_HWTSO) || 4125 !(if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING)) 4126 if_setcapenablebit(ifp, 0, IFCAP_VLAN_HWTSO); 4127 4128 mtx_unlock(&sc->driver_mtx); 4129 VLAN_CAPABILITIES(ifp); 4130 4131 break; 4132 4133 case SIOCGIFMEDIA: 4134 mtx_lock(&sc->driver_mtx); 4135 if (sc->dying) { 4136 mtx_unlock(&sc->driver_mtx); 4137 return (EINVAL); 4138 } 4139 mxge_media_probe(sc); 4140 mtx_unlock(&sc->driver_mtx); 4141 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 4142 &sc->media, command); 4143 break; 4144 4145 case SIOCGI2C: 4146 if (sc->connector != MXGE_XFP && 4147 sc->connector != MXGE_SFP) { 4148 err = ENXIO; 4149 break; 4150 } 4151 err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); 4152 if (err != 0) 4153 break; 4154 mtx_lock(&sc->driver_mtx); 4155 if (sc->dying) { 4156 mtx_unlock(&sc->driver_mtx); 4157 return (EINVAL); 4158 } 4159 err = mxge_fetch_i2c(sc, &i2c); 4160 mtx_unlock(&sc->driver_mtx); 4161 if (err == 0) 4162 err = copyout(&i2c, ifr_data_get_ptr(ifr), 4163 sizeof(i2c)); 4164 break; 4165 default: 4166 err = ether_ioctl(ifp, command, data); 4167 break; 4168 } 4169 return err; 4170 } 4171 4172 static void 4173 mxge_fetch_tunables(mxge_softc_t *sc) 4174 { 4175 4176 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); 4177 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 4178 &mxge_flow_control); 4179 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 4180 &mxge_intr_coal_delay); 4181 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 4182 &mxge_nvidia_ecrc_enable); 4183 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 4184 &mxge_force_firmware); 4185 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 4186 &mxge_deassert_wait); 4187 TUNABLE_INT_FETCH("hw.mxge.verbose", 4188 &mxge_verbose); 4189 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 4190 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); 4191 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); 4192 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type); 4193 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu); 4194 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle); 4195 4196 if (bootverbose) 4197 mxge_verbose = 1; 4198 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 4199 mxge_intr_coal_delay = 30; 4200 if (mxge_ticks == 0) 4201 mxge_ticks = hz / 2; 4202 sc->pause = mxge_flow_control; 4203 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 4204 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) { 4205 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 4206 } 4207 if (mxge_initial_mtu > ETHERMTU_JUMBO || 4208 mxge_initial_mtu < ETHER_MIN_LEN) 4209 mxge_initial_mtu = ETHERMTU_JUMBO; 4210 4211 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE) 4212 mxge_throttle = MXGE_MAX_THROTTLE; 4213 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE) 4214 mxge_throttle = MXGE_MIN_THROTTLE; 4215 sc->throttle = mxge_throttle; 4216 } 4217 4218 static void 4219 mxge_free_slices(mxge_softc_t *sc) 4220 { 4221 struct mxge_slice_state *ss; 4222 int i; 4223 4224 if (sc->ss == NULL) 4225 return; 4226 4227 for (i = 0; i < sc->num_slices; i++) { 4228 ss = &sc->ss[i]; 4229 if (ss->fw_stats != NULL) { 4230 mxge_dma_free(&ss->fw_stats_dma); 4231 ss->fw_stats = NULL; 4232 if (ss->tx.br != NULL) { 4233 drbr_free(ss->tx.br, M_DEVBUF); 4234 ss->tx.br = NULL; 4235 } 4236 mtx_destroy(&ss->tx.mtx); 4237 } 4238 if (ss->rx_done.entry != NULL) { 4239 mxge_dma_free(&ss->rx_done.dma); 4240 ss->rx_done.entry = NULL; 4241 } 4242 } 4243 free(sc->ss, M_DEVBUF); 4244 sc->ss = NULL; 4245 } 4246 4247 static int 4248 mxge_alloc_slices(mxge_softc_t *sc) 4249 { 4250 mxge_cmd_t cmd; 4251 struct mxge_slice_state *ss; 4252 size_t bytes; 4253 int err, i, max_intr_slots; 4254 4255 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4256 if (err != 0) { 4257 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4258 return err; 4259 } 4260 sc->rx_ring_size = cmd.data0; 4261 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); 4262 4263 bytes = sizeof (*sc->ss) * sc->num_slices; 4264 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO); 4265 if (sc->ss == NULL) 4266 return (ENOMEM); 4267 for (i = 0; i < sc->num_slices; i++) { 4268 ss = &sc->ss[i]; 4269 4270 ss->sc = sc; 4271 4272 /* allocate per-slice rx interrupt queues */ 4273 4274 bytes = max_intr_slots * sizeof (*ss->rx_done.entry); 4275 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096); 4276 if (err != 0) 4277 goto abort; 4278 ss->rx_done.entry = ss->rx_done.dma.addr; 4279 bzero(ss->rx_done.entry, bytes); 4280 4281 /* 4282 * allocate the per-slice firmware stats; stats 4283 * (including tx) are used used only on the first 4284 * slice for now 4285 */ 4286 4287 bytes = sizeof (*ss->fw_stats); 4288 err = mxge_dma_alloc(sc, &ss->fw_stats_dma, 4289 sizeof (*ss->fw_stats), 64); 4290 if (err != 0) 4291 goto abort; 4292 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr; 4293 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name), 4294 "%s:tx(%d)", device_get_nameunit(sc->dev), i); 4295 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF); 4296 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK, 4297 &ss->tx.mtx); 4298 } 4299 4300 return (0); 4301 4302 abort: 4303 mxge_free_slices(sc); 4304 return (ENOMEM); 4305 } 4306 4307 static void 4308 mxge_slice_probe(mxge_softc_t *sc) 4309 { 4310 mxge_cmd_t cmd; 4311 char *old_fw; 4312 int msix_cnt, status, max_intr_slots; 4313 4314 sc->num_slices = 1; 4315 /* 4316 * don't enable multiple slices if they are not enabled, 4317 * or if this is not an SMP system 4318 */ 4319 4320 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2) 4321 return; 4322 4323 /* see how many MSI-X interrupts are available */ 4324 msix_cnt = pci_msix_count(sc->dev); 4325 if (msix_cnt < 2) 4326 return; 4327 4328 /* now load the slice aware firmware see what it supports */ 4329 old_fw = sc->fw_name; 4330 if (old_fw == mxge_fw_aligned) 4331 sc->fw_name = mxge_fw_rss_aligned; 4332 else 4333 sc->fw_name = mxge_fw_rss_unaligned; 4334 status = mxge_load_firmware(sc, 0); 4335 if (status != 0) { 4336 device_printf(sc->dev, "Falling back to a single slice\n"); 4337 return; 4338 } 4339 4340 /* try to send a reset command to the card to see if it 4341 is alive */ 4342 memset(&cmd, 0, sizeof (cmd)); 4343 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 4344 if (status != 0) { 4345 device_printf(sc->dev, "failed reset\n"); 4346 goto abort_with_fw; 4347 } 4348 4349 /* get rx ring size */ 4350 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4351 if (status != 0) { 4352 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4353 goto abort_with_fw; 4354 } 4355 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t)); 4356 4357 /* tell it the size of the interrupt queues */ 4358 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot); 4359 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 4360 if (status != 0) { 4361 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 4362 goto abort_with_fw; 4363 } 4364 4365 /* ask the maximum number of slices it supports */ 4366 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 4367 if (status != 0) { 4368 device_printf(sc->dev, 4369 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); 4370 goto abort_with_fw; 4371 } 4372 sc->num_slices = cmd.data0; 4373 if (sc->num_slices > msix_cnt) 4374 sc->num_slices = msix_cnt; 4375 4376 if (mxge_max_slices == -1) { 4377 /* cap to number of CPUs in system */ 4378 if (sc->num_slices > mp_ncpus) 4379 sc->num_slices = mp_ncpus; 4380 } else { 4381 if (sc->num_slices > mxge_max_slices) 4382 sc->num_slices = mxge_max_slices; 4383 } 4384 /* make sure it is a power of two */ 4385 while (sc->num_slices & (sc->num_slices - 1)) 4386 sc->num_slices--; 4387 4388 if (mxge_verbose) 4389 device_printf(sc->dev, "using %d slices\n", 4390 sc->num_slices); 4391 4392 return; 4393 4394 abort_with_fw: 4395 sc->fw_name = old_fw; 4396 (void) mxge_load_firmware(sc, 0); 4397 } 4398 4399 static int 4400 mxge_add_msix_irqs(mxge_softc_t *sc) 4401 { 4402 size_t bytes; 4403 int count, err, i, rid; 4404 4405 rid = PCIR_BAR(2); 4406 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 4407 &rid, RF_ACTIVE); 4408 4409 if (sc->msix_table_res == NULL) { 4410 device_printf(sc->dev, "couldn't alloc MSIX table res\n"); 4411 return ENXIO; 4412 } 4413 4414 count = sc->num_slices; 4415 err = pci_alloc_msix(sc->dev, &count); 4416 if (err != 0) { 4417 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d" 4418 "err = %d \n", sc->num_slices, err); 4419 goto abort_with_msix_table; 4420 } 4421 if (count < sc->num_slices) { 4422 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n", 4423 count, sc->num_slices); 4424 device_printf(sc->dev, 4425 "Try setting hw.mxge.max_slices to %d\n", 4426 count); 4427 err = ENOSPC; 4428 goto abort_with_msix; 4429 } 4430 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices; 4431 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4432 if (sc->msix_irq_res == NULL) { 4433 err = ENOMEM; 4434 goto abort_with_msix; 4435 } 4436 4437 for (i = 0; i < sc->num_slices; i++) { 4438 rid = i + 1; 4439 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev, 4440 SYS_RES_IRQ, 4441 &rid, RF_ACTIVE); 4442 if (sc->msix_irq_res[i] == NULL) { 4443 device_printf(sc->dev, "couldn't allocate IRQ res" 4444 " for message %d\n", i); 4445 err = ENXIO; 4446 goto abort_with_res; 4447 } 4448 } 4449 4450 bytes = sizeof (*sc->msix_ih) * sc->num_slices; 4451 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4452 4453 for (i = 0; i < sc->num_slices; i++) { 4454 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i], 4455 INTR_TYPE_NET | INTR_MPSAFE, NULL, 4456 mxge_intr, &sc->ss[i], &sc->msix_ih[i]); 4457 if (err != 0) { 4458 device_printf(sc->dev, "couldn't setup intr for " 4459 "message %d\n", i); 4460 goto abort_with_intr; 4461 } 4462 bus_describe_intr(sc->dev, sc->msix_irq_res[i], 4463 sc->msix_ih[i], "s%d", i); 4464 } 4465 4466 if (mxge_verbose) { 4467 device_printf(sc->dev, "using %d msix IRQs:", 4468 sc->num_slices); 4469 for (i = 0; i < sc->num_slices; i++) 4470 printf(" %jd", rman_get_start(sc->msix_irq_res[i])); 4471 printf("\n"); 4472 } 4473 return (0); 4474 4475 abort_with_intr: 4476 for (i = 0; i < sc->num_slices; i++) { 4477 if (sc->msix_ih[i] != NULL) { 4478 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4479 sc->msix_ih[i]); 4480 sc->msix_ih[i] = NULL; 4481 } 4482 } 4483 free(sc->msix_ih, M_DEVBUF); 4484 4485 abort_with_res: 4486 for (i = 0; i < sc->num_slices; i++) { 4487 rid = i + 1; 4488 if (sc->msix_irq_res[i] != NULL) 4489 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4490 sc->msix_irq_res[i]); 4491 sc->msix_irq_res[i] = NULL; 4492 } 4493 free(sc->msix_irq_res, M_DEVBUF); 4494 4495 abort_with_msix: 4496 pci_release_msi(sc->dev); 4497 4498 abort_with_msix_table: 4499 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4500 sc->msix_table_res); 4501 4502 return err; 4503 } 4504 4505 static int 4506 mxge_add_single_irq(mxge_softc_t *sc) 4507 { 4508 int count, err, rid; 4509 4510 count = pci_msi_count(sc->dev); 4511 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) { 4512 rid = 1; 4513 } else { 4514 rid = 0; 4515 sc->legacy_irq = 1; 4516 } 4517 sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid, 4518 RF_SHAREABLE | RF_ACTIVE); 4519 if (sc->irq_res == NULL) { 4520 device_printf(sc->dev, "could not alloc interrupt\n"); 4521 return ENXIO; 4522 } 4523 if (mxge_verbose) 4524 device_printf(sc->dev, "using %s irq %jd\n", 4525 sc->legacy_irq ? "INTx" : "MSI", 4526 rman_get_start(sc->irq_res)); 4527 err = bus_setup_intr(sc->dev, sc->irq_res, 4528 INTR_TYPE_NET | INTR_MPSAFE, NULL, 4529 mxge_intr, &sc->ss[0], &sc->ih); 4530 if (err != 0) { 4531 bus_release_resource(sc->dev, SYS_RES_IRQ, 4532 sc->legacy_irq ? 0 : 1, sc->irq_res); 4533 if (!sc->legacy_irq) 4534 pci_release_msi(sc->dev); 4535 } 4536 return err; 4537 } 4538 4539 static void 4540 mxge_rem_msix_irqs(mxge_softc_t *sc) 4541 { 4542 int i, rid; 4543 4544 for (i = 0; i < sc->num_slices; i++) { 4545 if (sc->msix_ih[i] != NULL) { 4546 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4547 sc->msix_ih[i]); 4548 sc->msix_ih[i] = NULL; 4549 } 4550 } 4551 free(sc->msix_ih, M_DEVBUF); 4552 4553 for (i = 0; i < sc->num_slices; i++) { 4554 rid = i + 1; 4555 if (sc->msix_irq_res[i] != NULL) 4556 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4557 sc->msix_irq_res[i]); 4558 sc->msix_irq_res[i] = NULL; 4559 } 4560 free(sc->msix_irq_res, M_DEVBUF); 4561 4562 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4563 sc->msix_table_res); 4564 4565 pci_release_msi(sc->dev); 4566 return; 4567 } 4568 4569 static void 4570 mxge_rem_single_irq(mxge_softc_t *sc) 4571 { 4572 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 4573 bus_release_resource(sc->dev, SYS_RES_IRQ, 4574 sc->legacy_irq ? 0 : 1, sc->irq_res); 4575 if (!sc->legacy_irq) 4576 pci_release_msi(sc->dev); 4577 } 4578 4579 static void 4580 mxge_rem_irq(mxge_softc_t *sc) 4581 { 4582 if (sc->num_slices > 1) 4583 mxge_rem_msix_irqs(sc); 4584 else 4585 mxge_rem_single_irq(sc); 4586 } 4587 4588 static int 4589 mxge_add_irq(mxge_softc_t *sc) 4590 { 4591 int err; 4592 4593 if (sc->num_slices > 1) 4594 err = mxge_add_msix_irqs(sc); 4595 else 4596 err = mxge_add_single_irq(sc); 4597 4598 if (0 && err == 0 && sc->num_slices > 1) { 4599 mxge_rem_msix_irqs(sc); 4600 err = mxge_add_msix_irqs(sc); 4601 } 4602 return err; 4603 } 4604 4605 static int 4606 mxge_attach(device_t dev) 4607 { 4608 mxge_cmd_t cmd; 4609 mxge_softc_t *sc = device_get_softc(dev); 4610 if_t ifp; 4611 int err, rid; 4612 4613 sc->dev = dev; 4614 mxge_fetch_tunables(sc); 4615 4616 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc); 4617 sc->tq = taskqueue_create("mxge_taskq", M_WAITOK, 4618 taskqueue_thread_enqueue, &sc->tq); 4619 if (sc->tq == NULL) { 4620 err = ENOMEM; 4621 goto abort_with_nothing; 4622 } 4623 4624 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 4625 1, /* alignment */ 4626 0, /* boundary */ 4627 BUS_SPACE_MAXADDR, /* low */ 4628 BUS_SPACE_MAXADDR, /* high */ 4629 NULL, NULL, /* filter */ 4630 65536 + 256, /* maxsize */ 4631 MXGE_MAX_SEND_DESC, /* num segs */ 4632 65536, /* maxsegsize */ 4633 0, /* flags */ 4634 NULL, NULL, /* lock */ 4635 &sc->parent_dmat); /* tag */ 4636 4637 if (err != 0) { 4638 device_printf(sc->dev, "Err %d allocating parent dmat\n", 4639 err); 4640 goto abort_with_tq; 4641 } 4642 4643 ifp = sc->ifp = if_alloc(IFT_ETHER); 4644 if (ifp == NULL) { 4645 device_printf(dev, "can not if_alloc()\n"); 4646 err = ENOSPC; 4647 goto abort_with_parent_dmat; 4648 } 4649 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4650 4651 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", 4652 device_get_nameunit(dev)); 4653 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF); 4654 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), 4655 "%s:drv", device_get_nameunit(dev)); 4656 mtx_init(&sc->driver_mtx, sc->driver_mtx_name, 4657 MTX_NETWORK_LOCK, MTX_DEF); 4658 4659 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); 4660 4661 mxge_setup_cfg_space(sc); 4662 4663 /* Map the board into the kernel */ 4664 rid = PCIR_BARS; 4665 sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, 4666 RF_ACTIVE); 4667 if (sc->mem_res == NULL) { 4668 device_printf(dev, "could not map memory\n"); 4669 err = ENXIO; 4670 goto abort_with_lock; 4671 } 4672 sc->sram = rman_get_virtual(sc->mem_res); 4673 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4674 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4675 device_printf(dev, "impossible memory region size %jd\n", 4676 rman_get_size(sc->mem_res)); 4677 err = ENXIO; 4678 goto abort_with_mem_res; 4679 } 4680 4681 /* make NULL terminated copy of the EEPROM strings section of 4682 lanai SRAM */ 4683 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4684 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4685 rman_get_bushandle(sc->mem_res), 4686 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4687 sc->eeprom_strings, 4688 MXGE_EEPROM_STRINGS_SIZE - 2); 4689 err = mxge_parse_strings(sc); 4690 if (err != 0) 4691 goto abort_with_mem_res; 4692 4693 /* Enable write combining for efficient use of PCIe bus */ 4694 mxge_enable_wc(sc); 4695 4696 /* Allocate the out of band dma memory */ 4697 err = mxge_dma_alloc(sc, &sc->cmd_dma, 4698 sizeof (mxge_cmd_t), 64); 4699 if (err != 0) 4700 goto abort_with_mem_res; 4701 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 4702 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4703 if (err != 0) 4704 goto abort_with_cmd_dma; 4705 4706 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4707 if (err != 0) 4708 goto abort_with_zeropad_dma; 4709 4710 /* select & load the firmware */ 4711 err = mxge_select_firmware(sc); 4712 if (err != 0) 4713 goto abort_with_dmabench; 4714 sc->intr_coal_delay = mxge_intr_coal_delay; 4715 4716 mxge_slice_probe(sc); 4717 err = mxge_alloc_slices(sc); 4718 if (err != 0) 4719 goto abort_with_dmabench; 4720 4721 err = mxge_reset(sc, 0); 4722 if (err != 0) 4723 goto abort_with_slices; 4724 4725 err = mxge_alloc_rings(sc); 4726 if (err != 0) { 4727 device_printf(sc->dev, "failed to allocate rings\n"); 4728 goto abort_with_slices; 4729 } 4730 4731 err = mxge_add_irq(sc); 4732 if (err != 0) { 4733 device_printf(sc->dev, "failed to add irq\n"); 4734 goto abort_with_rings; 4735 } 4736 4737 if_setbaudrate(ifp, IF_Gbps(10)); 4738 if_setcapabilities(ifp, IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 4739 IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 | 4740 IFCAP_RXCSUM_IPV6); 4741 #if defined(INET) || defined(INET6) 4742 if_setcapabilitiesbit(ifp, IFCAP_LRO, 0); 4743 #endif 4744 4745 #ifdef MXGE_NEW_VLAN_API 4746 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM, 0); 4747 4748 /* Only FW 1.4.32 and newer can do TSO over vlans */ 4749 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 4750 sc->fw_ver_tiny >= 32) 4751 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTSO, 0); 4752 #endif 4753 sc->max_mtu = mxge_max_mtu(sc); 4754 if (sc->max_mtu >= 9000) 4755 if_setcapabilitiesbit(ifp, IFCAP_JUMBO_MTU, 0); 4756 else 4757 device_printf(dev, "MTU limited to %d. Install " 4758 "latest firmware for 9000 byte jumbo support\n", 4759 sc->max_mtu - ETHER_HDR_LEN); 4760 if_sethwassist(ifp, CSUM_TCP | CSUM_UDP | CSUM_TSO); 4761 if_sethwassistbits(ifp, CSUM_TCP_IPV6 | CSUM_UDP_IPV6, 0); 4762 /* check to see if f/w supports TSO for IPv6 */ 4763 if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) { 4764 if (CSUM_TCP_IPV6) 4765 if_setcapabilitiesbit(ifp, IFCAP_TSO6, 0); 4766 sc->max_tso6_hlen = min(cmd.data0, 4767 sizeof (sc->ss[0].scratch)); 4768 } 4769 if_setcapenable(ifp, if_getcapabilities(ifp)); 4770 if (sc->lro_cnt == 0) 4771 if_setcapenablebit(ifp, 0, IFCAP_LRO); 4772 if_setinitfn(ifp, mxge_init); 4773 if_setsoftc(ifp, sc); 4774 if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); 4775 if_setioctlfn(ifp, mxge_ioctl); 4776 if_setstartfn(ifp, mxge_start); 4777 if_setgetcounterfn(ifp, mxge_get_counter); 4778 if_sethwtsomax(ifp, IP_MAXPACKET - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)); 4779 if_sethwtsomaxsegcount(ifp, sc->ss[0].tx.max_desc); 4780 if_sethwtsomaxsegsize(ifp, IP_MAXPACKET); 4781 /* Initialise the ifmedia structure */ 4782 ifmedia_init(&sc->media, 0, mxge_media_change, 4783 mxge_media_status); 4784 mxge_media_init(sc); 4785 mxge_media_probe(sc); 4786 sc->dying = 0; 4787 ether_ifattach(ifp, sc->mac_addr); 4788 /* ether_ifattach sets mtu to ETHERMTU */ 4789 if (mxge_initial_mtu != ETHERMTU) 4790 mxge_change_mtu(sc, mxge_initial_mtu); 4791 4792 mxge_add_sysctls(sc); 4793 if_settransmitfn(ifp, mxge_transmit); 4794 if_setqflushfn(ifp, mxge_qflush); 4795 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq", 4796 device_get_nameunit(sc->dev)); 4797 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 4798 return 0; 4799 4800 abort_with_rings: 4801 mxge_free_rings(sc); 4802 abort_with_slices: 4803 mxge_free_slices(sc); 4804 abort_with_dmabench: 4805 mxge_dma_free(&sc->dmabench_dma); 4806 abort_with_zeropad_dma: 4807 mxge_dma_free(&sc->zeropad_dma); 4808 abort_with_cmd_dma: 4809 mxge_dma_free(&sc->cmd_dma); 4810 abort_with_mem_res: 4811 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4812 abort_with_lock: 4813 pci_disable_busmaster(dev); 4814 mtx_destroy(&sc->cmd_mtx); 4815 mtx_destroy(&sc->driver_mtx); 4816 if_free(ifp); 4817 abort_with_parent_dmat: 4818 bus_dma_tag_destroy(sc->parent_dmat); 4819 abort_with_tq: 4820 if (sc->tq != NULL) { 4821 taskqueue_drain(sc->tq, &sc->watchdog_task); 4822 taskqueue_free(sc->tq); 4823 sc->tq = NULL; 4824 } 4825 abort_with_nothing: 4826 return err; 4827 } 4828 4829 static int 4830 mxge_detach(device_t dev) 4831 { 4832 mxge_softc_t *sc = device_get_softc(dev); 4833 4834 if (mxge_vlans_active(sc)) { 4835 device_printf(sc->dev, 4836 "Detach vlans before removing module\n"); 4837 return EBUSY; 4838 } 4839 mtx_lock(&sc->driver_mtx); 4840 sc->dying = 1; 4841 if (if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING) 4842 mxge_close(sc, 0); 4843 mtx_unlock(&sc->driver_mtx); 4844 ether_ifdetach(sc->ifp); 4845 if (sc->tq != NULL) { 4846 taskqueue_drain(sc->tq, &sc->watchdog_task); 4847 taskqueue_free(sc->tq); 4848 sc->tq = NULL; 4849 } 4850 callout_drain(&sc->co_hdl); 4851 ifmedia_removeall(&sc->media); 4852 mxge_dummy_rdma(sc, 0); 4853 mxge_rem_sysctls(sc); 4854 mxge_rem_irq(sc); 4855 mxge_free_rings(sc); 4856 mxge_free_slices(sc); 4857 mxge_dma_free(&sc->dmabench_dma); 4858 mxge_dma_free(&sc->zeropad_dma); 4859 mxge_dma_free(&sc->cmd_dma); 4860 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4861 pci_disable_busmaster(dev); 4862 mtx_destroy(&sc->cmd_mtx); 4863 mtx_destroy(&sc->driver_mtx); 4864 if_free(sc->ifp); 4865 bus_dma_tag_destroy(sc->parent_dmat); 4866 return 0; 4867 } 4868 4869 static int 4870 mxge_shutdown(device_t dev) 4871 { 4872 return 0; 4873 } 4874 4875 /* 4876 This file uses Myri10GE driver indentation. 4877 4878 Local Variables: 4879 c-file-style:"linux" 4880 tab-width:8 4881 End: 4882 */ 4883