/******************************************************************************
SPDX-License-Identifier: BSD-2-Clause-FreeBSD

Copyright (c) 2006-2013, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>
#include <contrib/zlib/zlib.h>
#include <dev/zlib/zcalloc.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet6/ip6_var.h>

#include <machine/bus.h>
#include <machine/in_cksum.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>
#include <sys/smp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */

#include <vm/vm.h>		/* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif

#include <dev/mxge/mxge_mcp.h>
#include <dev/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/mxge/if_mxge_var.h>
#include <sys/buf_ring.h>

#include "opt_inet.h"
#include "opt_inet6.h"

/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
static int mxge_always_promisc = 0;
static int mxge_initial_mtu = ETHERMTU_JUMBO;
static int mxge_throttle = 0;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";

static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),

	DEVMETHOD_END
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc, int down);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);

static int
mxge_probe(device_t dev)
{
	int rev;

	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		rev = pci_get_revid(dev);
		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n",
				      rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}

static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386) || defined(__amd64)
	vm_offset_t len;
	int err;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram,
			       len, PAT_WRITE_COMBINING);
	if (err != 0) {
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
			      err);
		sc->wc = 0;
	}
#endif
}

/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	if (error == 0) {
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}

static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}

static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */
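/*
 * For illustration, a (hypothetical) string block might look like
 * "SN=123456\0MAC=00:60:dd:43:ab:cd\0PC=10G-PCIE-8A-C\0\0";
 * mxge_parse_strings() below walks it one NUL-terminated string
 * at a time until it reaches the empty string that terminates
 * the block.
 */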
static int
mxge_parse_strings(mxge_softc_t *sc)
{
	char *ptr;
	int i, found_mac, found_sn2;
	char *endptr;

	ptr = sc->eeprom_strings;
	found_mac = 0;
	found_sn2 = 0;
	while (*ptr != '\0') {
		if (strncmp(ptr, "MAC=", 4) == 0) {
			ptr += 4;
			for (i = 0;;) {
				sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
				if (endptr - ptr != 2)
					goto abort;
				ptr = endptr;
				if (++i == 6)
					break;
				if (*ptr++ != ':')
					goto abort;
			}
			found_mac = 1;
		} else if (strncmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strlcpy(sc->product_code_string, ptr,
			    sizeof(sc->product_code_string));
		} else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
			ptr += 3;
			strlcpy(sc->serial_number_string, ptr,
			    sizeof(sc->serial_number_string));
		} else if (strncmp(ptr, "SN2=", 4) == 0) {
			/* SN2 takes precedence over SN */
			ptr += 4;
			found_sn2 = 1;
			strlcpy(sc->serial_number_string, ptr,
			    sizeof(sc->serial_number_string));
		}
		while (*ptr++ != '\0') {}
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}

#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function. Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access
	 */
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves.  This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);
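	/*
	 * The offset computed below follows the standard PCIe ECAM
	 * ("enhanced configuration access mechanism") layout: 1 MB of
	 * config space per bus and 4 KB per function, with each
	 * device's 8 functions packed back to back (32 KB per slot).
	 */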
	off = base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);

	if (va == NULL) {
		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev(va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev(va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev(va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif

static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";

	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */
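	/*
	 * Working out the units: transfers * len bytes move in
	 * (ticks / 2) microseconds, so bandwidth in MB/s (bytes/us)
	 * is (transfers * len * 2) / ticks, which is exactly the
	 * expression used for sc->read_dma and sc->write_dma below.
	 * The read/write test moves data in both directions, hence
	 * the extra factor of two for sc->read_write_dma.
	 */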
	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}

/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary.  Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB.  If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */

static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
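	/*
	 * Bits 14:12 of the PCIe Device Control register (offset 0x8
	 * from the express capability) encode the Max Read Request
	 * Size as 128 << n bytes; n == 5 means 4096 bytes, which is
	 * what the (5 << 12) comparison below checks for.
	 */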
" 596 "Please install up to date fw\n"); 597 return status; 598 } 599 600 static int 601 mxge_select_firmware(mxge_softc_t *sc) 602 { 603 int aligned = 0; 604 int force_firmware = mxge_force_firmware; 605 606 if (sc->throttle) 607 force_firmware = sc->throttle; 608 609 if (force_firmware != 0) { 610 if (force_firmware == 1) 611 aligned = 1; 612 else 613 aligned = 0; 614 if (mxge_verbose) 615 device_printf(sc->dev, 616 "Assuming %s completions (forced)\n", 617 aligned ? "aligned" : "unaligned"); 618 goto abort; 619 } 620 621 /* if the PCIe link width is 4 or less, we can use the aligned 622 firmware and skip any checks */ 623 if (sc->link_width != 0 && sc->link_width <= 4) { 624 device_printf(sc->dev, 625 "PCIe x%d Link, expect reduced performance\n", 626 sc->link_width); 627 aligned = 1; 628 goto abort; 629 } 630 631 if (0 == mxge_firmware_probe(sc)) 632 return 0; 633 634 abort: 635 if (aligned) { 636 sc->fw_name = mxge_fw_aligned; 637 sc->tx_boundary = 4096; 638 } else { 639 sc->fw_name = mxge_fw_unaligned; 640 sc->tx_boundary = 2048; 641 } 642 return (mxge_load_firmware(sc, 0)); 643 } 644 645 static int 646 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 647 { 648 649 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 650 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 651 be32toh(hdr->mcp_type)); 652 return EIO; 653 } 654 655 /* save firmware version for sysctl */ 656 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version)); 657 if (mxge_verbose) 658 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 659 660 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 661 &sc->fw_ver_minor, &sc->fw_ver_tiny); 662 663 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 664 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 665 device_printf(sc->dev, "Found firmware version %s\n", 666 sc->fw_version); 667 device_printf(sc->dev, "Driver needs %d.%d\n", 668 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 669 return EINVAL; 670 } 671 return 0; 672 673 } 674 675 static int 676 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 677 { 678 z_stream zs; 679 char *inflate_buffer; 680 const struct firmware *fw; 681 const mcp_gen_header_t *hdr; 682 unsigned hdr_offset; 683 int status; 684 unsigned int i; 685 size_t fw_len; 686 687 fw = firmware_get(sc->fw_name); 688 if (fw == NULL) { 689 device_printf(sc->dev, "Could not find firmware image %s\n", 690 sc->fw_name); 691 return ENOENT; 692 } 693 694 /* setup zlib and decompress f/w */ 695 bzero(&zs, sizeof (zs)); 696 zs.zalloc = zcalloc_nowait; 697 zs.zfree = zcfree; 698 status = inflateInit(&zs); 699 if (status != Z_OK) { 700 status = EIO; 701 goto abort_with_fw; 702 } 703 704 /* the uncompressed size is stored as the firmware version, 705 which would otherwise go unused */ 706 fw_len = (size_t) fw->version; 707 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); 708 if (inflate_buffer == NULL) 709 goto abort_with_zs; 710 zs.avail_in = fw->datasize; 711 zs.next_in = __DECONST(char *, fw->data); 712 zs.avail_out = fw_len; 713 zs.next_out = inflate_buffer; 714 status = inflate(&zs, Z_FINISH); 715 if (status != Z_STREAM_END) { 716 device_printf(sc->dev, "zlib %d\n", status); 717 status = EIO; 718 goto abort_with_buffer; 719 } 720 721 /* check id */ 722 hdr_offset = htobe32(*(const uint32_t *) 723 (inflate_buffer + MCP_HEADER_PTR_OFFSET)); 724 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 725 device_printf(sc->dev, "Bad firmware file"); 726 status = EIO; 727 goto abort_with_buffer; 728 } 729 hdr = (const 
/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	buf = (uint32_t *)((uintptr_t)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	 */

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high);	/* dummy addr MSW */
	buf[4] = htobe32(dma_low);	/* dummy addr LSW */
	buf[5] = htobe32(enable);	/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}
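/*
 * Command handshake with the firmware: the host PIOs an 8-byte
 * aligned mcp_cmd_t into NIC SRAM at MXGEFW_ETH_CMD, and the
 * firmware DMAs its result back into the response block at
 * sc->cmd (whose bus address is passed in response_addr).  The
 * driver pre-sets result to 0xffffffff and polls until the
 * firmware overwrites it, or until the 20ms budget expires.
 */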
"enable" : "disable"), confirm, 807 *confirm); 808 } 809 return; 810 } 811 812 static int 813 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 814 { 815 mcp_cmd_t *buf; 816 char buf_bytes[sizeof(*buf) + 8]; 817 volatile mcp_cmd_response_t *response = sc->cmd; 818 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 819 uint32_t dma_low, dma_high; 820 int err, sleep_total = 0; 821 822 /* ensure buf is aligned to 8 bytes */ 823 buf = (mcp_cmd_t *)((uintptr_t)(buf_bytes + 7) & ~7UL); 824 825 buf->data0 = htobe32(data->data0); 826 buf->data1 = htobe32(data->data1); 827 buf->data2 = htobe32(data->data2); 828 buf->cmd = htobe32(cmd); 829 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 830 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 831 832 buf->response_addr.low = htobe32(dma_low); 833 buf->response_addr.high = htobe32(dma_high); 834 mtx_lock(&sc->cmd_mtx); 835 response->result = 0xffffffff; 836 wmb(); 837 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 838 839 /* wait up to 20ms */ 840 err = EAGAIN; 841 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 842 bus_dmamap_sync(sc->cmd_dma.dmat, 843 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 844 wmb(); 845 switch (be32toh(response->result)) { 846 case 0: 847 data->data0 = be32toh(response->data); 848 err = 0; 849 break; 850 case 0xffffffff: 851 DELAY(1000); 852 break; 853 case MXGEFW_CMD_UNKNOWN: 854 err = ENOSYS; 855 break; 856 case MXGEFW_CMD_ERROR_UNALIGNED: 857 err = E2BIG; 858 break; 859 case MXGEFW_CMD_ERROR_BUSY: 860 err = EBUSY; 861 break; 862 case MXGEFW_CMD_ERROR_I2C_ABSENT: 863 err = ENXIO; 864 break; 865 default: 866 device_printf(sc->dev, 867 "mxge: command %d " 868 "failed, result = %d\n", 869 cmd, be32toh(response->result)); 870 err = ENXIO; 871 break; 872 } 873 if (err != EAGAIN) 874 break; 875 } 876 if (err == EAGAIN) 877 device_printf(sc->dev, "mxge: command %d timed out" 878 "result = %d\n", 879 cmd, be32toh(response->result)); 880 mtx_unlock(&sc->cmd_mtx); 881 return err; 882 } 883 884 static int 885 mxge_adopt_running_firmware(mxge_softc_t *sc) 886 { 887 struct mcp_gen_header *hdr; 888 const size_t bytes = sizeof (struct mcp_gen_header); 889 size_t hdr_offset; 890 int status; 891 892 /* find running firmware header */ 893 hdr_offset = htobe32(*(volatile uint32_t *) 894 (sc->sram + MCP_HEADER_PTR_OFFSET)); 895 896 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 897 device_printf(sc->dev, 898 "Running firmware has bad header offset (%d)\n", 899 (int)hdr_offset); 900 return EIO; 901 } 902 903 /* copy header of running firmware from SRAM to host memory to 904 * validate firmware */ 905 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 906 if (hdr == NULL) { 907 device_printf(sc->dev, "could not malloc firmware hdr\n"); 908 return ENOMEM; 909 } 910 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 911 rman_get_bushandle(sc->mem_res), 912 hdr_offset, (char *)hdr, bytes); 913 status = mxge_validate_firmware(sc, hdr); 914 free(hdr, M_DEVBUF); 915 916 /* 917 * check to see if adopted firmware has bug where adopting 918 * it will cause broadcasts to be filtered unless the NIC 919 * is kept in ALLMULTI mode 920 */ 921 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 922 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 923 sc->adopted_rx_filter_bug = 1; 924 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 925 "working around rx filter bug\n", 926 sc->fw_ver_major, sc->fw_ver_minor, 927 sc->fw_ver_tiny); 928 } 929 930 return status; 931 } 932 933 static int 934 
static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((uintptr_t)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
				      "Using firmware currently running on NIC"
				      ".  For optimal\n");
			device_printf(sc->dev,
				      "performance consider loading optimized "
				      "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	 */

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	 */
					/* where the code starts*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8); 	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev,"handoff failed (%p = 0x%x)",
			      confirm, *confirm);

		return ENXIO;
	}
	return 0;
}

static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}

static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set promisc mode\n");
	}
}

struct mxge_add_maddr_ctx {
	mxge_softc_t *sc;
	int error;
};

static u_int
mxge_add_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
{
	struct mxge_add_maddr_ctx *ctx = arg;
	mxge_cmd_t cmd;

	if (ctx->error != 0)
		return (0);
	bcopy(LLADDR(sdl), &cmd.data0, 4);
	bcopy(LLADDR(sdl) + 4, &cmd.data1, 2);
	cmd.data0 = htonl(cmd.data0);
	cmd.data1 = htonl(cmd.data1);

	ctx->error = mxge_send_cmd(ctx->sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);

	return (1);
}

static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	struct mxge_add_maddr_ctx ctx;
	struct ifnet *ifp = sc->ifp;
	mxge_cmd_t cmd;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */

	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */
	ctx.sc = sc;
	ctx.error = 0;
	if_foreach_llmaddr(ifp, mxge_add_maddr, &ctx);
	if (ctx.error != 0) {
		device_printf(sc->dev, "Failed MXGEFW_JOIN_MULTICAST_GROUP, "
			      "error status: %d\n", ctx.error);
		/* abort, leaving multicast filtering off */
		return;
	}

	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}
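/*
 * A note on the logic below: if one page-sized cluster (less the
 * firmware's MXGEFW_PAD) can already hold the largest MTU the
 * firmware supports, no probing is needed.  Otherwise, ask the
 * firmware whether it can spread a jumbo frame across several
 * page-sized receive buffers; presumably only firmware that
 * accepts MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS can do so.
 */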
static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}

static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts  */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);

	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed to set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lc.lro_bad_csum = 0;
		ss->lc.lro_queued = 0;
		ss->lc.lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			bzero(ss->fw_stats, sizeof *ss->fw_stats);
		}
	}
	sc->rdma_tags_available = 15;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	if (sc->throttle) {
		cmd.data0 = sc->throttle;
		if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
				  &cmd)) {
			device_printf(sc->dev,
				      "can't enable throttle\n");
		}
	}
	return status;
}

static int
mxge_change_throttle(SYSCTL_HANDLER_ARGS)
{
	mxge_cmd_t cmd;
	mxge_softc_t *sc;
	int err;
	unsigned int throttle;

	sc = arg1;
	throttle = sc->throttle;
	err = sysctl_handle_int(oidp, &throttle, arg2, req);
	if (err != 0) {
		return err;
	}

	if (throttle == sc->throttle)
		return 0;

	if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	cmd.data0 = throttle;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
	if (err == 0)
		sc->throttle = throttle;
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0) {
		return err;
	}
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0) {
		return err;
	}
	if (enabled == sc->pause)
		return 0;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_pause(sc, enabled);
	mtx_unlock(&sc->driver_mtx);
	return err;
}
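/*
 * Sysctl handler that exports one 32-bit big-endian firmware
 * counter as a host-order integer: the swapped value is passed
 * via arg2 with a NULL arg1, so sysctl_handle_int() treats the
 * node as effectively read-only.
 */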
static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}

static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int slice;

	if (sc->slice_sysctl_tree == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		if (ss == NULL || ss->sysctl_tree == NULL)
			continue;
		sysctl_ctx_free(&ss->sysctl_ctx);
		ss->sysctl_tree = NULL;
	}
	sysctl_ctx_free(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = NULL;
}

static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
	fw = sc->ss[0].fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "firmware_version",
			  CTLFLAG_RD, sc->fw_version,
			  0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "serial_number",
			  CTLFLAG_RD, sc->serial_number_string,
			  0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "product_code",
			  CTLFLAG_RD, sc->product_code_string,
			  0, "product code");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "pcie_link_width",
		       CTLFLAG_RD, &sc->link_width,
		       0, "PCIe link width");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_boundary",
		       CTLFLAG_RD, &sc->tx_boundary,
		       0, "tx boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_combine",
		       CTLFLAG_RD, &sc->wc,
		       0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_dma_MBs",
		       CTLFLAG_RD, &sc->read_dma,
		       0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_dma_MBs",
		       CTLFLAG_RD, &sc->write_dma,
		       0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_write_dma_MBs",
		       CTLFLAG_RD, &sc->read_write_dma,
		       0, "DMA concurrent Read/Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "watchdog_resets",
		       CTLFLAG_RD, &sc->watchdog_resets,
		       0, "Number of times NIC was reset");

	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "intr_coal_delay", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
	    sc, 0, mxge_change_intr_coal, "I",
	    "interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "throttle", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    mxge_change_throttle, "I", "transmit throttling");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "flow_control_enabled",
	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    mxge_change_flow_control, "I",
	    "enable flow control for this interface");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "deassert_wait",
		       CTLFLAG_RW, &mxge_deassert_wait,
		       0, "Wait for IRQ line to go low in ihandler");
	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "link_up", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->link_up, 0, mxge_handle_be32, "I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "rdma_tags_available", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->rdma_tags_available, 0, mxge_handle_be32, "I",
	    "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_bad_crc32", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_bad_crc32, 0, mxge_handle_be32, "I",
	    "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_bad_phy", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_bad_phy, 0, mxge_handle_be32, "I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_link_error_or_filtered",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_link_error_or_filtered, 0, mxge_handle_be32, "I",
	    "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_link_overflow",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_link_overflow, 0, mxge_handle_be32, "I",
	    "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_multicast_filtered",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_multicast_filtered, 0, mxge_handle_be32, "I",
	    "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_no_big_buffer",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_no_big_buffer, 0, mxge_handle_be32, "I",
	    "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_no_small_buffer",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_no_small_buffer, 0, mxge_handle_be32, "I",
	    "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_overrun",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_overrun, 0, mxge_handle_be32, "I",
	    "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_pause", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_pause, 0, mxge_handle_be32, "I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_runt", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_runt, 0, mxge_handle_be32, "I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_unicast_filtered",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_unicast_filtered, 0, mxge_handle_be32, "I",
	    "dropped_unicast_filtered");
	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "verbose",
		       CTLFLAG_RW, &mxge_verbose,
		       0, "verbose printing");

	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree =
		SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
		    "slice", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		sprintf(slice_num, "%d", slice);
		ss->sysctl_tree =
			SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
			    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
		children = SYSCTL_CHILDREN(ss->sysctl_tree);
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_small_cnt",
			       CTLFLAG_RD, &ss->rx_small.cnt,
			       0, "rx_small_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_big_cnt",
			       CTLFLAG_RD, &ss->rx_big.cnt,
			       0, "rx_big_cnt");
		SYSCTL_ADD_U64(ctx, children, OID_AUTO,
			       "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed,
			       0, "number of lro merge queues flushed");

		SYSCTL_ADD_U64(ctx, children, OID_AUTO,
			       "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum,
			       0, "number of bad csums preventing LRO");

		SYSCTL_ADD_U64(ctx, children, OID_AUTO,
			       "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued,
			       0, "number of frames appended to lro merge "
			       "queues");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_req",
			       CTLFLAG_RD, &ss->tx.req,
			       0, "tx_req");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_done",
			       CTLFLAG_RD, &ss->tx.done,
			       0, "tx_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_pkt_done",
			       CTLFLAG_RD, &ss->tx.pkt_done,
			       0, "tx_pkt_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_stall",
			       CTLFLAG_RD, &ss->tx.stall,
			       0, "tx_stall");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_wake",
			       CTLFLAG_RD, &ss->tx.wake,
			       0, "tx_wake");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_defrag",
			       CTLFLAG_RD, &ss->tx.defrag,
			       0, "tx_defrag");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_queue_active",
			       CTLFLAG_RD, &ss->tx.queue_active,
			       0, "tx_queue_active");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_activate",
			       CTLFLAG_RD, &ss->tx.activate,
			       0, "tx_activate");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_deactivate",
			       CTLFLAG_RD, &ss->tx.deactivate,
			       0, "tx_deactivate");
	}
}

/* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
   backwards one at a time and handle ring wraps */

static inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
			  mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
		wmb();
	}
}

/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */
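/*
 * Each mcp_kreq_ether_send_t descriptor is 16 bytes, so the
 * two-descriptor strides below move exactly one 32-byte PIO burst
 * per iteration, and the final 32-bit store that rewrites the
 * valid flags touches only the last word of the first descriptor.
 */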
We re-write the first segment's flags 1666 * to mark them valid only after writing the entire chain 1667 */ 1668 1669 static inline void 1670 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 1671 int cnt) 1672 { 1673 int idx, i; 1674 uint32_t *src_ints; 1675 volatile uint32_t *dst_ints; 1676 mcp_kreq_ether_send_t *srcp; 1677 volatile mcp_kreq_ether_send_t *dstp, *dst; 1678 uint8_t last_flags; 1679 1680 idx = tx->req & tx->mask; 1681 1682 last_flags = src->flags; 1683 src->flags = 0; 1684 wmb(); 1685 dst = dstp = &tx->lanai[idx]; 1686 srcp = src; 1687 1688 if ((idx + cnt) < tx->mask) { 1689 for (i = 0; i < (cnt - 1); i += 2) { 1690 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1691 wmb(); /* force write every 32 bytes */ 1692 srcp += 2; 1693 dstp += 2; 1694 } 1695 } else { 1696 /* submit all but the first request, and ensure 1697 that it is submitted below */ 1698 mxge_submit_req_backwards(tx, src, cnt); 1699 i = 0; 1700 } 1701 if (i < cnt) { 1702 /* submit the first request */ 1703 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1704 wmb(); /* barrier before setting valid flag */ 1705 } 1706 1707 /* re-write the last 32-bits with the valid flags */ 1708 src->flags = last_flags; 1709 src_ints = (uint32_t *)src; 1710 src_ints+=3; 1711 dst_ints = (volatile uint32_t *)dst; 1712 dst_ints+=3; 1713 *dst_ints = *src_ints; 1714 tx->req += cnt; 1715 wmb(); 1716 } 1717 1718 static int 1719 mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m, 1720 struct mxge_pkt_info *pi) 1721 { 1722 struct ether_vlan_header *eh; 1723 uint16_t etype; 1724 int tso = m->m_pkthdr.csum_flags & (CSUM_TSO); 1725 #if IFCAP_TSO6 && defined(INET6) 1726 int nxt; 1727 #endif 1728 1729 eh = mtod(m, struct ether_vlan_header *); 1730 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 1731 etype = ntohs(eh->evl_proto); 1732 pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 1733 } else { 1734 etype = ntohs(eh->evl_encap_proto); 1735 pi->ip_off = ETHER_HDR_LEN; 1736 } 1737 1738 switch (etype) { 1739 case ETHERTYPE_IP: 1740 /* 1741 * ensure ip header is in first mbuf, copy it to a 1742 * scratch buffer if not 1743 */ 1744 pi->ip = (struct ip *)(m->m_data + pi->ip_off); 1745 pi->ip6 = NULL; 1746 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) { 1747 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip), 1748 ss->scratch); 1749 pi->ip = (struct ip *)(ss->scratch + pi->ip_off); 1750 } 1751 pi->ip_hlen = pi->ip->ip_hl << 2; 1752 if (!tso) 1753 return 0; 1754 1755 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen + 1756 sizeof(struct tcphdr))) { 1757 m_copydata(m, 0, pi->ip_off + pi->ip_hlen + 1758 sizeof(struct tcphdr), ss->scratch); 1759 pi->ip = (struct ip *)(ss->scratch + pi->ip_off); 1760 } 1761 pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen); 1762 break; 1763 #if IFCAP_TSO6 && defined(INET6) 1764 case ETHERTYPE_IPV6: 1765 pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off); 1766 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) { 1767 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6), 1768 ss->scratch); 1769 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off); 1770 } 1771 nxt = 0; 1772 pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt); 1773 pi->ip_hlen -= pi->ip_off; 1774 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) 1775 return EINVAL; 1776 1777 if (!tso) 1778 return 0; 1779 1780 if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen) 1781 return EINVAL; 1782 1783 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen + 1784 sizeof(struct tcphdr))) { 1785 m_copydata(m, 0, 
#if IFCAP_TSO4

static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, struct mxge_pkt_info *pi)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss, sum;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	cksum_offset = pi->ip_off + pi->ip_hlen;
	cum_len = -(cksum_offset + (pi->tcp->th_off << 2));

	/* TSO implies checksum offload on this hardware */
	if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) {
		/*
		 * If packet has full TCP csum, replace it with pseudo hdr
		 * sum that the NIC expects, otherwise the NIC will emit
		 * packets with bad TCP checksums.
		 */
		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
		if (pi->ip6) {
#if (CSUM_TCP_IPV6 != 0) && defined(INET6)
			m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
			sum = in6_cksum_pseudo(pi->ip6,
			    m->m_pkthdr.len - cksum_offset,
			    IPPROTO_TCP, 0);
#endif
		} else {
#ifdef INET
			m->m_pkthdr.csum_flags |= CSUM_TCP;
			sum = in_pseudo(pi->ip->ip_src.s_addr,
			    pi->ip->ip_dst.s_addr,
			    htons(IPPROTO_TCP + (m->m_pkthdr.len -
				    cksum_offset)));
#endif
		}
		m_copyback(m, offsetof(struct tcphdr, th_sum) +
		    cksum_offset, sizeof(sum), (caddr_t)&sum);
	}
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	if (pi->ip6) {
		/*
		 * for IPv6 TSO, the "checksum offset" is re-purposed
		 * to store the TCP header len
		 */
		cksum_offset = (pi->tcp->th_off << 2);
	}

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */
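	/*
	 * The loop below walks the busdma segment list and slices
	 * each segment at mss boundaries (marking a cut with
	 * MXGEFW_FLAGS_TSO_CHOP); the (req - rdma_count) writes are
	 * the retroactive rdma_count fix-ups that the comment above
	 * describes.
	 */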
	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces*/
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (cksum_offset != 0 && !pi->ip6) {
				if (__predict_false(cksum_offset > seglen))
					cksum_offset -= seglen;
				else
					cksum_offset = 0;
			}
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req-rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);

	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}

	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	ss->oerrors++;
	if (!once) {
		printf("tx->max_desc exceeded via TSO!\n");
		printf("mss = %d, %ld, %d!\n", mss,
		       (long)seg - (long)tx->seg_list, tx->max_desc);
		once = 1;
	}
	return;

}

#endif /* IFCAP_TSO4 */

#ifdef MXGE_NEW_VLAN_API
/*
 * We reproduce the software vlan tag insertion from
 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
 * vlan tag insertion.  We need to advertise this in order to have the
 * vlan interface respect our csum offload flags.
 */
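/*
 * The transformation performed below, sketched on the wire format:
 *
 *   before:  [dst 6][src 6][type 2][payload ...]
 *   after:   [dst 6][src 6][0x8100 2][tag 2][type 2][payload ...]
 *
 * i.e. the original dst/src addresses are slid 4 bytes earlier and
 * the 802.1Q tag is spliced in ahead of the original ethertype.
 */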
2008 */ 2009 evl = mtod(m, struct ether_vlan_header *); 2010 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 2011 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 2012 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 2013 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 2014 m->m_flags &= ~M_VLANTAG; 2015 return m; 2016 } 2017 #endif /* MXGE_NEW_VLAN_API */ 2018 2019 static void 2020 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 2021 { 2022 struct mxge_pkt_info pi = {0,0,0,0}; 2023 mxge_softc_t *sc; 2024 mcp_kreq_ether_send_t *req; 2025 bus_dma_segment_t *seg; 2026 struct mbuf *m_tmp; 2027 mxge_tx_ring_t *tx; 2028 int cnt, cum_len, err, i, idx, odd_flag; 2029 uint16_t pseudo_hdr_offset; 2030 uint8_t flags, cksum_offset; 2031 2032 sc = ss->sc; 2033 tx = &ss->tx; 2034 2035 #ifdef MXGE_NEW_VLAN_API 2036 if (m->m_flags & M_VLANTAG) { 2037 m = mxge_vlan_tag_insert(m); 2038 if (__predict_false(m == NULL)) 2039 goto drop_without_m; 2040 } 2041 #endif 2042 if (m->m_pkthdr.csum_flags & 2043 (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { 2044 if (mxge_parse_tx(ss, m, &pi)) 2045 goto drop; 2046 } 2047 2048 /* (try to) map the frame for DMA */ 2049 idx = tx->req & tx->mask; 2050 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 2051 m, tx->seg_list, &cnt, 2052 BUS_DMA_NOWAIT); 2053 if (__predict_false(err == EFBIG)) { 2054 /* Too many segments in the chain. Try 2055 to defrag */ 2056 m_tmp = m_defrag(m, M_NOWAIT); 2057 if (m_tmp == NULL) { 2058 goto drop; 2059 } 2060 ss->tx.defrag++; 2061 m = m_tmp; 2062 err = bus_dmamap_load_mbuf_sg(tx->dmat, 2063 tx->info[idx].map, 2064 m, tx->seg_list, &cnt, 2065 BUS_DMA_NOWAIT); 2066 } 2067 if (__predict_false(err != 0)) { 2068 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 2069 " packet len = %d\n", err, m->m_pkthdr.len); 2070 goto drop; 2071 } 2072 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 2073 BUS_DMASYNC_PREWRITE); 2074 tx->info[idx].m = m; 2075 2076 #if IFCAP_TSO4 2077 /* TSO is different enough, we handle it in another routine */ 2078 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 2079 mxge_encap_tso(ss, m, cnt, &pi); 2080 return; 2081 } 2082 #endif 2083 2084 req = tx->req_list; 2085 cksum_offset = 0; 2086 pseudo_hdr_offset = 0; 2087 flags = MXGEFW_FLAGS_NO_TSO; 2088 2089 /* checksum offloading? 
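If so, the firmware is handed two offsets below: cksum_offset, where summing starts (the end of the IP header), and pseudo_hdr_offset, the offset of the checksum field itself (cksum_offset plus the csum_data offset supplied by the stack), where the result is stored.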
*/ 2090 if (m->m_pkthdr.csum_flags & 2091 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { 2092 /* ensure ip header is in first mbuf, copy 2093 it to a scratch buffer if not */ 2094 cksum_offset = pi.ip_off + pi.ip_hlen; 2095 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2096 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2097 req->cksum_offset = cksum_offset; 2098 flags |= MXGEFW_FLAGS_CKSUM; 2099 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2100 } else { 2101 odd_flag = 0; 2102 } 2103 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2104 flags |= MXGEFW_FLAGS_SMALL; 2105 2106 /* convert segments into a request list */ 2107 cum_len = 0; 2108 seg = tx->seg_list; 2109 req->flags = MXGEFW_FLAGS_FIRST; 2110 for (i = 0; i < cnt; i++) { 2111 req->addr_low = 2112 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2113 req->addr_high = 2114 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2115 req->length = htobe16(seg->ds_len); 2116 req->cksum_offset = cksum_offset; 2117 if (cksum_offset > seg->ds_len) 2118 cksum_offset -= seg->ds_len; 2119 else 2120 cksum_offset = 0; 2121 req->pseudo_hdr_offset = pseudo_hdr_offset; 2122 req->pad = 0; /* complete solid 16-byte block */ 2123 req->rdma_count = 1; 2124 req->flags |= flags | ((cum_len & 1) * odd_flag); 2125 cum_len += seg->ds_len; 2126 seg++; 2127 req++; 2128 req->flags = 0; 2129 } 2130 req--; 2131 /* pad runts to 60 bytes */ 2132 if (cum_len < 60) { 2133 req++; 2134 req->addr_low = 2135 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2136 req->addr_high = 2137 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2138 req->length = htobe16(60 - cum_len); 2139 req->cksum_offset = 0; 2140 req->pseudo_hdr_offset = pseudo_hdr_offset; 2141 req->pad = 0; /* complete solid 16-byte block */ 2142 req->rdma_count = 1; 2143 req->flags |= flags | ((cum_len & 1) * odd_flag); 2144 cnt++; 2145 } 2146 2147 tx->req_list[0].rdma_count = cnt; 2148 #if 0 2149 /* print what the firmware will see */ 2150 for (i = 0; i < cnt; i++) { 2151 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2152 "cso:%d, flags:0x%x, rdma:%d\n", 2153 i, (int)ntohl(tx->req_list[i].addr_high), 2154 (int)ntohl(tx->req_list[i].addr_low), 2155 (int)ntohs(tx->req_list[i].length), 2156 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2157 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2158 tx->req_list[i].rdma_count); 2159 } 2160 printf("--------------\n"); 2161 #endif 2162 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2163 mxge_submit_req(tx, tx->req_list, cnt); 2164 2165 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2166 /* tell the NIC to start polling this slice */ 2167 *tx->send_go = 1; 2168 tx->queue_active = 1; 2169 tx->activate++; 2170 wmb(); 2171 } 2172 2173 return; 2174 2175 drop: 2176 m_freem(m); 2177 drop_without_m: 2178 ss->oerrors++; 2179 return; 2180 } 2181 2182 static void 2183 mxge_qflush(struct ifnet *ifp) 2184 { 2185 mxge_softc_t *sc = ifp->if_softc; 2186 mxge_tx_ring_t *tx; 2187 struct mbuf *m; 2188 int slice; 2189 2190 for (slice = 0; slice < sc->num_slices; slice++) { 2191 tx = &sc->ss[slice].tx; 2192 mtx_lock(&tx->mtx); 2193 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2194 m_freem(m); 2195 mtx_unlock(&tx->mtx); 2196 } 2197 if_qflush(ifp); 2198 } 2199 2200 static inline void 2201 mxge_start_locked(struct mxge_slice_state *ss) 2202 { 2203 mxge_softc_t *sc; 2204 struct mbuf *m; 2205 struct ifnet *ifp; 2206 mxge_tx_ring_t *tx; 2207 2208 sc = ss->sc; 2209 ifp = sc->ifp; 2210 tx = &ss->tx; 2211 2212 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2213 m = 
drbr_dequeue(ifp, tx->br); 2214 if (m == NULL) { 2215 return; 2216 } 2217 /* let BPF see it */ 2218 BPF_MTAP(ifp, m); 2219 2220 /* give it to the nic */ 2221 mxge_encap(ss, m); 2222 } 2223 /* ran out of transmit slots */ 2224 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) 2225 && (!drbr_empty(ifp, tx->br))) { 2226 ss->if_drv_flags |= IFF_DRV_OACTIVE; 2227 tx->stall++; 2228 } 2229 } 2230 2231 static int 2232 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2233 { 2234 mxge_softc_t *sc; 2235 struct ifnet *ifp; 2236 mxge_tx_ring_t *tx; 2237 int err; 2238 2239 sc = ss->sc; 2240 ifp = sc->ifp; 2241 tx = &ss->tx; 2242 2243 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 2244 IFF_DRV_RUNNING) { 2245 err = drbr_enqueue(ifp, tx->br, m); 2246 return (err); 2247 } 2248 2249 if (!drbr_needs_enqueue(ifp, tx->br) && 2250 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2251 /* let BPF see it */ 2252 BPF_MTAP(ifp, m); 2253 /* give it to the nic */ 2254 mxge_encap(ss, m); 2255 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2256 return (err); 2257 } 2258 if (!drbr_empty(ifp, tx->br)) 2259 mxge_start_locked(ss); 2260 return (0); 2261 } 2262 2263 static int 2264 mxge_transmit(struct ifnet *ifp, struct mbuf *m) 2265 { 2266 mxge_softc_t *sc = ifp->if_softc; 2267 struct mxge_slice_state *ss; 2268 mxge_tx_ring_t *tx; 2269 int err = 0; 2270 int slice; 2271 2272 slice = m->m_pkthdr.flowid; 2273 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ 2274 2275 ss = &sc->ss[slice]; 2276 tx = &ss->tx; 2277 2278 if (mtx_trylock(&tx->mtx)) { 2279 err = mxge_transmit_locked(ss, m); 2280 mtx_unlock(&tx->mtx); 2281 } else { 2282 err = drbr_enqueue(ifp, tx->br, m); 2283 } 2284 2285 return (err); 2286 } 2287 2288 static void 2289 mxge_start(struct ifnet *ifp) 2290 { 2291 mxge_softc_t *sc = ifp->if_softc; 2292 struct mxge_slice_state *ss; 2293 2294 /* only use the first slice for now */ 2295 ss = &sc->ss[0]; 2296 mtx_lock(&ss->tx.mtx); 2297 mxge_start_locked(ss); 2298 mtx_unlock(&ss->tx.mtx); 2299 } 2300 2301 /* 2302 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2303 * at most 32 bytes at a time, so as to avoid involving the software 2304 * pio handler in the nic. 
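(Each mcp_kreq_ether_recv_t is 8 bytes, so an 8-entry chunk goes out as two 32-byte copies with a wmb() between them.)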
We re-write the first segment's low 2305 * DMA address to mark it valid only after we write the entire chunk 2306 * in a burst 2307 */ 2308 static inline void 2309 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2310 mcp_kreq_ether_recv_t *src) 2311 { 2312 uint32_t low; 2313 2314 low = src->addr_low; 2315 src->addr_low = 0xffffffff; 2316 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2317 wmb(); 2318 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2319 wmb(); 2320 src->addr_low = low; 2321 dst->addr_low = low; 2322 wmb(); 2323 } 2324 2325 static int 2326 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2327 { 2328 bus_dma_segment_t seg; 2329 struct mbuf *m; 2330 mxge_rx_ring_t *rx = &ss->rx_small; 2331 int cnt, err; 2332 2333 m = m_gethdr(M_NOWAIT, MT_DATA); 2334 if (m == NULL) { 2335 rx->alloc_fail++; 2336 err = ENOBUFS; 2337 goto done; 2338 } 2339 m->m_len = MHLEN; 2340 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2341 &seg, &cnt, BUS_DMA_NOWAIT); 2342 if (err != 0) { 2343 m_free(m); 2344 goto done; 2345 } 2346 rx->info[idx].m = m; 2347 rx->shadow[idx].addr_low = 2348 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2349 rx->shadow[idx].addr_high = 2350 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2351 2352 done: 2353 if ((idx & 7) == 7) 2354 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2355 return err; 2356 } 2357 2358 static int 2359 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2360 { 2361 bus_dma_segment_t seg[3]; 2362 struct mbuf *m; 2363 mxge_rx_ring_t *rx = &ss->rx_big; 2364 int cnt, err, i; 2365 2366 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2367 if (m == NULL) { 2368 rx->alloc_fail++; 2369 err = ENOBUFS; 2370 goto done; 2371 } 2372 m->m_len = rx->mlen; 2373 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2374 seg, &cnt, BUS_DMA_NOWAIT); 2375 if (err != 0) { 2376 m_free(m); 2377 goto done; 2378 } 2379 rx->info[idx].m = m; 2380 rx->shadow[idx].addr_low = 2381 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2382 rx->shadow[idx].addr_high = 2383 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2384 2385 done: 2386 for (i = 0; i < rx->nbufs; i++) { 2387 if ((idx & 7) == 7) { 2388 mxge_submit_8rx(&rx->lanai[idx - 7], 2389 &rx->shadow[idx - 7]); 2390 } 2391 idx++; 2392 } 2393 return err; 2394 } 2395 2396 #ifdef INET6 2397 2398 static uint16_t 2399 mxge_csum_generic(uint16_t *raw, int len) 2400 { 2401 uint32_t csum; 2402 2403 csum = 0; 2404 while (len > 0) { 2405 csum += *raw; 2406 raw++; 2407 len -= 2; 2408 } 2409 csum = (csum >> 16) + (csum & 0xffff); 2410 csum = (csum >> 16) + (csum & 0xffff); 2411 return (uint16_t)csum; 2412 } 2413 2414 static inline uint16_t 2415 mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum) 2416 { 2417 uint32_t partial; 2418 int nxt, cksum_offset; 2419 struct ip6_hdr *ip6 = p; 2420 uint16_t c; 2421 2422 nxt = ip6->ip6_nxt; 2423 cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN; 2424 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) { 2425 cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN, 2426 IPPROTO_IPV6, &nxt); 2427 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) 2428 return (1); 2429 } 2430 2431 /* 2432 * IPv6 headers do not contain a checksum, and hence 2433 * do not checksum to zero, so they don't "fall out" 2434 * of the partial checksum calculation like IPv4 2435 * headers do. We need to fix the partial checksum by 2436 * subtracting the checksum of the IPv6 header. 
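* In ones'-complement arithmetic, adding the bitwise complement of a value and folding the end-around carry back in is the same as subtracting it; that is what the "csum += ~partial; csum += (csum < ~partial);" pair below does, and the two shift-and-mask steps then fold the 32-bit accumulator back down to 16 bits.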
2437 */ 2438 2439 partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset - 2440 ETHER_HDR_LEN); 2441 csum += ~partial; 2442 csum += (csum < ~partial); 2443 csum = (csum >> 16) + (csum & 0xFFFF); 2444 csum = (csum >> 16) + (csum & 0xFFFF); 2445 c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt, 2446 csum); 2447 c ^= 0xffff; 2448 return (c); 2449 } 2450 #endif /* INET6 */ 2451 /* 2452 * Myri10GE hardware checksums are not valid if the sender 2453 * padded the frame with non-zero padding. This is because 2454 * the firmware just does a simple 16-bit 1s complement 2455 * checksum across the entire frame, excluding the first 14 2456 * bytes. It is best to simply check the checksum and 2457 * tell the stack about it only if the checksum is good. 2458 */ 2459 2460 static inline uint16_t 2461 mxge_rx_csum(struct mbuf *m, int csum) 2462 { 2463 struct ether_header *eh; 2464 #ifdef INET 2465 struct ip *ip; 2466 #endif 2467 #if defined(INET) || defined(INET6) 2468 int cap = m->m_pkthdr.rcvif->if_capenable; 2469 #endif 2470 uint16_t c, etype; 2471 2472 eh = mtod(m, struct ether_header *); 2473 etype = ntohs(eh->ether_type); 2474 switch (etype) { 2475 #ifdef INET 2476 case ETHERTYPE_IP: 2477 if ((cap & IFCAP_RXCSUM) == 0) 2478 return (1); 2479 ip = (struct ip *)(eh + 1); 2480 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP) 2481 return (1); 2482 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2483 htonl(ntohs(csum) + ntohs(ip->ip_len) - 2484 (ip->ip_hl << 2) + ip->ip_p)); 2485 c ^= 0xffff; 2486 break; 2487 #endif 2488 #ifdef INET6 2489 case ETHERTYPE_IPV6: 2490 if ((cap & IFCAP_RXCSUM_IPV6) == 0) 2491 return (1); 2492 c = mxge_rx_csum6((eh + 1), m, csum); 2493 break; 2494 #endif 2495 default: 2496 c = 1; 2497 } 2498 return (c); 2499 } 2500 2501 static void 2502 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2503 { 2504 struct ether_vlan_header *evl; 2505 uint32_t partial; 2506 2507 evl = mtod(m, struct ether_vlan_header *); 2508 2509 /* 2510 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes 2511 * after what the firmware thought was the end of the ethernet 2512 * header. 2513 */ 2514 2515 /* put checksum into host byte order */ 2516 *csum = ntohs(*csum); 2517 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2518 (*csum) += ~partial; 2519 (*csum) += ((*csum) < ~partial); 2520 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2521 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2522 2523 /* restore checksum to network byte order; 2524 later consumers expect this */ 2525 *csum = htons(*csum); 2526 2527 /* save the tag */ 2528 #ifdef MXGE_NEW_VLAN_API 2529 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); 2530 #else 2531 { 2532 struct m_tag *mtag; 2533 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), 2534 M_NOWAIT); 2535 if (mtag == NULL) 2536 return; 2537 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); 2538 m_tag_prepend(m, mtag); 2539 } 2540 2541 #endif 2542 m->m_flags |= M_VLANTAG; 2543 2544 /* 2545 * Remove the 802.1q header by copying the Ethernet 2546 * addresses over it and adjusting the beginning of 2547 * the data in the mbuf. The encapsulated Ethernet 2548 * type field is already in place.
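* The bcopy() moves the 12 bytes (ETHER_HDR_LEN - ETHER_TYPE_LEN) of destination and source MAC addresses four bytes deeper, on top of the 802.1Q tag, and m_adj() then trims the four freed bytes from the front of the mbuf.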
2549 */ 2550 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2551 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2552 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2553 } 2554 2555 static inline void 2556 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, 2557 uint32_t csum, int lro) 2558 { 2559 mxge_softc_t *sc; 2560 struct ifnet *ifp; 2561 struct mbuf *m; 2562 struct ether_header *eh; 2563 mxge_rx_ring_t *rx; 2564 bus_dmamap_t old_map; 2565 int idx; 2566 2567 sc = ss->sc; 2568 ifp = sc->ifp; 2569 rx = &ss->rx_big; 2570 idx = rx->cnt & rx->mask; 2571 rx->cnt += rx->nbufs; 2572 /* save a pointer to the received mbuf */ 2573 m = rx->info[idx].m; 2574 /* try to replace the received mbuf */ 2575 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2576 /* drop the frame -- the old mbuf is re-cycled */ 2577 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 2578 return; 2579 } 2580 2581 /* unmap the received buffer */ 2582 old_map = rx->info[idx].map; 2583 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2584 bus_dmamap_unload(rx->dmat, old_map); 2585 2586 /* swap the bus_dmamap_t's */ 2587 rx->info[idx].map = rx->extra_map; 2588 rx->extra_map = old_map; 2589 2590 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2591 * aligned */ 2592 m->m_data += MXGEFW_PAD; 2593 2594 m->m_pkthdr.rcvif = ifp; 2595 m->m_len = m->m_pkthdr.len = len; 2596 ss->ipackets++; 2597 eh = mtod(m, struct ether_header *); 2598 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2599 mxge_vlan_tag_remove(m, &csum); 2600 } 2601 /* flowid only valid if RSS hashing is enabled */ 2602 if (sc->num_slices > 1) { 2603 m->m_pkthdr.flowid = (ss - sc->ss); 2604 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 2605 } 2606 /* if the checksum is valid, mark it in the mbuf header */ 2607 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && 2608 (0 == mxge_rx_csum(m, csum))) { 2609 /* Tell the stack that the checksum is good */ 2610 m->m_pkthdr.csum_data = 0xffff; 2611 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2612 CSUM_DATA_VALID; 2613 2614 #if defined(INET) || defined (INET6) 2615 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0))) 2616 return; 2617 #endif 2618 } 2619 /* pass the frame up the stack */ 2620 (*ifp->if_input)(ifp, m); 2621 } 2622 2623 static inline void 2624 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, 2625 uint32_t csum, int lro) 2626 { 2627 mxge_softc_t *sc; 2628 struct ifnet *ifp; 2629 struct ether_header *eh; 2630 struct mbuf *m; 2631 mxge_rx_ring_t *rx; 2632 bus_dmamap_t old_map; 2633 int idx; 2634 2635 sc = ss->sc; 2636 ifp = sc->ifp; 2637 rx = &ss->rx_small; 2638 idx = rx->cnt & rx->mask; 2639 rx->cnt++; 2640 /* save a pointer to the received mbuf */ 2641 m = rx->info[idx].m; 2642 /* try to replace the received mbuf */ 2643 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2644 /* drop the frame -- the old mbuf is re-cycled */ 2645 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 2646 return; 2647 } 2648 2649 /* unmap the received buffer */ 2650 old_map = rx->info[idx].map; 2651 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2652 bus_dmamap_unload(rx->dmat, old_map); 2653 2654 /* swap the bus_dmamap_t's */ 2655 rx->info[idx].map = rx->extra_map; 2656 rx->extra_map = old_map; 2657 2658 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2659 * aligned */ 2660 m->m_data += MXGEFW_PAD; 2661 2662 m->m_pkthdr.rcvif = ifp; 2663 m->m_len = m->m_pkthdr.len = len; 2664 ss->ipackets++; 2665 eh = mtod(m, struct ether_header *); 2666 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2667 
mxge_vlan_tag_remove(m, &csum); 2668 } 2669 /* flowid only valid if RSS hashing is enabled */ 2670 if (sc->num_slices > 1) { 2671 m->m_pkthdr.flowid = (ss - sc->ss); 2672 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 2673 } 2674 /* if the checksum is valid, mark it in the mbuf header */ 2675 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && 2676 (0 == mxge_rx_csum(m, csum))) { 2677 /* Tell the stack that the checksum is good */ 2678 m->m_pkthdr.csum_data = 0xffff; 2679 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2680 CSUM_DATA_VALID; 2681 2682 #if defined(INET) || defined (INET6) 2683 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum))) 2684 return; 2685 #endif 2686 } 2687 /* pass the frame up the stack */ 2688 (*ifp->if_input)(ifp, m); 2689 } 2690 2691 static inline void 2692 mxge_clean_rx_done(struct mxge_slice_state *ss) 2693 { 2694 mxge_rx_done_t *rx_done = &ss->rx_done; 2695 int limit = 0; 2696 uint16_t length; 2697 uint16_t checksum; 2698 int lro; 2699 2700 lro = ss->sc->ifp->if_capenable & IFCAP_LRO; 2701 while (rx_done->entry[rx_done->idx].length != 0) { 2702 length = ntohs(rx_done->entry[rx_done->idx].length); 2703 rx_done->entry[rx_done->idx].length = 0; 2704 checksum = rx_done->entry[rx_done->idx].checksum; 2705 if (length <= (MHLEN - MXGEFW_PAD)) 2706 mxge_rx_done_small(ss, length, checksum, lro); 2707 else 2708 mxge_rx_done_big(ss, length, checksum, lro); 2709 rx_done->cnt++; 2710 rx_done->idx = rx_done->cnt & rx_done->mask; 2711 2712 /* limit potential for livelock */ 2713 if (__predict_false(++limit > rx_done->mask / 2)) 2714 break; 2715 } 2716 #if defined(INET) || defined (INET6) 2717 tcp_lro_flush_all(&ss->lc); 2718 #endif 2719 } 2720 2721 static inline void 2722 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2723 { 2724 struct ifnet *ifp __unused; 2725 mxge_tx_ring_t *tx; 2726 struct mbuf *m; 2727 bus_dmamap_t map; 2728 int idx; 2729 int *flags; 2730 2731 tx = &ss->tx; 2732 ifp = ss->sc->ifp; 2733 while (tx->pkt_done != mcp_idx) { 2734 idx = tx->done & tx->mask; 2735 tx->done++; 2736 m = tx->info[idx].m; 2737 /* mbuf and DMA map only attached to the first 2738 segment per-mbuf */ 2739 if (m != NULL) { 2740 ss->obytes += m->m_pkthdr.len; 2741 if (m->m_flags & M_MCAST) 2742 ss->omcasts++; 2743 ss->opackets++; 2744 tx->info[idx].m = NULL; 2745 map = tx->info[idx].map; 2746 bus_dmamap_unload(tx->dmat, map); 2747 m_freem(m); 2748 } 2749 if (tx->info[idx].flag) { 2750 tx->info[idx].flag = 0; 2751 tx->pkt_done++; 2752 } 2753 } 2754 2755 /* If we have space, clear IFF_DRV_OACTIVE to tell the stack that 2756 it's OK to send packets */ 2757 flags = &ss->if_drv_flags; 2758 2759 mtx_lock(&ss->tx.mtx); 2760 if ((*flags) & IFF_DRV_OACTIVE && 2761 tx->req - tx->done < (tx->mask + 1)/4) { 2762 *(flags) &= ~IFF_DRV_OACTIVE; 2763 ss->tx.wake++; 2764 mxge_start_locked(ss); 2765 } 2766 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { 2767 /* let the NIC stop polling this queue, since there 2768 * are no more transmits pending */ 2769 if (tx->req == tx->done) { 2770 *tx->send_stop = 1; 2771 tx->queue_active = 0; 2772 tx->deactivate++; 2773 wmb(); 2774 } 2775 } 2776 mtx_unlock(&ss->tx.mtx); 2777 } 2778 2779 static struct mxge_media_type mxge_xfp_media_types[] = 2780 { 2781 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2782 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2783 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2784 {0, (1 << 5), "10GBASE-ER"}, 2785 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2786 {0, (1 << 3), "10GBASE-SW"}, 2787 {0, (1 << 2), "10GBASE-LW"}, 2788 {0, (1 << 1),
"10GBASE-EW"}, 2789 {0, (1 << 0), "Reserved"} 2790 }; 2791 static struct mxge_media_type mxge_sfp_media_types[] = 2792 { 2793 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"}, 2794 {0, (1 << 7), "Reserved"}, 2795 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2796 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2797 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}, 2798 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"} 2799 }; 2800 2801 static void 2802 mxge_media_set(mxge_softc_t *sc, int media_type) 2803 { 2804 2805 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type, 2806 0, NULL); 2807 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type); 2808 sc->current_media = media_type; 2809 sc->media.ifm_media = sc->media.ifm_cur->ifm_media; 2810 } 2811 2812 static void 2813 mxge_media_init(mxge_softc_t *sc) 2814 { 2815 char *ptr; 2816 int i; 2817 2818 ifmedia_removeall(&sc->media); 2819 mxge_media_set(sc, IFM_AUTO); 2820 2821 /* 2822 * parse the product code to deterimine the interface type 2823 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2824 * after the 3rd dash in the driver's cached copy of the 2825 * EEPROM's product code string. 2826 */ 2827 ptr = sc->product_code_string; 2828 if (ptr == NULL) { 2829 device_printf(sc->dev, "Missing product code\n"); 2830 return; 2831 } 2832 2833 for (i = 0; i < 3; i++, ptr++) { 2834 ptr = strchr(ptr, '-'); 2835 if (ptr == NULL) { 2836 device_printf(sc->dev, 2837 "only %d dashes in PC?!?\n", i); 2838 return; 2839 } 2840 } 2841 if (*ptr == 'C' || *(ptr +1) == 'C') { 2842 /* -C is CX4 */ 2843 sc->connector = MXGE_CX4; 2844 mxge_media_set(sc, IFM_10G_CX4); 2845 } else if (*ptr == 'Q') { 2846 /* -Q is Quad Ribbon Fiber */ 2847 sc->connector = MXGE_QRF; 2848 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2849 /* FreeBSD has no media type for Quad ribbon fiber */ 2850 } else if (*ptr == 'R') { 2851 /* -R is XFP */ 2852 sc->connector = MXGE_XFP; 2853 } else if (*ptr == 'S' || *(ptr +1) == 'S') { 2854 /* -S or -2S is SFP+ */ 2855 sc->connector = MXGE_SFP; 2856 } else { 2857 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2858 } 2859 } 2860 2861 /* 2862 * Determine the media type for a NIC. Some XFPs will identify 2863 * themselves only when their link is up, so this is initiated via a 2864 * link up interrupt. However, this can potentially take up to 2865 * several milliseconds, so it is run via the watchdog routine, rather 2866 * than in the interrupt handler itself. 2867 */ 2868 static void 2869 mxge_media_probe(mxge_softc_t *sc) 2870 { 2871 mxge_cmd_t cmd; 2872 char *cage_type; 2873 2874 struct mxge_media_type *mxge_media_types = NULL; 2875 int i, err, ms, mxge_media_type_entries; 2876 uint32_t byte; 2877 2878 sc->need_media_probe = 0; 2879 2880 if (sc->connector == MXGE_XFP) { 2881 /* -R is XFP */ 2882 mxge_media_types = mxge_xfp_media_types; 2883 mxge_media_type_entries = 2884 nitems(mxge_xfp_media_types); 2885 byte = MXGE_XFP_COMPLIANCE_BYTE; 2886 cage_type = "XFP"; 2887 } else if (sc->connector == MXGE_SFP) { 2888 /* -S or -2S is SFP+ */ 2889 mxge_media_types = mxge_sfp_media_types; 2890 mxge_media_type_entries = 2891 nitems(mxge_sfp_media_types); 2892 cage_type = "SFP+"; 2893 byte = 3; 2894 } else { 2895 /* nothing to do; media type cannot change */ 2896 return; 2897 } 2898 2899 /* 2900 * At this point we know the NIC has an XFP cage, so now we 2901 * try to determine what is in the cage by using the 2902 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2903 * register. 
We read just one byte, which may take over 2904 * a millisecond 2905 */ 2906 2907 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2908 cmd.data1 = byte; 2909 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2910 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 2911 device_printf(sc->dev, "failed to read XFP\n"); 2912 } 2913 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 2914 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 2915 } 2916 if (err != MXGEFW_CMD_OK) { 2917 return; 2918 } 2919 2920 /* now we wait for the data to be cached */ 2921 cmd.data0 = byte; 2922 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2923 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 2924 DELAY(1000); 2925 cmd.data0 = byte; 2926 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2927 } 2928 if (err != MXGEFW_CMD_OK) { 2929 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 2930 cage_type, err, ms); 2931 return; 2932 } 2933 2934 if (cmd.data0 == mxge_media_types[0].bitmask) { 2935 if (mxge_verbose) 2936 device_printf(sc->dev, "%s:%s\n", cage_type, 2937 mxge_media_types[0].name); 2938 if (sc->current_media != mxge_media_types[0].flag) { 2939 mxge_media_init(sc); 2940 mxge_media_set(sc, mxge_media_types[0].flag); 2941 } 2942 return; 2943 } 2944 for (i = 1; i < mxge_media_type_entries; i++) { 2945 if (cmd.data0 & mxge_media_types[i].bitmask) { 2946 if (mxge_verbose) 2947 device_printf(sc->dev, "%s:%s\n", 2948 cage_type, 2949 mxge_media_types[i].name); 2950 2951 if (sc->current_media != mxge_media_types[i].flag) { 2952 mxge_media_init(sc); 2953 mxge_media_set(sc, mxge_media_types[i].flag); 2954 } 2955 return; 2956 } 2957 } 2958 if (mxge_verbose) 2959 device_printf(sc->dev, "%s media 0x%x unknown\n", 2960 cage_type, cmd.data0); 2961 2962 return; 2963 } 2964 2965 static void 2966 mxge_intr(void *arg) 2967 { 2968 struct mxge_slice_state *ss = arg; 2969 mxge_softc_t *sc = ss->sc; 2970 mcp_irq_data_t *stats = ss->fw_stats; 2971 mxge_tx_ring_t *tx = &ss->tx; 2972 mxge_rx_done_t *rx_done = &ss->rx_done; 2973 uint32_t send_done_count; 2974 uint8_t valid; 2975 2976 /* make sure the DMA has finished */ 2977 if (!stats->valid) { 2978 return; 2979 } 2980 valid = stats->valid; 2981 2982 if (sc->legacy_irq) { 2983 /* lower legacy IRQ */ 2984 *sc->irq_deassert = 0; 2985 if (!mxge_deassert_wait) 2986 /* don't wait for conf. 
that irq is low */ 2987 stats->valid = 0; 2988 } else { 2989 stats->valid = 0; 2990 } 2991 2992 /* loop while waiting for legacy irq deassertion */ 2993 do { 2994 /* check for transmit completes and receives */ 2995 send_done_count = be32toh(stats->send_done_count); 2996 while ((send_done_count != tx->pkt_done) || 2997 (rx_done->entry[rx_done->idx].length != 0)) { 2998 if (send_done_count != tx->pkt_done) 2999 mxge_tx_done(ss, (int)send_done_count); 3000 mxge_clean_rx_done(ss); 3001 send_done_count = be32toh(stats->send_done_count); 3002 } 3003 if (sc->legacy_irq && mxge_deassert_wait) 3004 wmb(); 3005 } while (*((volatile uint8_t *) &stats->valid)); 3006 3007 /* fw link & error stats meaningful only on the first slice */ 3008 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 3009 if (sc->link_state != stats->link_up) { 3010 sc->link_state = stats->link_up; 3011 if (sc->link_state) { 3012 if_link_state_change(sc->ifp, LINK_STATE_UP); 3013 if (mxge_verbose) 3014 device_printf(sc->dev, "link up\n"); 3015 } else { 3016 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3017 if (mxge_verbose) 3018 device_printf(sc->dev, "link down\n"); 3019 } 3020 sc->need_media_probe = 1; 3021 } 3022 if (sc->rdma_tags_available != 3023 be32toh(stats->rdma_tags_available)) { 3024 sc->rdma_tags_available = 3025 be32toh(stats->rdma_tags_available); 3026 device_printf(sc->dev, "RDMA timed out! %d tags " 3027 "left\n", sc->rdma_tags_available); 3028 } 3029 3030 if (stats->link_down) { 3031 sc->down_cnt += stats->link_down; 3032 sc->link_state = 0; 3033 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3034 } 3035 } 3036 3037 /* check to see if we have rx token to pass back */ 3038 if (valid & 0x1) 3039 *ss->irq_claim = be32toh(3); 3040 *(ss->irq_claim + 1) = be32toh(3); 3041 } 3042 3043 static void 3044 mxge_init(void *arg) 3045 { 3046 mxge_softc_t *sc = arg; 3047 struct ifnet *ifp = sc->ifp; 3048 3049 mtx_lock(&sc->driver_mtx); 3050 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 3051 (void) mxge_open(sc); 3052 mtx_unlock(&sc->driver_mtx); 3053 } 3054 3055 static void 3056 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 3057 { 3058 int i; 3059 3060 #if defined(INET) || defined(INET6) 3061 tcp_lro_free(&ss->lc); 3062 #endif 3063 for (i = 0; i <= ss->rx_big.mask; i++) { 3064 if (ss->rx_big.info[i].m == NULL) 3065 continue; 3066 bus_dmamap_unload(ss->rx_big.dmat, 3067 ss->rx_big.info[i].map); 3068 m_freem(ss->rx_big.info[i].m); 3069 ss->rx_big.info[i].m = NULL; 3070 } 3071 3072 for (i = 0; i <= ss->rx_small.mask; i++) { 3073 if (ss->rx_small.info[i].m == NULL) 3074 continue; 3075 bus_dmamap_unload(ss->rx_small.dmat, 3076 ss->rx_small.info[i].map); 3077 m_freem(ss->rx_small.info[i].m); 3078 ss->rx_small.info[i].m = NULL; 3079 } 3080 3081 /* transmit ring used only on the first slice */ 3082 if (ss->tx.info == NULL) 3083 return; 3084 3085 for (i = 0; i <= ss->tx.mask; i++) { 3086 ss->tx.info[i].flag = 0; 3087 if (ss->tx.info[i].m == NULL) 3088 continue; 3089 bus_dmamap_unload(ss->tx.dmat, 3090 ss->tx.info[i].map); 3091 m_freem(ss->tx.info[i].m); 3092 ss->tx.info[i].m = NULL; 3093 } 3094 } 3095 3096 static void 3097 mxge_free_mbufs(mxge_softc_t *sc) 3098 { 3099 int slice; 3100 3101 for (slice = 0; slice < sc->num_slices; slice++) 3102 mxge_free_slice_mbufs(&sc->ss[slice]); 3103 } 3104 3105 static void 3106 mxge_free_slice_rings(struct mxge_slice_state *ss) 3107 { 3108 int i; 3109 3110 if (ss->rx_done.entry != NULL) 3111 mxge_dma_free(&ss->rx_done.dma); 3112 ss->rx_done.entry = NULL; 3113 3114 if 
(ss->tx.req_bytes != NULL) 3115 free(ss->tx.req_bytes, M_DEVBUF); 3116 ss->tx.req_bytes = NULL; 3117 3118 if (ss->tx.seg_list != NULL) 3119 free(ss->tx.seg_list, M_DEVBUF); 3120 ss->tx.seg_list = NULL; 3121 3122 if (ss->rx_small.shadow != NULL) 3123 free(ss->rx_small.shadow, M_DEVBUF); 3124 ss->rx_small.shadow = NULL; 3125 3126 if (ss->rx_big.shadow != NULL) 3127 free(ss->rx_big.shadow, M_DEVBUF); 3128 ss->rx_big.shadow = NULL; 3129 3130 if (ss->tx.info != NULL) { 3131 if (ss->tx.dmat != NULL) { 3132 for (i = 0; i <= ss->tx.mask; i++) { 3133 bus_dmamap_destroy(ss->tx.dmat, 3134 ss->tx.info[i].map); 3135 } 3136 bus_dma_tag_destroy(ss->tx.dmat); 3137 } 3138 free(ss->tx.info, M_DEVBUF); 3139 } 3140 ss->tx.info = NULL; 3141 3142 if (ss->rx_small.info != NULL) { 3143 if (ss->rx_small.dmat != NULL) { 3144 for (i = 0; i <= ss->rx_small.mask; i++) { 3145 bus_dmamap_destroy(ss->rx_small.dmat, 3146 ss->rx_small.info[i].map); 3147 } 3148 bus_dmamap_destroy(ss->rx_small.dmat, 3149 ss->rx_small.extra_map); 3150 bus_dma_tag_destroy(ss->rx_small.dmat); 3151 } 3152 free(ss->rx_small.info, M_DEVBUF); 3153 } 3154 ss->rx_small.info = NULL; 3155 3156 if (ss->rx_big.info != NULL) { 3157 if (ss->rx_big.dmat != NULL) { 3158 for (i = 0; i <= ss->rx_big.mask; i++) { 3159 bus_dmamap_destroy(ss->rx_big.dmat, 3160 ss->rx_big.info[i].map); 3161 } 3162 bus_dmamap_destroy(ss->rx_big.dmat, 3163 ss->rx_big.extra_map); 3164 bus_dma_tag_destroy(ss->rx_big.dmat); 3165 } 3166 free(ss->rx_big.info, M_DEVBUF); 3167 } 3168 ss->rx_big.info = NULL; 3169 } 3170 3171 static void 3172 mxge_free_rings(mxge_softc_t *sc) 3173 { 3174 int slice; 3175 3176 for (slice = 0; slice < sc->num_slices; slice++) 3177 mxge_free_slice_rings(&sc->ss[slice]); 3178 } 3179 3180 static int 3181 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3182 int tx_ring_entries) 3183 { 3184 mxge_softc_t *sc = ss->sc; 3185 size_t bytes; 3186 int err, i; 3187 3188 /* allocate per-slice receive resources */ 3189 3190 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 3191 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 3192 3193 /* allocate the rx shadow rings */ 3194 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 3195 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3196 3197 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 3198 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3199 3200 /* allocate the rx host info rings */ 3201 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 3202 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3203 3204 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 3205 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3206 3207 /* allocate the rx busdma resources */ 3208 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3209 1, /* alignment */ 3210 4096, /* boundary */ 3211 BUS_SPACE_MAXADDR, /* low */ 3212 BUS_SPACE_MAXADDR, /* high */ 3213 NULL, NULL, /* filter */ 3214 MHLEN, /* maxsize */ 3215 1, /* num segs */ 3216 MHLEN, /* maxsegsize */ 3217 BUS_DMA_ALLOCNOW, /* flags */ 3218 NULL, NULL, /* lock */ 3219 &ss->rx_small.dmat); /* tag */ 3220 if (err != 0) { 3221 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3222 err); 3223 return err; 3224 } 3225 3226 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3227 1, /* alignment */ 3228 0, /* boundary */ 3229 BUS_SPACE_MAXADDR, /* low */ 3230 BUS_SPACE_MAXADDR, /* high */ 3231 NULL, NULL, /* filter */ 3232 3*4096, /* maxsize */ 3233 1, /* num segs */ 3234 MJUM9BYTES, /* 
maxsegsize*/ 3235 BUS_DMA_ALLOCNOW, /* flags */ 3236 NULL, NULL, /* lock */ 3237 &ss->rx_big.dmat); /* tag */ 3238 if (err != 0) { 3239 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3240 err); 3241 return err; 3242 } 3243 for (i = 0; i <= ss->rx_small.mask; i++) { 3244 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3245 &ss->rx_small.info[i].map); 3246 if (err != 0) { 3247 device_printf(sc->dev, "Err %d rx_small dmamap\n", 3248 err); 3249 return err; 3250 } 3251 } 3252 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3253 &ss->rx_small.extra_map); 3254 if (err != 0) { 3255 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 3256 err); 3257 return err; 3258 } 3259 3260 for (i = 0; i <= ss->rx_big.mask; i++) { 3261 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3262 &ss->rx_big.info[i].map); 3263 if (err != 0) { 3264 device_printf(sc->dev, "Err %d rx_big dmamap\n", 3265 err); 3266 return err; 3267 } 3268 } 3269 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3270 &ss->rx_big.extra_map); 3271 if (err != 0) { 3272 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 3273 err); 3274 return err; 3275 } 3276 3277 /* now allocate TX resources */ 3278 3279 ss->tx.mask = tx_ring_entries - 1; 3280 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3281 3282 /* allocate the tx request copy block */ 3283 bytes = 8 + 3284 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3285 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 3286 /* ensure req_list entries are aligned to 8 bytes */ 3287 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3288 ((uintptr_t)(ss->tx.req_bytes + 7) & ~7UL); 3289 3290 /* allocate the tx busdma segment list */ 3291 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3292 ss->tx.seg_list = (bus_dma_segment_t *) 3293 malloc(bytes, M_DEVBUF, M_WAITOK); 3294 3295 /* allocate the tx host info ring */ 3296 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3297 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3298 3299 /* allocate the tx busdma resources */ 3300 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3301 1, /* alignment */ 3302 sc->tx_boundary, /* boundary */ 3303 BUS_SPACE_MAXADDR, /* low */ 3304 BUS_SPACE_MAXADDR, /* high */ 3305 NULL, NULL, /* filter */ 3306 65536 + 256, /* maxsize */ 3307 ss->tx.max_desc - 2, /* num segs */ 3308 sc->tx_boundary, /* maxsegsz */ 3309 BUS_DMA_ALLOCNOW, /* flags */ 3310 NULL, NULL, /* lock */ 3311 &ss->tx.dmat); /* tag */ 3312 3313 if (err != 0) { 3314 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3315 err); 3316 return err; 3317 } 3318 3319 /* now use these tags to setup dmamaps for each slot 3320 in the ring */ 3321 for (i = 0; i <= ss->tx.mask; i++) { 3322 err = bus_dmamap_create(ss->tx.dmat, 0, 3323 &ss->tx.info[i].map); 3324 if (err != 0) { 3325 device_printf(sc->dev, "Err %d tx dmamap\n", 3326 err); 3327 return err; 3328 } 3329 } 3330 return 0; 3331 3332 } 3333 3334 static int 3335 mxge_alloc_rings(mxge_softc_t *sc) 3336 { 3337 mxge_cmd_t cmd; 3338 int tx_ring_size; 3339 int tx_ring_entries, rx_ring_entries; 3340 int err, slice; 3341 3342 /* get ring sizes */ 3343 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3344 tx_ring_size = cmd.data0; 3345 if (err != 0) { 3346 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3347 goto abort; 3348 } 3349 3350 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3351 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3352 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 3353 
sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 3354 IFQ_SET_READY(&sc->ifp->if_snd); 3355 3356 for (slice = 0; slice < sc->num_slices; slice++) { 3357 err = mxge_alloc_slice_rings(&sc->ss[slice], 3358 rx_ring_entries, 3359 tx_ring_entries); 3360 if (err != 0) 3361 goto abort; 3362 } 3363 return 0; 3364 3365 abort: 3366 mxge_free_rings(sc); 3367 return err; 3368 3369 } 3370 3371 static void 3372 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3373 { 3374 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3375 3376 if (bufsize < MCLBYTES) { 3377 /* easy, everything fits in a single buffer */ 3378 *big_buf_size = MCLBYTES; 3379 *cl_size = MCLBYTES; 3380 *nbufs = 1; 3381 return; 3382 } 3383 3384 if (bufsize < MJUMPAGESIZE) { 3385 /* still easy, everything still fits in a single buffer */ 3386 *big_buf_size = MJUMPAGESIZE; 3387 *cl_size = MJUMPAGESIZE; 3388 *nbufs = 1; 3389 return; 3390 } 3391 *cl_size = MJUM9BYTES; 3392 *big_buf_size = MJUM9BYTES; 3393 *nbufs = 1; 3394 } 3395 3396 static int 3397 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3398 { 3399 mxge_softc_t *sc; 3400 mxge_cmd_t cmd; 3401 bus_dmamap_t map; 3402 int err, i, slice; 3403 3404 sc = ss->sc; 3405 slice = ss - sc->ss; 3406 3407 #if defined(INET) || defined(INET6) 3408 (void)tcp_lro_init(&ss->lc); 3409 #endif 3410 ss->lc.ifp = sc->ifp; 3411 3412 /* get the lanai pointers to the send and receive rings */ 3413 3414 err = 0; 3415 3416 cmd.data0 = slice; 3417 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3418 ss->tx.lanai = 3419 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3420 ss->tx.send_go = (volatile uint32_t *) 3421 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3422 ss->tx.send_stop = (volatile uint32_t *) 3423 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3424 3425 cmd.data0 = slice; 3426 err |= mxge_send_cmd(sc, 3427 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3428 ss->rx_small.lanai = 3429 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3430 cmd.data0 = slice; 3431 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3432 ss->rx_big.lanai = 3433 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3434 3435 if (err != 0) { 3436 device_printf(sc->dev, 3437 "failed to get ring sizes or locations\n"); 3438 return EIO; 3439 } 3440 3441 /* stock receive rings */ 3442 for (i = 0; i <= ss->rx_small.mask; i++) { 3443 map = ss->rx_small.info[i].map; 3444 err = mxge_get_buf_small(ss, map, i); 3445 if (err) { 3446 device_printf(sc->dev, "alloced %d/%d smalls\n", 3447 i, ss->rx_small.mask + 1); 3448 return ENOMEM; 3449 } 3450 } 3451 for (i = 0; i <= ss->rx_big.mask; i++) { 3452 ss->rx_big.shadow[i].addr_low = 0xffffffff; 3453 ss->rx_big.shadow[i].addr_high = 0xffffffff; 3454 } 3455 ss->rx_big.nbufs = nbufs; 3456 ss->rx_big.cl_size = cl_size; 3457 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN + 3458 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3459 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { 3460 map = ss->rx_big.info[i].map; 3461 err = mxge_get_buf_big(ss, map, i); 3462 if (err) { 3463 device_printf(sc->dev, "alloced %d/%d bigs\n", 3464 i, ss->rx_big.mask + 1); 3465 return ENOMEM; 3466 } 3467 } 3468 return 0; 3469 } 3470 3471 static int 3472 mxge_open(mxge_softc_t *sc) 3473 { 3474 mxge_cmd_t cmd; 3475 int err, big_bytes, nbufs, slice, cl_size, i; 3476 bus_addr_t bus; 3477 volatile uint8_t *itable; 3478 struct mxge_slice_state *ss; 3479 3480 /* Copy the MAC address in case it was overridden */ 
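/* (IF_LLADDR() reflects the current link-level address, including one that was set administratively, e.g. with ifconfig.) */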
3481 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 3482 3483 err = mxge_reset(sc, 1); 3484 if (err != 0) { 3485 device_printf(sc->dev, "failed to reset\n"); 3486 return EIO; 3487 } 3488 3489 if (sc->num_slices > 1) { 3490 /* setup the indirection table */ 3491 cmd.data0 = sc->num_slices; 3492 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 3493 &cmd); 3494 3495 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 3496 &cmd); 3497 if (err != 0) { 3498 device_printf(sc->dev, 3499 "failed to setup rss tables\n"); 3500 return err; 3501 } 3502 3503 /* just enable an identity mapping */ 3504 itable = sc->sram + cmd.data0; 3505 for (i = 0; i < sc->num_slices; i++) 3506 itable[i] = (uint8_t)i; 3507 3508 cmd.data0 = 1; 3509 cmd.data1 = mxge_rss_hash_type; 3510 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3511 if (err != 0) { 3512 device_printf(sc->dev, "failed to enable slices\n"); 3513 return err; 3514 } 3515 } 3516 3517 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); 3518 3519 cmd.data0 = nbufs; 3520 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 3521 &cmd); 3522 /* error is only meaningful if we're trying to set 3523 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 3524 if (err && nbufs > 1) { 3525 device_printf(sc->dev, 3526 "Failed to set always-use-n to %d\n", 3527 nbufs); 3528 return EIO; 3529 } 3530 /* Give the firmware the mtu and the big and small buffer 3531 sizes. The firmware wants the big buf size to be a power 3532 of two. Luckily, FreeBSD's clusters are powers of two */ 3533 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3534 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3535 cmd.data0 = MHLEN - MXGEFW_PAD; 3536 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3537 &cmd); 3538 cmd.data0 = big_bytes; 3539 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3540 3541 if (err != 0) { 3542 device_printf(sc->dev, "failed to setup params\n"); 3543 goto abort; 3544 } 3545 3546 /* Now give the firmware the pointer to the stats block */ 3547 for (slice = 0; slice < sc->num_slices; slice++) { 3548 ss = &sc->ss[slice]; 3549 cmd.data0 = 3550 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3551 cmd.data1 = 3552 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3553 cmd.data2 = sizeof(struct mcp_irq_data); 3554 cmd.data2 |= (slice << 16); 3555 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3556 } 3557 3558 if (err != 0) { 3559 bus = sc->ss->fw_stats_dma.bus_addr; 3560 bus += offsetof(struct mcp_irq_data, send_done_count); 3561 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3562 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3563 err = mxge_send_cmd(sc, 3564 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3565 &cmd); 3566 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3567 sc->fw_multicast_support = 0; 3568 } else { 3569 sc->fw_multicast_support = 1; 3570 } 3571 3572 if (err != 0) { 3573 device_printf(sc->dev, "failed to setup params\n"); 3574 goto abort; 3575 } 3576 3577 for (slice = 0; slice < sc->num_slices; slice++) { 3578 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3579 if (err != 0) { 3580 device_printf(sc->dev, "couldn't open slice %d\n", 3581 slice); 3582 goto abort; 3583 } 3584 } 3585 3586 /* Finally, start the firmware running */ 3587 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3588 if (err) { 3589 device_printf(sc->dev, "Couldn't bring up link\n"); 3590 goto abort; 3591 } 3592 for (slice = 0; slice < sc->num_slices; slice++) { 3593 ss = &sc->ss[slice]; 3594
ss->if_drv_flags |= IFF_DRV_RUNNING; 3595 ss->if_drv_flags &= ~IFF_DRV_OACTIVE; 3596 } 3597 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3598 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3599 3600 return 0; 3601 3602 abort: 3603 mxge_free_mbufs(sc); 3604 3605 return err; 3606 } 3607 3608 static int 3609 mxge_close(mxge_softc_t *sc, int down) 3610 { 3611 mxge_cmd_t cmd; 3612 int err, old_down_cnt; 3613 struct mxge_slice_state *ss; 3614 int slice; 3615 3616 for (slice = 0; slice < sc->num_slices; slice++) { 3617 ss = &sc->ss[slice]; 3618 ss->if_drv_flags &= ~IFF_DRV_RUNNING; 3619 } 3620 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3621 if (!down) { 3622 old_down_cnt = sc->down_cnt; 3623 wmb(); 3624 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3625 if (err) { 3626 device_printf(sc->dev, 3627 "Couldn't bring down link\n"); 3628 } 3629 if (old_down_cnt == sc->down_cnt) { 3630 /* wait for down irq */ 3631 DELAY(10 * sc->intr_coal_delay); 3632 } 3633 wmb(); 3634 if (old_down_cnt == sc->down_cnt) { 3635 device_printf(sc->dev, "never got down irq\n"); 3636 } 3637 } 3638 mxge_free_mbufs(sc); 3639 3640 return 0; 3641 } 3642 3643 static void 3644 mxge_setup_cfg_space(mxge_softc_t *sc) 3645 { 3646 device_t dev = sc->dev; 3647 int reg; 3648 uint16_t lnk, pectl; 3649 3650 /* find the PCIe link width and set max read request to 4KB*/ 3651 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { 3652 lnk = pci_read_config(dev, reg + 0x12, 2); 3653 sc->link_width = (lnk >> 4) & 0x3f; 3654 3655 if (sc->pectl == 0) { 3656 pectl = pci_read_config(dev, reg + 0x8, 2); 3657 pectl = (pectl & ~0x7000) | (5 << 12); 3658 pci_write_config(dev, reg + 0x8, pectl, 2); 3659 sc->pectl = pectl; 3660 } else { 3661 /* restore saved pectl after watchdog reset */ 3662 pci_write_config(dev, reg + 0x8, sc->pectl, 2); 3663 } 3664 } 3665 3666 /* Enable DMA and Memory space access */ 3667 pci_enable_busmaster(dev); 3668 } 3669 3670 static uint32_t 3671 mxge_read_reboot(mxge_softc_t *sc) 3672 { 3673 device_t dev = sc->dev; 3674 uint32_t vs; 3675 3676 /* find the vendor specific offset */ 3677 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) { 3678 device_printf(sc->dev, 3679 "could not find vendor specific offset\n"); 3680 return (uint32_t)-1; 3681 } 3682 /* enable read32 mode */ 3683 pci_write_config(dev, vs + 0x10, 0x3, 1); 3684 /* tell NIC which register to read */ 3685 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3686 return (pci_read_config(dev, vs + 0x14, 4)); 3687 } 3688 3689 static void 3690 mxge_watchdog_reset(mxge_softc_t *sc) 3691 { 3692 struct pci_devinfo *dinfo; 3693 struct mxge_slice_state *ss; 3694 int err, running, s, num_tx_slices = 1; 3695 uint32_t reboot; 3696 uint16_t cmd; 3697 3698 err = ENXIO; 3699 3700 device_printf(sc->dev, "Watchdog reset!\n"); 3701 3702 /* 3703 * check to see if the NIC rebooted. If it did, then all of 3704 * PCI config space has been reset, and things like the 3705 * busmaster bit will be zero. If this is the case, then we 3706 * must restore PCI config space before the NIC can be used 3707 * again 3708 */ 3709 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3710 if (cmd == 0xffff) { 3711 /* 3712 * maybe the watchdog caught the NIC rebooting; wait 3713 * up to 100ms for it to finish. 
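(A PCIR_COMMAND readback of all-ones means config reads are not being claimed, i.e. the device has effectively dropped off the bus.)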
If it does not come 3714 * back, then give up 3715 */ 3716 DELAY(1000*100); 3717 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3718 if (cmd == 0xffff) { 3719 device_printf(sc->dev, "NIC disappeared!\n"); 3720 } 3721 } 3722 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3723 /* print the reboot status */ 3724 reboot = mxge_read_reboot(sc); 3725 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3726 reboot); 3727 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 3728 if (running) { 3729 /* 3730 * quiesce NIC so that TX routines will not try to 3731 * xmit after restoration of BAR 3732 */ 3733 3734 /* Mark the link as down */ 3735 if (sc->link_state) { 3736 sc->link_state = 0; 3737 if_link_state_change(sc->ifp, 3738 LINK_STATE_DOWN); 3739 } 3740 3741 num_tx_slices = sc->num_slices; 3742 3743 /* grab all TX locks to ensure no tx */ 3744 for (s = 0; s < num_tx_slices; s++) { 3745 ss = &sc->ss[s]; 3746 mtx_lock(&ss->tx.mtx); 3747 } 3748 mxge_close(sc, 1); 3749 } 3750 /* restore PCI configuration space */ 3751 dinfo = device_get_ivars(sc->dev); 3752 pci_cfg_restore(sc->dev, dinfo); 3753 3754 /* and redo any changes we made to our config space */ 3755 mxge_setup_cfg_space(sc); 3756 3757 /* reload f/w */ 3758 err = mxge_load_firmware(sc, 0); 3759 if (err) { 3760 device_printf(sc->dev, 3761 "Unable to re-load f/w\n"); 3762 } 3763 if (running) { 3764 if (!err) 3765 err = mxge_open(sc); 3766 /* release all TX locks */ 3767 for (s = 0; s < num_tx_slices; s++) { 3768 ss = &sc->ss[s]; 3769 mxge_start_locked(ss); 3770 mtx_unlock(&ss->tx.mtx); 3771 } 3772 } 3773 sc->watchdog_resets++; 3774 } else { 3775 device_printf(sc->dev, 3776 "NIC did not reboot, not resetting\n"); 3777 err = 0; 3778 } 3779 if (err) { 3780 device_printf(sc->dev, "watchdog reset failed\n"); 3781 } else { 3782 if (sc->dying == 2) 3783 sc->dying = 0; 3784 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3785 } 3786 } 3787 3788 static void 3789 mxge_watchdog_task(void *arg, int pending) 3790 { 3791 mxge_softc_t *sc = arg; 3792 3793 mtx_lock(&sc->driver_mtx); 3794 mxge_watchdog_reset(sc); 3795 mtx_unlock(&sc->driver_mtx); 3796 } 3797 3798 static void 3799 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice) 3800 { 3801 tx = &sc->ss[slice].tx; 3802 device_printf(sc->dev, "slice %d stuck?
ring state:\n", slice); 3803 device_printf(sc->dev, 3804 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3805 tx->req, tx->done, tx->queue_active); 3806 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3807 tx->activate, tx->deactivate); 3808 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3809 tx->pkt_done, 3810 be32toh(sc->ss->fw_stats->send_done_count)); 3811 } 3812 3813 static int 3814 mxge_watchdog(mxge_softc_t *sc) 3815 { 3816 mxge_tx_ring_t *tx; 3817 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3818 int i, err = 0; 3819 3820 /* see if we have outstanding transmits, which 3821 have been pending for more than mxge_ticks */ 3822 for (i = 0; (i < sc->num_slices) && (err == 0); i++) { 3823 tx = &sc->ss[i].tx; 3824 if (tx->req != tx->done && 3825 tx->watchdog_req != tx->watchdog_done && 3826 tx->done == tx->watchdog_done) { 3827 /* check for pause blocking before resetting */ 3828 if (tx->watchdog_rx_pause == rx_pause) { 3829 mxge_warn_stuck(sc, tx, i); 3830 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 3831 return (ENXIO); 3832 } 3833 else 3834 device_printf(sc->dev, "Flow control blocking " 3835 "xmits, check link partner\n"); 3836 } 3837 3838 tx->watchdog_req = tx->req; 3839 tx->watchdog_done = tx->done; 3840 tx->watchdog_rx_pause = rx_pause; 3841 } 3842 3843 if (sc->need_media_probe) 3844 mxge_media_probe(sc); 3845 return (err); 3846 } 3847 3848 static uint64_t 3849 mxge_get_counter(struct ifnet *ifp, ift_counter cnt) 3850 { 3851 struct mxge_softc *sc; 3852 uint64_t rv; 3853 3854 sc = if_getsoftc(ifp); 3855 rv = 0; 3856 3857 switch (cnt) { 3858 case IFCOUNTER_IPACKETS: 3859 for (int s = 0; s < sc->num_slices; s++) 3860 rv += sc->ss[s].ipackets; 3861 return (rv); 3862 case IFCOUNTER_OPACKETS: 3863 for (int s = 0; s < sc->num_slices; s++) 3864 rv += sc->ss[s].opackets; 3865 return (rv); 3866 case IFCOUNTER_OERRORS: 3867 for (int s = 0; s < sc->num_slices; s++) 3868 rv += sc->ss[s].oerrors; 3869 return (rv); 3870 case IFCOUNTER_OBYTES: 3871 for (int s = 0; s < sc->num_slices; s++) 3872 rv += sc->ss[s].obytes; 3873 return (rv); 3874 case IFCOUNTER_OMCASTS: 3875 for (int s = 0; s < sc->num_slices; s++) 3876 rv += sc->ss[s].omcasts; 3877 return (rv); 3878 case IFCOUNTER_OQDROPS: 3879 for (int s = 0; s < sc->num_slices; s++) 3880 rv += sc->ss[s].tx.br->br_drops; 3881 return (rv); 3882 default: 3883 return (if_get_counter_default(ifp, cnt)); 3884 } 3885 } 3886 3887 static void 3888 mxge_tick(void *arg) 3889 { 3890 mxge_softc_t *sc = arg; 3891 u_long pkts = 0; 3892 int err = 0; 3893 int running, ticks; 3894 uint16_t cmd; 3895 3896 ticks = mxge_ticks; 3897 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 3898 if (running) { 3899 if (!sc->watchdog_countdown) { 3900 err = mxge_watchdog(sc); 3901 sc->watchdog_countdown = 4; 3902 } 3903 sc->watchdog_countdown--; 3904 } 3905 if (pkts == 0) { 3906 /* ensure NIC did not suffer h/w fault while idle */ 3907 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3908 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3909 sc->dying = 2; 3910 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 3911 err = ENXIO; 3912 } 3913 /* look less often if NIC is idle */ 3914 ticks *= 4; 3915 } 3916 3917 if (err == 0) 3918 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 3919 3920 } 3921 3922 static int 3923 mxge_media_change(struct ifnet *ifp) 3924 { 3925 return EINVAL; 3926 } 3927 3928 static int 3929 mxge_change_mtu(mxge_softc_t *sc, int mtu) 3930 { 3931 struct ifnet *ifp = sc->ifp; 3932 int real_mtu, old_mtu; 3933 int err = 0; 3934 3935 real_mtu = mtu + 
ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3936 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 3937 return EINVAL; 3938 mtx_lock(&sc->driver_mtx); 3939 old_mtu = ifp->if_mtu; 3940 ifp->if_mtu = mtu; 3941 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3942 mxge_close(sc, 0); 3943 err = mxge_open(sc); 3944 if (err != 0) { 3945 ifp->if_mtu = old_mtu; 3946 mxge_close(sc, 0); 3947 (void) mxge_open(sc); 3948 } 3949 } 3950 mtx_unlock(&sc->driver_mtx); 3951 return err; 3952 } 3953 3954 static void 3955 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 3956 { 3957 mxge_softc_t *sc = ifp->if_softc; 3958 3959 if (sc == NULL) 3960 return; 3961 ifmr->ifm_status = IFM_AVALID; 3962 ifmr->ifm_active = IFM_ETHER | IFM_FDX; 3963 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 3964 ifmr->ifm_active |= sc->current_media; 3965 } 3966 3967 static int 3968 mxge_fetch_i2c(mxge_softc_t *sc, struct ifi2creq *i2c) 3969 { 3970 mxge_cmd_t cmd; 3971 uint32_t i2c_args; 3972 int i, ms, err; 3973 3974 if (i2c->dev_addr != 0xA0 && 3975 i2c->dev_addr != 0xA2) 3976 return (EINVAL); 3977 if (i2c->len > sizeof(i2c->data)) 3978 return (EINVAL); 3979 3980 for (i = 0; i < i2c->len; i++) { 3981 i2c_args = i2c->dev_addr << 0x8; 3982 i2c_args |= i2c->offset + i; 3983 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 3984 cmd.data1 = i2c_args; 3985 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 3986 3987 if (err != MXGEFW_CMD_OK) 3988 return (EIO); 3989 /* now we wait for the data to be cached */ 3990 cmd.data0 = i2c_args & 0xff; 3991 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 3992 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 3993 cmd.data0 = i2c_args & 0xff; 3994 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 3995 if (err == EBUSY) 3996 DELAY(1000); 3997 } 3998 if (err != MXGEFW_CMD_OK) 3999 return (EIO); 4000 i2c->data[i] = cmd.data0; 4001 } 4002 return (0); 4003 } 4004 4005 static int 4006 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 4007 { 4008 mxge_softc_t *sc = ifp->if_softc; 4009 struct ifreq *ifr = (struct ifreq *)data; 4010 struct ifi2creq i2c; 4011 int err, mask; 4012 4013 err = 0; 4014 switch (command) { 4015 case SIOCSIFMTU: 4016 err = mxge_change_mtu(sc, ifr->ifr_mtu); 4017 break; 4018 4019 case SIOCSIFFLAGS: 4020 mtx_lock(&sc->driver_mtx); 4021 if (sc->dying) { 4022 mtx_unlock(&sc->driver_mtx); 4023 return EINVAL; 4024 } 4025 if (ifp->if_flags & IFF_UP) { 4026 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 4027 err = mxge_open(sc); 4028 } else { 4029 /* take care of promisc and allmulti 4030 flag changes */ 4031 mxge_change_promisc(sc, 4032 ifp->if_flags & IFF_PROMISC); 4033 mxge_set_multicast_list(sc); 4034 } 4035 } else { 4036 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4037 mxge_close(sc, 0); 4038 } 4039 } 4040 mtx_unlock(&sc->driver_mtx); 4041 break; 4042 4043 case SIOCADDMULTI: 4044 case SIOCDELMULTI: 4045 mtx_lock(&sc->driver_mtx); 4046 if (sc->dying) { 4047 mtx_unlock(&sc->driver_mtx); 4048 return (EINVAL); 4049 } 4050 mxge_set_multicast_list(sc); 4051 mtx_unlock(&sc->driver_mtx); 4052 break; 4053 4054 case SIOCSIFCAP: 4055 mtx_lock(&sc->driver_mtx); 4056 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 4057 if (mask & IFCAP_TXCSUM) { 4058 if (IFCAP_TXCSUM & ifp->if_capenable) { 4059 mask &= ~IFCAP_TSO4; 4060 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 4061 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP); 4062 } else { 4063 ifp->if_capenable |= IFCAP_TXCSUM; 4064 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 4065 } 4066 } 4067 if (mask & IFCAP_RXCSUM) { 4068 if (IFCAP_RXCSUM &
static int
mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifi2creq i2c;
	int err, mask;

	err = 0;
	switch (command) {
	case SIOCSIFMTU:
		err = mxge_change_mtu(sc, ifr->ifr_mtu);
		break;

	case SIOCSIFFLAGS:
		mtx_lock(&sc->driver_mtx);
		if (sc->dying) {
			mtx_unlock(&sc->driver_mtx);
			return EINVAL;
		}
		if (ifp->if_flags & IFF_UP) {
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				err = mxge_open(sc);
			} else {
				/* take care of promisc and allmulti
				   flag changes */
				mxge_change_promisc(sc,
						    ifp->if_flags & IFF_PROMISC);
				mxge_set_multicast_list(sc);
			}
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				mxge_close(sc, 0);
			}
		}
		mtx_unlock(&sc->driver_mtx);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		mtx_lock(&sc->driver_mtx);
		if (sc->dying) {
			mtx_unlock(&sc->driver_mtx);
			return (EINVAL);
		}
		mxge_set_multicast_list(sc);
		mtx_unlock(&sc->driver_mtx);
		break;

	case SIOCSIFCAP:
		mtx_lock(&sc->driver_mtx);
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & ifp->if_capenable) {
				mask &= ~IFCAP_TSO4;
				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM;
				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
			}
		}
		if (mask & IFCAP_RXCSUM) {
			if (IFCAP_RXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_RXCSUM;
			} else {
				ifp->if_capenable |= IFCAP_RXCSUM;
			}
		}
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO4;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				printf("mxge requires tx checksum offload"
				       " be enabled to use TSO\n");
				err = EINVAL;
			}
		}
#if IFCAP_TSO6
		if (mask & IFCAP_TXCSUM_IPV6) {
			if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
				mask &= ~IFCAP_TSO6;
				ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6
						       | IFCAP_TSO6);
				/* clear the IPv6 assist bits set below */
				ifp->if_hwassist &= ~(CSUM_TCP_IPV6
						      | CSUM_UDP_IPV6);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM_IPV6;
				ifp->if_hwassist |= (CSUM_TCP_IPV6
						     | CSUM_UDP_IPV6);
			}
		}
		if (mask & IFCAP_RXCSUM_IPV6) {
			if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6;
			} else {
				ifp->if_capenable |= IFCAP_RXCSUM_IPV6;
			}
		}
		if (mask & IFCAP_TSO6) {
			if (IFCAP_TSO6 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO6;
			} else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO6;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				printf("mxge requires tx checksum offload"
				       " be enabled to use TSO\n");
				err = EINVAL;
			}
		}
#endif /* IFCAP_TSO6 */

		if (mask & IFCAP_LRO)
			ifp->if_capenable ^= IFCAP_LRO;
		if (mask & IFCAP_VLAN_HWTAGGING)
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
		if (mask & IFCAP_VLAN_HWTSO)
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;

		if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) ||
		    !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING))
			ifp->if_capenable &= ~IFCAP_VLAN_HWTSO;

		mtx_unlock(&sc->driver_mtx);
		VLAN_CAPABILITIES(ifp);

		break;

	case SIOCGIFMEDIA:
		mtx_lock(&sc->driver_mtx);
		if (sc->dying) {
			mtx_unlock(&sc->driver_mtx);
			return (EINVAL);
		}
		mxge_media_probe(sc);
		mtx_unlock(&sc->driver_mtx);
		err = ifmedia_ioctl(ifp, (struct ifreq *)data,
				    &sc->media, command);
		break;

	case SIOCGI2C:
		if (sc->connector != MXGE_XFP &&
		    sc->connector != MXGE_SFP) {
			err = ENXIO;
			break;
		}
		err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
		if (err != 0)
			break;
		mtx_lock(&sc->driver_mtx);
		if (sc->dying) {
			mtx_unlock(&sc->driver_mtx);
			return (EINVAL);
		}
		err = mxge_fetch_i2c(sc, &i2c);
		mtx_unlock(&sc->driver_mtx);
		if (err == 0)
			err = copyout(&i2c, ifr_data_get_ptr(ifr),
				      sizeof(i2c));
		break;
	default:
		err = ether_ioctl(ifp, command, data);
		break;
	}
	return err;
}
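/*
 * The tunables fetched below are set from /boot/loader.conf.  A
 * hypothetical example (the values are illustrative, not
 * recommendations):
 *
 *	hw.mxge.max_slices=4		# cap RSS slices (see mxge_slice_probe)
 *	hw.mxge.intr_coal_delay=30	# interrupt coalescing delay, usecs
 *	hw.mxge.flow_control_enabled=1	# pause-frame flow control
 *	hw.mxge.verbose=1		# chattier attach/IRQ messages
 */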
static void
mxge_fetch_tunables(mxge_softc_t *sc)
{

	TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
	TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
			  &mxge_flow_control);
	TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
			  &mxge_intr_coal_delay);
	TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
			  &mxge_nvidia_ecrc_enable);
	TUNABLE_INT_FETCH("hw.mxge.force_firmware",
			  &mxge_force_firmware);
	TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
			  &mxge_deassert_wait);
	TUNABLE_INT_FETCH("hw.mxge.verbose",
			  &mxge_verbose);
	TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
	TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
	/* both spellings of the RSS hash tunable are accepted */
	TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
	TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
	TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
	TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);

	if (bootverbose)
		mxge_verbose = 1;
	if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
		mxge_intr_coal_delay = 30;
	if (mxge_ticks == 0)
		mxge_ticks = hz / 2;
	sc->pause = mxge_flow_control;
	if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
	    || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
		mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
	}
	if (mxge_initial_mtu > ETHERMTU_JUMBO ||
	    mxge_initial_mtu < ETHER_MIN_LEN)
		mxge_initial_mtu = ETHERMTU_JUMBO;

	if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
		mxge_throttle = MXGE_MAX_THROTTLE;
	if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
		mxge_throttle = MXGE_MIN_THROTTLE;
	sc->throttle = mxge_throttle;
}

static void
mxge_free_slices(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int i;

	if (sc->ss == NULL)
		return;

	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];
		if (ss->fw_stats != NULL) {
			mxge_dma_free(&ss->fw_stats_dma);
			ss->fw_stats = NULL;
			if (ss->tx.br != NULL) {
				drbr_free(ss->tx.br, M_DEVBUF);
				ss->tx.br = NULL;
			}
			mtx_destroy(&ss->tx.mtx);
		}
		if (ss->rx_done.entry != NULL) {
			mxge_dma_free(&ss->rx_done.dma);
			ss->rx_done.entry = NULL;
		}
	}
	free(sc->ss, M_DEVBUF);
	sc->ss = NULL;
}

static int
mxge_alloc_slices(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct mxge_slice_state *ss;
	size_t bytes;
	int err, i, max_intr_slots;

	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		return err;
	}
	sc->rx_ring_size = cmd.data0;
	max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));

	bytes = sizeof (*sc->ss) * sc->num_slices;
	sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sc->ss == NULL)
		return (ENOMEM);
	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];

		ss->sc = sc;

		/* allocate per-slice rx interrupt queues */

		bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
		err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
		if (err != 0)
			goto abort;
		ss->rx_done.entry = ss->rx_done.dma.addr;
		bzero(ss->rx_done.entry, bytes);

		/*
		 * allocate the per-slice firmware stats; stats
		 * (including tx) are used only on the first
		 * slice for now
		 */

		bytes = sizeof (*ss->fw_stats);
		err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
				     sizeof (*ss->fw_stats), 64);
		if (err != 0)
			goto abort;
		ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
		snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
			 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
		mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
		ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
					   &ss->tx.mtx);
	}

	return (0);

abort:
	mxge_free_slices(sc);
	return (ENOMEM);
}
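/*
 * A worked example of the sizing above (a sketch, assuming the 8-byte
 * mcp_dma_addr_t of the firmware ABI): if MXGEFW_CMD_GET_RX_RING_SIZE
 * reports a 4096-byte ring, each receive ring holds 4096 / 8 = 512
 * descriptors.  max_intr_slots doubles that (2 * 512 = 1024),
 * presumably so a slice's interrupt queue can absorb completions from
 * both its small and big receive rings at once.
 */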
static void
mxge_slice_probe(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	char *old_fw;
	int msix_cnt, status, max_intr_slots;

	sc->num_slices = 1;
	/*
	 * don't enable multiple slices if the tunable caps them at
	 * one (or disables them), or if this is not an SMP system
	 */
	if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
		return;

	/* see how many MSI-X interrupts are available */
	msix_cnt = pci_msix_count(sc->dev);
	if (msix_cnt < 2)
		return;

	/* now load the slice-aware firmware and see what it supports */
	old_fw = sc->fw_name;
	if (old_fw == mxge_fw_aligned)
		sc->fw_name = mxge_fw_rss_aligned;
	else
		sc->fw_name = mxge_fw_rss_unaligned;
	status = mxge_load_firmware(sc, 0);
	if (status != 0) {
		device_printf(sc->dev, "Falling back to a single slice\n");
		return;
	}

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		goto abort_with_fw;
	}

	/* get rx ring size */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		goto abort_with_fw;
	}
	max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));

	/* tell it the size of the interrupt queues */
	cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
		goto abort_with_fw;
	}

	/* ask for the maximum number of slices it supports */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
	if (status != 0) {
		device_printf(sc->dev,
			      "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
		goto abort_with_fw;
	}
	sc->num_slices = cmd.data0;
	if (sc->num_slices > msix_cnt)
		sc->num_slices = msix_cnt;

	if (mxge_max_slices == -1) {
		/* cap to number of CPUs in system */
		if (sc->num_slices > mp_ncpus)
			sc->num_slices = mp_ncpus;
	} else {
		if (sc->num_slices > mxge_max_slices)
			sc->num_slices = mxge_max_slices;
	}
	/* make sure it is a power of two */
	while (sc->num_slices & (sc->num_slices - 1))
		sc->num_slices--;

	if (mxge_verbose)
		device_printf(sc->dev, "using %d slices\n",
			      sc->num_slices);

	return;

abort_with_fw:
	sc->fw_name = old_fw;
	(void) mxge_load_firmware(sc, 0);
}
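/*
 * The rounding loop above walks num_slices down until it is a power of
 * two, presumably because the firmware's RSS hash distributes packets
 * over a power-of-two number of queues.  For example, starting from 6
 * slices: 6 & 5 != 0 and 5 & 4 != 0, so num_slices drops 6 -> 5 -> 4
 * and stops at 4 (4 & 3 == 0).
 */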
static int
mxge_add_msix_irqs(mxge_softc_t *sc)
{
	size_t bytes;
	int count, err, i, rid;

	rid = PCIR_BAR(2);
	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
						    &rid, RF_ACTIVE);

	if (sc->msix_table_res == NULL) {
		device_printf(sc->dev, "couldn't alloc MSIX table res\n");
		return ENXIO;
	}

	count = sc->num_slices;
	err = pci_alloc_msix(sc->dev, &count);
	if (err != 0) {
		device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d, "
			      "err = %d\n", sc->num_slices, err);
		goto abort_with_msix_table;
	}
	if (count < sc->num_slices) {
		device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
			      sc->num_slices, count);
		device_printf(sc->dev,
			      "Try setting hw.mxge.max_slices to %d\n",
			      count);
		err = ENOSPC;
		goto abort_with_msix;
	}

	bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
	sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_irq_res == NULL) {
		err = ENOMEM;
		goto abort_with_msix;
	}

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
							     SYS_RES_IRQ,
							     &rid, RF_ACTIVE);
		if (sc->msix_irq_res[i] == NULL) {
			device_printf(sc->dev, "couldn't allocate IRQ res"
				      " for message %d\n", i);
			err = ENXIO;
			goto abort_with_res;
		}
	}

	bytes = sizeof (*sc->msix_ih) * sc->num_slices;
	sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_ih == NULL) {
		err = ENOMEM;
		goto abort_with_res;
	}

	for (i = 0; i < sc->num_slices; i++) {
		err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
				     INTR_TYPE_NET | INTR_MPSAFE, NULL,
				     mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
		if (err != 0) {
			device_printf(sc->dev, "couldn't setup intr for "
				      "message %d\n", i);
			goto abort_with_intr;
		}
		bus_describe_intr(sc->dev, sc->msix_irq_res[i],
				  sc->msix_ih[i], "s%d", i);
	}

	if (mxge_verbose) {
		device_printf(sc->dev, "using %d msix IRQs:",
			      sc->num_slices);
		for (i = 0; i < sc->num_slices; i++)
			printf(" %jd", rman_get_start(sc->msix_irq_res[i]));
		printf("\n");
	}
	return (0);

abort_with_intr:
	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	free(sc->msix_ih, M_DEVBUF);

abort_with_res:
	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	free(sc->msix_irq_res, M_DEVBUF);

abort_with_msix:
	pci_release_msi(sc->dev);

abort_with_msix_table:
	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	return err;
}
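/*
 * Note on the rid numbering used above: for SYS_RES_IRQ, rid 0 is the
 * legacy INTx line and message interrupts start at rid 1, so the MSI-X
 * vector for slice i is allocated with rid = i + 1.  With four slices
 * the mapping is simply:
 *
 *	slice 0 -> rid 1, slice 1 -> rid 2,
 *	slice 2 -> rid 3, slice 3 -> rid 4
 */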
static int
mxge_add_single_irq(mxge_softc_t *sc)
{
	int count, err, rid;

	count = pci_msi_count(sc->dev);
	if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
		rid = 1;
	} else {
		rid = 0;
		sc->legacy_irq = 1;
	}
	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
					     RF_SHAREABLE | RF_ACTIVE);
	if (sc->irq_res == NULL) {
		device_printf(sc->dev, "could not alloc interrupt\n");
		return ENXIO;
	}
	if (mxge_verbose)
		device_printf(sc->dev, "using %s irq %jd\n",
			      sc->legacy_irq ? "INTx" : "MSI",
			      rman_get_start(sc->irq_res));
	err = bus_setup_intr(sc->dev, sc->irq_res,
			     INTR_TYPE_NET | INTR_MPSAFE, NULL,
			     mxge_intr, &sc->ss[0], &sc->ih);
	if (err != 0) {
		bus_release_resource(sc->dev, SYS_RES_IRQ,
				     sc->legacy_irq ? 0 : 1, sc->irq_res);
		if (!sc->legacy_irq)
			pci_release_msi(sc->dev);
	}
	return err;
}

static void
mxge_rem_msix_irqs(mxge_softc_t *sc)
{
	int i, rid;

	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	free(sc->msix_ih, M_DEVBUF);

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	free(sc->msix_irq_res, M_DEVBUF);

	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	pci_release_msi(sc->dev);
}

static void
mxge_rem_single_irq(mxge_softc_t *sc)
{
	bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
	bus_release_resource(sc->dev, SYS_RES_IRQ,
			     sc->legacy_irq ? 0 : 1, sc->irq_res);
	if (!sc->legacy_irq)
		pci_release_msi(sc->dev);
}

static void
mxge_rem_irq(mxge_softc_t *sc)
{
	if (sc->num_slices > 1)
		mxge_rem_msix_irqs(sc);
	else
		mxge_rem_single_irq(sc);
}

static int
mxge_add_irq(mxge_softc_t *sc)
{
	int err;

	if (sc->num_slices > 1)
		err = mxge_add_msix_irqs(sc);
	else
		err = mxge_add_single_irq(sc);

	/* XXX dead code: the "0 &&" keeps this re-add exercise disabled */
	if (0 && err == 0 && sc->num_slices > 1) {
		mxge_rem_msix_irqs(sc);
		err = mxge_add_msix_irqs(sc);
	}
	return err;
}
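/*
 * Interrupt setup, in summary: with multiple slices the driver takes
 * one MSI-X vector per slice (mxge_add_msix_irqs); with a single slice
 * it prefers a lone MSI message when pci_msi_count() reports exactly
 * one and allocation succeeds, otherwise it falls back to a shared
 * legacy INTx line (mxge_add_single_irq).  mxge_rem_irq() undoes
 * whichever path was taken.
 */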
"could not map memory\n"); 4673 err = ENXIO; 4674 goto abort_with_lock; 4675 } 4676 sc->sram = rman_get_virtual(sc->mem_res); 4677 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4678 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4679 device_printf(dev, "impossible memory region size %jd\n", 4680 rman_get_size(sc->mem_res)); 4681 err = ENXIO; 4682 goto abort_with_mem_res; 4683 } 4684 4685 /* make NULL terminated copy of the EEPROM strings section of 4686 lanai SRAM */ 4687 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4688 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4689 rman_get_bushandle(sc->mem_res), 4690 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4691 sc->eeprom_strings, 4692 MXGE_EEPROM_STRINGS_SIZE - 2); 4693 err = mxge_parse_strings(sc); 4694 if (err != 0) 4695 goto abort_with_mem_res; 4696 4697 /* Enable write combining for efficient use of PCIe bus */ 4698 mxge_enable_wc(sc); 4699 4700 /* Allocate the out of band dma memory */ 4701 err = mxge_dma_alloc(sc, &sc->cmd_dma, 4702 sizeof (mxge_cmd_t), 64); 4703 if (err != 0) 4704 goto abort_with_mem_res; 4705 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 4706 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4707 if (err != 0) 4708 goto abort_with_cmd_dma; 4709 4710 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4711 if (err != 0) 4712 goto abort_with_zeropad_dma; 4713 4714 /* select & load the firmware */ 4715 err = mxge_select_firmware(sc); 4716 if (err != 0) 4717 goto abort_with_dmabench; 4718 sc->intr_coal_delay = mxge_intr_coal_delay; 4719 4720 mxge_slice_probe(sc); 4721 err = mxge_alloc_slices(sc); 4722 if (err != 0) 4723 goto abort_with_dmabench; 4724 4725 err = mxge_reset(sc, 0); 4726 if (err != 0) 4727 goto abort_with_slices; 4728 4729 err = mxge_alloc_rings(sc); 4730 if (err != 0) { 4731 device_printf(sc->dev, "failed to allocate rings\n"); 4732 goto abort_with_slices; 4733 } 4734 4735 err = mxge_add_irq(sc); 4736 if (err != 0) { 4737 device_printf(sc->dev, "failed to add irq\n"); 4738 goto abort_with_rings; 4739 } 4740 4741 ifp->if_baudrate = IF_Gbps(10); 4742 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 4743 IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 | 4744 IFCAP_RXCSUM_IPV6; 4745 #if defined(INET) || defined(INET6) 4746 ifp->if_capabilities |= IFCAP_LRO; 4747 #endif 4748 4749 #ifdef MXGE_NEW_VLAN_API 4750 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; 4751 4752 /* Only FW 1.4.32 and newer can do TSO over vlans */ 4753 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 4754 sc->fw_ver_tiny >= 32) 4755 ifp->if_capabilities |= IFCAP_VLAN_HWTSO; 4756 #endif 4757 sc->max_mtu = mxge_max_mtu(sc); 4758 if (sc->max_mtu >= 9000) 4759 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 4760 else 4761 device_printf(dev, "MTU limited to %d. 
	/* Enable write combining for efficient use of PCIe bus */
	mxge_enable_wc(sc);

	/* Allocate the out of band dma memory */
	err = mxge_dma_alloc(sc, &sc->cmd_dma,
			     sizeof (mxge_cmd_t), 64);
	if (err != 0)
		goto abort_with_mem_res;
	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0)
		goto abort_with_cmd_dma;

	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0)
		goto abort_with_zeropad_dma;

	/* select & load the firmware */
	err = mxge_select_firmware(sc);
	if (err != 0)
		goto abort_with_dmabench;
	sc->intr_coal_delay = mxge_intr_coal_delay;

	mxge_slice_probe(sc);
	err = mxge_alloc_slices(sc);
	if (err != 0)
		goto abort_with_dmabench;

	err = mxge_reset(sc, 0);
	if (err != 0)
		goto abort_with_slices;

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to allocate rings\n");
		goto abort_with_slices;
	}

	err = mxge_add_irq(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to add irq\n");
		goto abort_with_rings;
	}

	ifp->if_baudrate = IF_Gbps(10);
	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
		IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 |
		IFCAP_RXCSUM_IPV6;
#if defined(INET) || defined(INET6)
	ifp->if_capabilities |= IFCAP_LRO;
#endif

#ifdef MXGE_NEW_VLAN_API
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;

	/* Only FW 1.4.32 and newer can do TSO over vlans */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 32)
		ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
#endif
	sc->max_mtu = mxge_max_mtu(sc);
	if (sc->max_mtu >= 9000)
		ifp->if_capabilities |= IFCAP_JUMBO_MTU;
	else
		device_printf(dev, "MTU limited to %d.  Install "
			      "latest firmware for 9000 byte jumbo support\n",
			      sc->max_mtu - ETHER_HDR_LEN);
	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
	ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6;
	/* check to see if f/w supports TSO for IPv6 */
	if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) {
		/* compile-time check: nonzero when the kernel has
		   IPv6 checksum offload support */
		if (CSUM_TCP_IPV6)
			ifp->if_capabilities |= IFCAP_TSO6;
		sc->max_tso6_hlen = min(cmd.data0,
					sizeof (sc->ss[0].scratch));
	}
	ifp->if_capenable = ifp->if_capabilities;
	if (sc->lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	ifp->if_init = mxge_init;
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = mxge_ioctl;
	ifp->if_start = mxge_start;
	ifp->if_get_counter = mxge_get_counter;
	ifp->if_hw_tsomax = IP_MAXPACKET - (ETHER_HDR_LEN +
					    ETHER_VLAN_ENCAP_LEN);
	ifp->if_hw_tsomaxsegcount = sc->ss[0].tx.max_desc;
	ifp->if_hw_tsomaxsegsize = IP_MAXPACKET;
	/* Initialize the ifmedia structure */
	ifmedia_init(&sc->media, 0, mxge_media_change,
		     mxge_media_status);
	mxge_media_init(sc);
	mxge_media_probe(sc);
	sc->dying = 0;
	ether_ifattach(ifp, sc->mac_addr);
	/* ether_ifattach sets mtu to ETHERMTU */
	if (mxge_initial_mtu != ETHERMTU)
		mxge_change_mtu(sc, mxge_initial_mtu);

	mxge_add_sysctls(sc);
	ifp->if_transmit = mxge_transmit;
	ifp->if_qflush = mxge_qflush;
	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
				device_get_nameunit(sc->dev));
	callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	return 0;

abort_with_rings:
	mxge_free_rings(sc);
abort_with_slices:
	mxge_free_slices(sc);
abort_with_dmabench:
	mxge_dma_free(&sc->dmabench_dma);
abort_with_zeropad_dma:
	mxge_dma_free(&sc->zeropad_dma);
abort_with_cmd_dma:
	mxge_dma_free(&sc->cmd_dma);
abort_with_mem_res:
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
abort_with_lock:
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(ifp);
abort_with_parent_dmat:
	bus_dma_tag_destroy(sc->parent_dmat);
abort_with_tq:
	if (sc->tq != NULL) {
		taskqueue_drain(sc->tq, &sc->watchdog_task);
		taskqueue_free(sc->tq);
		sc->tq = NULL;
	}
abort_with_nothing:
	return err;
}
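/*
 * mxge_detach() below is essentially mxge_attach()'s abort_with_*
 * unwind run in full: state is torn down in the reverse order of
 * construction (taskqueue and callout first, then IRQs, rings, slices,
 * the DMA regions, the BAR mapping, and finally the mutexes and the
 * ifnet).
 */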
static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	if (mxge_vlans_active(sc)) {
		device_printf(sc->dev,
			      "Detach vlans before removing module\n");
		return EBUSY;
	}
	mtx_lock(&sc->driver_mtx);
	sc->dying = 1;
	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
		mxge_close(sc, 0);
	mtx_unlock(&sc->driver_mtx);
	ether_ifdetach(sc->ifp);
	if (sc->tq != NULL) {
		taskqueue_drain(sc->tq, &sc->watchdog_task);
		taskqueue_free(sc->tq);
		sc->tq = NULL;
	}
	callout_drain(&sc->co_hdl);
	ifmedia_removeall(&sc->media);
	mxge_dummy_rdma(sc, 0);
	mxge_rem_sysctls(sc);
	mxge_rem_irq(sc);
	mxge_free_rings(sc);
	mxge_free_slices(sc);
	mxge_dma_free(&sc->dmabench_dma);
	mxge_dma_free(&sc->zeropad_dma);
	mxge_dma_free(&sc->cmd_dma);
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(sc->ifp);
	bus_dma_tag_destroy(sc->parent_dmat);
	return 0;
}

static int
mxge_shutdown(device_t dev)
{
	return 0;
}

/*
  This file uses Myri10GE driver indentation.

  Local Variables:
  c-file-style:"linux"
  tab-width:8
  End:
*/