1 /****************************************************************************** 2 SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 4 Copyright (c) 2006-2013, Myricom Inc. 5 All rights reserved. 6 7 Redistribution and use in source and binary forms, with or without 8 modification, are permitted provided that the following conditions are met: 9 10 1. Redistributions of source code must retain the above copyright notice, 11 this list of conditions and the following disclaimer. 12 13 2. Neither the name of the Myricom Inc, nor the names of its 14 contributors may be used to endorse or promote products derived from 15 this software without specific prior written permission. 16 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 POSSIBILITY OF SUCH DAMAGE. 
28 29 ***************************************************************************/ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/linker.h> 37 #include <sys/firmware.h> 38 #include <sys/endian.h> 39 #include <sys/sockio.h> 40 #include <sys/mbuf.h> 41 #include <sys/malloc.h> 42 #include <sys/kdb.h> 43 #include <sys/kernel.h> 44 #include <sys/lock.h> 45 #include <sys/module.h> 46 #include <sys/socket.h> 47 #include <sys/sysctl.h> 48 #include <sys/sx.h> 49 #include <sys/taskqueue.h> 50 #include <contrib/zlib/zlib.h> 51 #include <dev/zlib/zcalloc.h> 52 53 #include <net/if.h> 54 #include <net/if_var.h> 55 #include <net/if_arp.h> 56 #include <net/ethernet.h> 57 #include <net/if_dl.h> 58 #include <net/if_media.h> 59 60 #include <net/bpf.h> 61 62 #include <net/if_types.h> 63 #include <net/if_vlan_var.h> 64 65 #include <netinet/in_systm.h> 66 #include <netinet/in.h> 67 #include <netinet/ip.h> 68 #include <netinet/ip6.h> 69 #include <netinet/tcp.h> 70 #include <netinet/tcp_lro.h> 71 #include <netinet6/ip6_var.h> 72 73 #include <machine/bus.h> 74 #include <machine/in_cksum.h> 75 #include <machine/resource.h> 76 #include <sys/bus.h> 77 #include <sys/rman.h> 78 #include <sys/smp.h> 79 80 #include <dev/pci/pcireg.h> 81 #include <dev/pci/pcivar.h> 82 #include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */ 83 84 #include <vm/vm.h> /* for pmap_mapdev() */ 85 #include <vm/pmap.h> 86 87 #if defined(__i386) || defined(__amd64) 88 #include <machine/specialreg.h> 89 #endif 90 91 #include <dev/mxge/mxge_mcp.h> 92 #include <dev/mxge/mcp_gen_header.h> 93 /*#define MXGE_FAKE_IFP*/ 94 #include <dev/mxge/if_mxge_var.h> 95 #ifdef IFNET_BUF_RING 96 #include <sys/buf_ring.h> 97 #endif 98 99 #include "opt_inet.h" 100 #include "opt_inet6.h" 101 102 /* tunable params */ 103 static int mxge_nvidia_ecrc_enable = 1; 104 static int mxge_force_firmware = 0; 105 static int mxge_intr_coal_delay = 30; 106 static int 
mxge_deassert_wait = 1; 107 static int mxge_flow_control = 1; 108 static int mxge_verbose = 0; 109 static int mxge_ticks; 110 static int mxge_max_slices = 1; 111 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 112 static int mxge_always_promisc = 0; 113 static int mxge_initial_mtu = ETHERMTU_JUMBO; 114 static int mxge_throttle = 0; 115 static char *mxge_fw_unaligned = "mxge_ethp_z8e"; 116 static char *mxge_fw_aligned = "mxge_eth_z8e"; 117 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; 118 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; 119 120 static int mxge_probe(device_t dev); 121 static int mxge_attach(device_t dev); 122 static int mxge_detach(device_t dev); 123 static int mxge_shutdown(device_t dev); 124 static void mxge_intr(void *arg); 125 126 static device_method_t mxge_methods[] = 127 { 128 /* Device interface */ 129 DEVMETHOD(device_probe, mxge_probe), 130 DEVMETHOD(device_attach, mxge_attach), 131 DEVMETHOD(device_detach, mxge_detach), 132 DEVMETHOD(device_shutdown, mxge_shutdown), 133 134 DEVMETHOD_END 135 }; 136 137 static driver_t mxge_driver = 138 { 139 "mxge", 140 mxge_methods, 141 sizeof(mxge_softc_t), 142 }; 143 144 /* Declare ourselves to be a child of the PCI bus.*/ 145 DRIVER_MODULE(mxge, pci, mxge_driver, 0, 0); 146 MODULE_DEPEND(mxge, firmware, 1, 1, 1); 147 MODULE_DEPEND(mxge, zlib, 1, 1, 1); 148 149 static int mxge_load_firmware(mxge_softc_t *sc, int adopt); 150 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 151 static int mxge_close(mxge_softc_t *sc, int down); 152 static int mxge_open(mxge_softc_t *sc); 153 static void mxge_tick(void *arg); 154 155 static int 156 mxge_probe(device_t dev) 157 { 158 int rev; 159 160 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && 161 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) || 162 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) { 163 rev = pci_get_revid(dev); 164 switch (rev) { 165 case MXGE_PCI_REV_Z8E: 166 device_set_desc(dev, 
"Myri10G-PCIE-8A"); 167 break; 168 case MXGE_PCI_REV_Z8ES: 169 device_set_desc(dev, "Myri10G-PCIE-8B"); 170 break; 171 default: 172 device_set_desc(dev, "Myri10G-PCIE-8??"); 173 device_printf(dev, "Unrecognized rev %d NIC\n", 174 rev); 175 break; 176 } 177 return 0; 178 } 179 return ENXIO; 180 } 181 182 static void 183 mxge_enable_wc(mxge_softc_t *sc) 184 { 185 #if defined(__i386) || defined(__amd64) 186 vm_offset_t len; 187 int err; 188 189 sc->wc = 1; 190 len = rman_get_size(sc->mem_res); 191 err = pmap_change_attr((vm_offset_t) sc->sram, 192 len, PAT_WRITE_COMBINING); 193 if (err != 0) { 194 device_printf(sc->dev, "pmap_change_attr failed, %d\n", 195 err); 196 sc->wc = 0; 197 } 198 #endif 199 } 200 201 /* callback to get our DMA address */ 202 static void 203 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, 204 int error) 205 { 206 if (error == 0) { 207 *(bus_addr_t *) arg = segs->ds_addr; 208 } 209 } 210 211 static int 212 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, 213 bus_size_t alignment) 214 { 215 int err; 216 device_t dev = sc->dev; 217 bus_size_t boundary, maxsegsize; 218 219 if (bytes > 4096 && alignment == 4096) { 220 boundary = 0; 221 maxsegsize = bytes; 222 } else { 223 boundary = 4096; 224 maxsegsize = 4096; 225 } 226 227 /* allocate DMAable memory tags */ 228 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 229 alignment, /* alignment */ 230 boundary, /* boundary */ 231 BUS_SPACE_MAXADDR, /* low */ 232 BUS_SPACE_MAXADDR, /* high */ 233 NULL, NULL, /* filter */ 234 bytes, /* maxsize */ 235 1, /* num segs */ 236 maxsegsize, /* maxsegsize */ 237 BUS_DMA_COHERENT, /* flags */ 238 NULL, NULL, /* lock */ 239 &dma->dmat); /* tag */ 240 if (err != 0) { 241 device_printf(dev, "couldn't alloc tag (err = %d)\n", err); 242 return err; 243 } 244 245 /* allocate DMAable memory & map */ 246 err = bus_dmamem_alloc(dma->dmat, &dma->addr, 247 (BUS_DMA_WAITOK | BUS_DMA_COHERENT 248 | BUS_DMA_ZERO), &dma->map); 249 if (err 
!= 0) { 250 device_printf(dev, "couldn't alloc mem (err = %d)\n", err); 251 goto abort_with_dmat; 252 } 253 254 /* load the memory */ 255 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, 256 mxge_dmamap_callback, 257 (void *)&dma->bus_addr, 0); 258 if (err != 0) { 259 device_printf(dev, "couldn't load map (err = %d)\n", err); 260 goto abort_with_mem; 261 } 262 return 0; 263 264 abort_with_mem: 265 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 266 abort_with_dmat: 267 (void)bus_dma_tag_destroy(dma->dmat); 268 return err; 269 } 270 271 static void 272 mxge_dma_free(mxge_dma_t *dma) 273 { 274 bus_dmamap_unload(dma->dmat, dma->map); 275 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 276 (void)bus_dma_tag_destroy(dma->dmat); 277 } 278 279 /* 280 * The eeprom strings on the lanaiX have the format 281 * SN=x\0 282 * MAC=x:x:x:x:x:x\0 283 * PC=text\0 284 */ 285 286 static int 287 mxge_parse_strings(mxge_softc_t *sc) 288 { 289 char *ptr; 290 int i, found_mac, found_sn2; 291 char *endptr; 292 293 ptr = sc->eeprom_strings; 294 found_mac = 0; 295 found_sn2 = 0; 296 while (*ptr != '\0') { 297 if (strncmp(ptr, "MAC=", 4) == 0) { 298 ptr += 4; 299 for (i = 0;;) { 300 sc->mac_addr[i] = strtoul(ptr, &endptr, 16); 301 if (endptr - ptr != 2) 302 goto abort; 303 ptr = endptr; 304 if (++i == 6) 305 break; 306 if (*ptr++ != ':') 307 goto abort; 308 } 309 found_mac = 1; 310 } else if (strncmp(ptr, "PC=", 3) == 0) { 311 ptr += 3; 312 strlcpy(sc->product_code_string, ptr, 313 sizeof(sc->product_code_string)); 314 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) { 315 ptr += 3; 316 strlcpy(sc->serial_number_string, ptr, 317 sizeof(sc->serial_number_string)); 318 } else if (strncmp(ptr, "SN2=", 4) == 0) { 319 /* SN2 takes precedence over SN */ 320 ptr += 4; 321 found_sn2 = 1; 322 strlcpy(sc->serial_number_string, ptr, 323 sizeof(sc->serial_number_string)); 324 } 325 while (*ptr++ != '\0') {} 326 } 327 328 if (found_mac) 329 return 0; 330 331 abort: 332 
device_printf(sc->dev, "failed to parse eeprom_strings\n"); 333 334 return ENXIO; 335 } 336 337 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 338 static void 339 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 340 { 341 uint32_t val; 342 unsigned long base, off; 343 char *va, *cfgptr; 344 device_t pdev, mcp55; 345 uint16_t vendor_id, device_id, word; 346 uintptr_t bus, slot, func, ivend, idev; 347 uint32_t *ptr32; 348 349 if (!mxge_nvidia_ecrc_enable) 350 return; 351 352 pdev = device_get_parent(device_get_parent(sc->dev)); 353 if (pdev == NULL) { 354 device_printf(sc->dev, "could not find parent?\n"); 355 return; 356 } 357 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 358 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 359 360 if (vendor_id != 0x10de) 361 return; 362 363 base = 0; 364 365 if (device_id == 0x005d) { 366 /* ck804, base address is magic */ 367 base = 0xe0000000UL; 368 } else if (device_id >= 0x0374 && device_id <= 0x378) { 369 /* mcp55, base address stored in chipset */ 370 mcp55 = pci_find_bsf(0, 0, 0); 371 if (mcp55 && 372 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 373 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 374 word = pci_read_config(mcp55, 0x90, 2); 375 base = ((unsigned long)word & 0x7ffeU) << 25; 376 } 377 } 378 if (!base) 379 return; 380 381 /* XXXX 382 Test below is commented because it is believed that doing 383 config read/write beyond 0xff will access the config space 384 for the next larger function. 
Uncomment this and remove 385 the hacky pmap_mapdev() way of accessing config space when 386 FreeBSD grows support for extended pcie config space access 387 */ 388 #if 0 389 /* See if we can, by some miracle, access the extended 390 config space */ 391 val = pci_read_config(pdev, 0x178, 4); 392 if (val != 0xffffffff) { 393 val |= 0x40; 394 pci_write_config(pdev, 0x178, val, 4); 395 return; 396 } 397 #endif 398 /* Rather than using normal pci config space writes, we must 399 * map the Nvidia config space ourselves. This is because on 400 * opteron/nvidia class machine the 0xe000000 mapping is 401 * handled by the nvidia chipset, that means the internal PCI 402 * device (the on-chip northbridge), or the amd-8131 bridge 403 * and things behind them are not visible by this method. 404 */ 405 406 BUS_READ_IVAR(device_get_parent(pdev), pdev, 407 PCI_IVAR_BUS, &bus); 408 BUS_READ_IVAR(device_get_parent(pdev), pdev, 409 PCI_IVAR_SLOT, &slot); 410 BUS_READ_IVAR(device_get_parent(pdev), pdev, 411 PCI_IVAR_FUNCTION, &func); 412 BUS_READ_IVAR(device_get_parent(pdev), pdev, 413 PCI_IVAR_VENDOR, &ivend); 414 BUS_READ_IVAR(device_get_parent(pdev), pdev, 415 PCI_IVAR_DEVICE, &idev); 416 417 off = base 418 + 0x00100000UL * (unsigned long)bus 419 + 0x00001000UL * (unsigned long)(func 420 + 8 * slot); 421 422 /* map it into the kernel */ 423 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 424 425 if (va == NULL) { 426 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 427 return; 428 } 429 /* get a pointer to the config space mapped into the kernel */ 430 cfgptr = va + (off & PAGE_MASK); 431 432 /* make sure that we can really access it */ 433 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 434 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 435 if (! 
(vendor_id == ivend && device_id == idev)) { 436 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 437 vendor_id, device_id); 438 pmap_unmapdev(va, PAGE_SIZE); 439 return; 440 } 441 442 ptr32 = (uint32_t*)(cfgptr + 0x178); 443 val = *ptr32; 444 445 if (val == 0xffffffff) { 446 device_printf(sc->dev, "extended mapping failed\n"); 447 pmap_unmapdev(va, PAGE_SIZE); 448 return; 449 } 450 *ptr32 = val | 0x40; 451 pmap_unmapdev(va, PAGE_SIZE); 452 if (mxge_verbose) 453 device_printf(sc->dev, 454 "Enabled ECRC on upstream Nvidia bridge " 455 "at %d:%d:%d\n", 456 (int)bus, (int)slot, (int)func); 457 return; 458 } 459 #else 460 static void 461 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 462 { 463 device_printf(sc->dev, 464 "Nforce 4 chipset on non-x86/amd64!?!?!\n"); 465 return; 466 } 467 #endif 468 469 static int 470 mxge_dma_test(mxge_softc_t *sc, int test_type) 471 { 472 mxge_cmd_t cmd; 473 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr; 474 int status; 475 uint32_t len; 476 char *test = " "; 477 478 /* Run a small DMA test. 479 * The magic multipliers to the length tell the firmware 480 * to do DMA read, write, or read+write tests. The 481 * results are returned in cmd.data0. The upper 16 482 * bits of the return is the number of transfers completed. 483 * The lower 16 bits is the time in 0.5us ticks that the 484 * transfers took to complete. 
485 */ 486 487 len = sc->tx_boundary; 488 489 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 490 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 491 cmd.data2 = len * 0x10000; 492 status = mxge_send_cmd(sc, test_type, &cmd); 493 if (status != 0) { 494 test = "read"; 495 goto abort; 496 } 497 sc->read_dma = ((cmd.data0>>16) * len * 2) / 498 (cmd.data0 & 0xffff); 499 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 500 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 501 cmd.data2 = len * 0x1; 502 status = mxge_send_cmd(sc, test_type, &cmd); 503 if (status != 0) { 504 test = "write"; 505 goto abort; 506 } 507 sc->write_dma = ((cmd.data0>>16) * len * 2) / 508 (cmd.data0 & 0xffff); 509 510 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 511 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 512 cmd.data2 = len * 0x10001; 513 status = mxge_send_cmd(sc, test_type, &cmd); 514 if (status != 0) { 515 test = "read/write"; 516 goto abort; 517 } 518 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 519 (cmd.data0 & 0xffff); 520 521 abort: 522 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 523 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 524 test, status); 525 526 return status; 527 } 528 529 /* 530 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 531 * when the PCI-E Completion packets are aligned on an 8-byte 532 * boundary. Some PCI-E chip sets always align Completion packets; on 533 * the ones that do not, the alignment can be enforced by enabling 534 * ECRC generation (if supported). 535 * 536 * When PCI-E Completion packets are not aligned, it is actually more 537 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 
538 * 539 * If the driver can neither enable ECRC nor verify that it has 540 * already been enabled, then it must use a firmware image which works 541 * around unaligned completion packets (ethp_z8e.dat), and it should 542 * also ensure that it never gives the device a Read-DMA which is 543 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 544 * enabled, then the driver should use the aligned (eth_z8e.dat) 545 * firmware image, and set tx_boundary to 4KB. 546 */ 547 548 static int 549 mxge_firmware_probe(mxge_softc_t *sc) 550 { 551 device_t dev = sc->dev; 552 int reg, status; 553 uint16_t pectl; 554 555 sc->tx_boundary = 4096; 556 /* 557 * Verify the max read request size was set to 4KB 558 * before trying the test with 4KB. 559 */ 560 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { 561 pectl = pci_read_config(dev, reg + 0x8, 2); 562 if ((pectl & (5 << 12)) != (5 << 12)) { 563 device_printf(dev, "Max Read Req. size != 4k (0x%x\n", 564 pectl); 565 sc->tx_boundary = 2048; 566 } 567 } 568 569 /* 570 * load the optimized firmware (which assumes aligned PCIe 571 * completions) in order to see if it works on this host. 572 */ 573 sc->fw_name = mxge_fw_aligned; 574 status = mxge_load_firmware(sc, 1); 575 if (status != 0) { 576 return status; 577 } 578 579 /* 580 * Enable ECRC if possible 581 */ 582 mxge_enable_nvidia_ecrc(sc); 583 584 /* 585 * Run a DMA test which watches for unaligned completions and 586 * aborts on the first one seen. Not required on Z8ES or newer. 587 */ 588 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES) 589 return 0; 590 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 591 if (status == 0) 592 return 0; /* keep the aligned firmware */ 593 594 if (status != E2BIG) 595 device_printf(dev, "DMA test failed: %d\n", status); 596 if (status == ENOSYS) 597 device_printf(dev, "Falling back to ethp! 
" 598 "Please install up to date fw\n"); 599 return status; 600 } 601 602 static int 603 mxge_select_firmware(mxge_softc_t *sc) 604 { 605 int aligned = 0; 606 int force_firmware = mxge_force_firmware; 607 608 if (sc->throttle) 609 force_firmware = sc->throttle; 610 611 if (force_firmware != 0) { 612 if (force_firmware == 1) 613 aligned = 1; 614 else 615 aligned = 0; 616 if (mxge_verbose) 617 device_printf(sc->dev, 618 "Assuming %s completions (forced)\n", 619 aligned ? "aligned" : "unaligned"); 620 goto abort; 621 } 622 623 /* if the PCIe link width is 4 or less, we can use the aligned 624 firmware and skip any checks */ 625 if (sc->link_width != 0 && sc->link_width <= 4) { 626 device_printf(sc->dev, 627 "PCIe x%d Link, expect reduced performance\n", 628 sc->link_width); 629 aligned = 1; 630 goto abort; 631 } 632 633 if (0 == mxge_firmware_probe(sc)) 634 return 0; 635 636 abort: 637 if (aligned) { 638 sc->fw_name = mxge_fw_aligned; 639 sc->tx_boundary = 4096; 640 } else { 641 sc->fw_name = mxge_fw_unaligned; 642 sc->tx_boundary = 2048; 643 } 644 return (mxge_load_firmware(sc, 0)); 645 } 646 647 static int 648 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 649 { 650 651 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 652 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 653 be32toh(hdr->mcp_type)); 654 return EIO; 655 } 656 657 /* save firmware version for sysctl */ 658 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version)); 659 if (mxge_verbose) 660 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 661 662 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 663 &sc->fw_ver_minor, &sc->fw_ver_tiny); 664 665 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 666 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 667 device_printf(sc->dev, "Found firmware version %s\n", 668 sc->fw_version); 669 device_printf(sc->dev, "Driver needs %d.%d\n", 670 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 671 return EINVAL; 672 } 673 return 0; 674 675 
} 676 677 static int 678 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 679 { 680 z_stream zs; 681 char *inflate_buffer; 682 const struct firmware *fw; 683 const mcp_gen_header_t *hdr; 684 unsigned hdr_offset; 685 int status; 686 unsigned int i; 687 size_t fw_len; 688 689 fw = firmware_get(sc->fw_name); 690 if (fw == NULL) { 691 device_printf(sc->dev, "Could not find firmware image %s\n", 692 sc->fw_name); 693 return ENOENT; 694 } 695 696 /* setup zlib and decompress f/w */ 697 bzero(&zs, sizeof (zs)); 698 zs.zalloc = zcalloc_nowait; 699 zs.zfree = zcfree; 700 status = inflateInit(&zs); 701 if (status != Z_OK) { 702 status = EIO; 703 goto abort_with_fw; 704 } 705 706 /* the uncompressed size is stored as the firmware version, 707 which would otherwise go unused */ 708 fw_len = (size_t) fw->version; 709 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); 710 if (inflate_buffer == NULL) 711 goto abort_with_zs; 712 zs.avail_in = fw->datasize; 713 zs.next_in = __DECONST(char *, fw->data); 714 zs.avail_out = fw_len; 715 zs.next_out = inflate_buffer; 716 status = inflate(&zs, Z_FINISH); 717 if (status != Z_STREAM_END) { 718 device_printf(sc->dev, "zlib %d\n", status); 719 status = EIO; 720 goto abort_with_buffer; 721 } 722 723 /* check id */ 724 hdr_offset = htobe32(*(const uint32_t *) 725 (inflate_buffer + MCP_HEADER_PTR_OFFSET)); 726 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 727 device_printf(sc->dev, "Bad firmware file"); 728 status = EIO; 729 goto abort_with_buffer; 730 } 731 hdr = (const void*)(inflate_buffer + hdr_offset); 732 733 status = mxge_validate_firmware(sc, hdr); 734 if (status != 0) 735 goto abort_with_buffer; 736 737 /* Copy the inflated firmware to NIC SRAM. 
*/ 738 for (i = 0; i < fw_len; i += 256) { 739 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 740 inflate_buffer + i, 741 min(256U, (unsigned)(fw_len - i))); 742 wmb(); 743 (void)*sc->sram; 744 wmb(); 745 } 746 747 *limit = fw_len; 748 status = 0; 749 abort_with_buffer: 750 free(inflate_buffer, M_TEMP); 751 abort_with_zs: 752 inflateEnd(&zs); 753 abort_with_fw: 754 firmware_put(fw, FIRMWARE_UNLOAD); 755 return status; 756 } 757 758 /* 759 * Enable or disable periodic RDMAs from the host to make certain 760 * chipsets resend dropped PCIe messages 761 */ 762 763 static void 764 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 765 { 766 char buf_bytes[72]; 767 volatile uint32_t *confirm; 768 volatile char *submit; 769 uint32_t *buf, dma_low, dma_high; 770 int i; 771 772 buf = (uint32_t *)((uintptr_t)(buf_bytes + 7) & ~7UL); 773 774 /* clear confirmation addr */ 775 confirm = (volatile uint32_t *)sc->cmd; 776 *confirm = 0; 777 wmb(); 778 779 /* send an rdma command to the PCIe engine, and wait for the 780 response in the confirmation address. The firmware should 781 write a -1 there to indicate it is alive and well 782 */ 783 784 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 785 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 786 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 787 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 788 buf[2] = htobe32(0xffffffff); /* confirm data */ 789 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 790 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 791 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 792 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 793 buf[5] = htobe32(enable); /* enable? 
*/ 794 795 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 796 797 mxge_pio_copy(submit, buf, 64); 798 wmb(); 799 DELAY(1000); 800 wmb(); 801 i = 0; 802 while (*confirm != 0xffffffff && i < 20) { 803 DELAY(1000); 804 i++; 805 } 806 if (*confirm != 0xffffffff) { 807 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 808 (enable ? "enable" : "disable"), confirm, 809 *confirm); 810 } 811 return; 812 } 813 814 static int 815 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 816 { 817 mcp_cmd_t *buf; 818 char buf_bytes[sizeof(*buf) + 8]; 819 volatile mcp_cmd_response_t *response = sc->cmd; 820 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 821 uint32_t dma_low, dma_high; 822 int err, sleep_total = 0; 823 824 /* ensure buf is aligned to 8 bytes */ 825 buf = (mcp_cmd_t *)((uintptr_t)(buf_bytes + 7) & ~7UL); 826 827 buf->data0 = htobe32(data->data0); 828 buf->data1 = htobe32(data->data1); 829 buf->data2 = htobe32(data->data2); 830 buf->cmd = htobe32(cmd); 831 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 832 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 833 834 buf->response_addr.low = htobe32(dma_low); 835 buf->response_addr.high = htobe32(dma_high); 836 mtx_lock(&sc->cmd_mtx); 837 response->result = 0xffffffff; 838 wmb(); 839 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 840 841 /* wait up to 20ms */ 842 err = EAGAIN; 843 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 844 bus_dmamap_sync(sc->cmd_dma.dmat, 845 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 846 wmb(); 847 switch (be32toh(response->result)) { 848 case 0: 849 data->data0 = be32toh(response->data); 850 err = 0; 851 break; 852 case 0xffffffff: 853 DELAY(1000); 854 break; 855 case MXGEFW_CMD_UNKNOWN: 856 err = ENOSYS; 857 break; 858 case MXGEFW_CMD_ERROR_UNALIGNED: 859 err = E2BIG; 860 break; 861 case MXGEFW_CMD_ERROR_BUSY: 862 err = EBUSY; 863 break; 864 case MXGEFW_CMD_ERROR_I2C_ABSENT: 865 err = ENXIO; 866 break; 867 default: 868 
device_printf(sc->dev, 869 "mxge: command %d " 870 "failed, result = %d\n", 871 cmd, be32toh(response->result)); 872 err = ENXIO; 873 break; 874 } 875 if (err != EAGAIN) 876 break; 877 } 878 if (err == EAGAIN) 879 device_printf(sc->dev, "mxge: command %d timed out" 880 "result = %d\n", 881 cmd, be32toh(response->result)); 882 mtx_unlock(&sc->cmd_mtx); 883 return err; 884 } 885 886 static int 887 mxge_adopt_running_firmware(mxge_softc_t *sc) 888 { 889 struct mcp_gen_header *hdr; 890 const size_t bytes = sizeof (struct mcp_gen_header); 891 size_t hdr_offset; 892 int status; 893 894 /* find running firmware header */ 895 hdr_offset = htobe32(*(volatile uint32_t *) 896 (sc->sram + MCP_HEADER_PTR_OFFSET)); 897 898 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 899 device_printf(sc->dev, 900 "Running firmware has bad header offset (%d)\n", 901 (int)hdr_offset); 902 return EIO; 903 } 904 905 /* copy header of running firmware from SRAM to host memory to 906 * validate firmware */ 907 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 908 if (hdr == NULL) { 909 device_printf(sc->dev, "could not malloc firmware hdr\n"); 910 return ENOMEM; 911 } 912 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 913 rman_get_bushandle(sc->mem_res), 914 hdr_offset, (char *)hdr, bytes); 915 status = mxge_validate_firmware(sc, hdr); 916 free(hdr, M_DEVBUF); 917 918 /* 919 * check to see if adopted firmware has bug where adopting 920 * it will cause broadcasts to be filtered unless the NIC 921 * is kept in ALLMULTI mode 922 */ 923 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 924 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 925 sc->adopted_rx_filter_bug = 1; 926 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 927 "working around rx filter bug\n", 928 sc->fw_ver_major, sc->fw_ver_minor, 929 sc->fw_ver_tiny); 930 } 931 932 return status; 933 } 934 935 static int 936 mxge_load_firmware(mxge_softc_t *sc, int adopt) 937 { 938 volatile uint32_t *confirm; 939 volatile 
char *submit; 940 char buf_bytes[72]; 941 uint32_t *buf, size, dma_low, dma_high; 942 int status, i; 943 944 buf = (uint32_t *)((uintptr_t)(buf_bytes + 7) & ~7UL); 945 946 size = sc->sram_size; 947 status = mxge_load_firmware_helper(sc, &size); 948 if (status) { 949 if (!adopt) 950 return status; 951 /* Try to use the currently running firmware, if 952 it is new enough */ 953 status = mxge_adopt_running_firmware(sc); 954 if (status) { 955 device_printf(sc->dev, 956 "failed to adopt running firmware\n"); 957 return status; 958 } 959 device_printf(sc->dev, 960 "Successfully adopted running firmware\n"); 961 if (sc->tx_boundary == 4096) { 962 device_printf(sc->dev, 963 "Using firmware currently running on NIC" 964 ". For optimal\n"); 965 device_printf(sc->dev, 966 "performance consider loading optimized " 967 "firmware\n"); 968 } 969 sc->fw_name = mxge_fw_unaligned; 970 sc->tx_boundary = 2048; 971 return 0; 972 } 973 /* clear confirmation addr */ 974 confirm = (volatile uint32_t *)sc->cmd; 975 *confirm = 0; 976 wmb(); 977 /* send a reload command to the bootstrap MCP, and wait for the 978 response in the confirmation address. The firmware should 979 write a -1 there to indicate it is alive and well 980 */ 981 982 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 983 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 984 985 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 986 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 987 buf[2] = htobe32(0xffffffff); /* confirm data */ 988 989 /* FIX: All newest firmware should un-protect the bottom of 990 the sram before handoff. However, the very first interfaces 991 do not. 
Therefore the handoff copy must skip the first 8 bytes 992 */ 993 /* where the code starts*/ 994 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 995 buf[4] = htobe32(size - 8); /* length of code */ 996 buf[5] = htobe32(8); /* where to copy to */ 997 buf[6] = htobe32(0); /* where to jump to */ 998 999 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 1000 mxge_pio_copy(submit, buf, 64); 1001 wmb(); 1002 DELAY(1000); 1003 wmb(); 1004 i = 0; 1005 while (*confirm != 0xffffffff && i < 20) { 1006 DELAY(1000*10); 1007 i++; 1008 bus_dmamap_sync(sc->cmd_dma.dmat, 1009 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 1010 } 1011 if (*confirm != 0xffffffff) { 1012 device_printf(sc->dev,"handoff failed (%p = 0x%x)", 1013 confirm, *confirm); 1014 1015 return ENXIO; 1016 } 1017 return 0; 1018 } 1019 1020 static int 1021 mxge_update_mac_address(mxge_softc_t *sc) 1022 { 1023 mxge_cmd_t cmd; 1024 uint8_t *addr = sc->mac_addr; 1025 int status; 1026 1027 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 1028 | (addr[2] << 8) | addr[3]); 1029 1030 cmd.data1 = ((addr[4] << 8) | (addr[5])); 1031 1032 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 1033 return status; 1034 } 1035 1036 static int 1037 mxge_change_pause(mxge_softc_t *sc, int pause) 1038 { 1039 mxge_cmd_t cmd; 1040 int status; 1041 1042 if (pause) 1043 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, 1044 &cmd); 1045 else 1046 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, 1047 &cmd); 1048 1049 if (status) { 1050 device_printf(sc->dev, "Failed to set flow control mode\n"); 1051 return ENXIO; 1052 } 1053 sc->pause = pause; 1054 return 0; 1055 } 1056 1057 static void 1058 mxge_change_promisc(mxge_softc_t *sc, int promisc) 1059 { 1060 mxge_cmd_t cmd; 1061 int status; 1062 1063 if (mxge_always_promisc) 1064 promisc = 1; 1065 1066 if (promisc) 1067 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, 1068 &cmd); 1069 else 1070 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, 1071 &cmd); 1072 1073 if (status) { 1074 
device_printf(sc->dev, "Failed to set promisc mode\n"); 1075 } 1076 } 1077 1078 struct mxge_add_maddr_ctx { 1079 mxge_softc_t *sc; 1080 int error; 1081 }; 1082 1083 static u_int 1084 mxge_add_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt) 1085 { 1086 struct mxge_add_maddr_ctx *ctx = arg; 1087 mxge_cmd_t cmd; 1088 1089 if (ctx->error != 0) 1090 return (0); 1091 bcopy(LLADDR(sdl), &cmd.data0, 4); 1092 bcopy(LLADDR(sdl) + 4, &cmd.data1, 2); 1093 cmd.data0 = htonl(cmd.data0); 1094 cmd.data1 = htonl(cmd.data1); 1095 1096 ctx->error = mxge_send_cmd(ctx->sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1097 1098 return (1); 1099 } 1100 1101 static void 1102 mxge_set_multicast_list(mxge_softc_t *sc) 1103 { 1104 struct mxge_add_maddr_ctx ctx; 1105 struct ifnet *ifp = sc->ifp; 1106 mxge_cmd_t cmd; 1107 int err; 1108 1109 /* This firmware is known to not support multicast */ 1110 if (!sc->fw_multicast_support) 1111 return; 1112 1113 /* Disable multicast filtering while we play with the lists*/ 1114 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1115 if (err != 0) { 1116 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," 1117 " error status: %d\n", err); 1118 return; 1119 } 1120 1121 if (sc->adopted_rx_filter_bug) 1122 return; 1123 1124 if (ifp->if_flags & IFF_ALLMULTI) 1125 /* request to disable multicast filtering, so quit here */ 1126 return; 1127 1128 /* Flush all the filters */ 1129 1130 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1131 if (err != 0) { 1132 device_printf(sc->dev, 1133 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" 1134 ", error status: %d\n", err); 1135 return; 1136 } 1137 1138 /* Walk the multicast list, and add each address */ 1139 ctx.sc = sc; 1140 ctx.error = 0; 1141 if_foreach_llmaddr(ifp, mxge_add_maddr, &ctx); 1142 if (ctx.error != 0) { 1143 device_printf(sc->dev, "Failed MXGEFW_JOIN_MULTICAST_GROUP, " 1144 "error status:" "%d\t", ctx.error); 1145 /* abort, leaving multicast filtering off */ 1146 return; 1147 } 1148 1149 
/* Enable multicast filtering */ 1150 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1151 if (err != 0) { 1152 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI" 1153 ", error status: %d\n", err); 1154 } 1155 } 1156 1157 static int 1158 mxge_max_mtu(mxge_softc_t *sc) 1159 { 1160 mxge_cmd_t cmd; 1161 int status; 1162 1163 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) 1164 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1165 1166 /* try to set nbufs to see if it we can 1167 use virtually contiguous jumbos */ 1168 cmd.data0 = 0; 1169 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 1170 &cmd); 1171 if (status == 0) 1172 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1173 1174 /* otherwise, we're limited to MJUMPAGESIZE */ 1175 return MJUMPAGESIZE - MXGEFW_PAD; 1176 } 1177 1178 static int 1179 mxge_reset(mxge_softc_t *sc, int interrupts_setup) 1180 { 1181 struct mxge_slice_state *ss; 1182 mxge_rx_done_t *rx_done; 1183 volatile uint32_t *irq_claim; 1184 mxge_cmd_t cmd; 1185 int slice, status; 1186 1187 /* try to send a reset command to the card to see if it 1188 is alive */ 1189 memset(&cmd, 0, sizeof (cmd)); 1190 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1191 if (status != 0) { 1192 device_printf(sc->dev, "failed reset\n"); 1193 return ENXIO; 1194 } 1195 1196 mxge_dummy_rdma(sc, 1); 1197 1198 /* set the intrq size */ 1199 cmd.data0 = sc->rx_ring_size; 1200 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1201 1202 /* 1203 * Even though we already know how many slices are supported 1204 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES 1205 * has magic side effects, and must be called after a reset. 1206 * It must be called prior to calling any RSS related cmds, 1207 * including assigning an interrupt queue for anything but 1208 * slice 0. It must also be called *after* 1209 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 1210 * the firmware to compute offsets. 
1211 */ 1212 1213 if (sc->num_slices > 1) { 1214 /* ask the maximum number of slices it supports */ 1215 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 1216 &cmd); 1217 if (status != 0) { 1218 device_printf(sc->dev, 1219 "failed to get number of slices\n"); 1220 return status; 1221 } 1222 /* 1223 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 1224 * to setting up the interrupt queue DMA 1225 */ 1226 cmd.data0 = sc->num_slices; 1227 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 1228 #ifdef IFNET_BUF_RING 1229 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 1230 #endif 1231 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, 1232 &cmd); 1233 if (status != 0) { 1234 device_printf(sc->dev, 1235 "failed to set number of slices\n"); 1236 return status; 1237 } 1238 } 1239 1240 if (interrupts_setup) { 1241 /* Now exchange information about interrupts */ 1242 for (slice = 0; slice < sc->num_slices; slice++) { 1243 rx_done = &sc->ss[slice].rx_done; 1244 memset(rx_done->entry, 0, sc->rx_ring_size); 1245 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr); 1246 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr); 1247 cmd.data2 = slice; 1248 status |= mxge_send_cmd(sc, 1249 MXGEFW_CMD_SET_INTRQ_DMA, 1250 &cmd); 1251 } 1252 } 1253 1254 status |= mxge_send_cmd(sc, 1255 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); 1256 1257 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1258 1259 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1260 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1261 1262 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1263 &cmd); 1264 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1265 if (status != 0) { 1266 device_printf(sc->dev, "failed set interrupt parameters\n"); 1267 return status; 1268 } 1269 1270 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1271 1272 /* run a DMA benchmark */ 1273 (void) mxge_dma_test(sc, 
MXGEFW_DMA_TEST); 1274 1275 for (slice = 0; slice < sc->num_slices; slice++) { 1276 ss = &sc->ss[slice]; 1277 1278 ss->irq_claim = irq_claim + (2 * slice); 1279 /* reset mcp/driver shared state back to 0 */ 1280 ss->rx_done.idx = 0; 1281 ss->rx_done.cnt = 0; 1282 ss->tx.req = 0; 1283 ss->tx.done = 0; 1284 ss->tx.pkt_done = 0; 1285 ss->tx.queue_active = 0; 1286 ss->tx.activate = 0; 1287 ss->tx.deactivate = 0; 1288 ss->tx.wake = 0; 1289 ss->tx.defrag = 0; 1290 ss->tx.stall = 0; 1291 ss->rx_big.cnt = 0; 1292 ss->rx_small.cnt = 0; 1293 ss->lc.lro_bad_csum = 0; 1294 ss->lc.lro_queued = 0; 1295 ss->lc.lro_flushed = 0; 1296 if (ss->fw_stats != NULL) { 1297 bzero(ss->fw_stats, sizeof *ss->fw_stats); 1298 } 1299 } 1300 sc->rdma_tags_available = 15; 1301 status = mxge_update_mac_address(sc); 1302 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC); 1303 mxge_change_pause(sc, sc->pause); 1304 mxge_set_multicast_list(sc); 1305 if (sc->throttle) { 1306 cmd.data0 = sc->throttle; 1307 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, 1308 &cmd)) { 1309 device_printf(sc->dev, 1310 "can't enable throttle\n"); 1311 } 1312 } 1313 return status; 1314 } 1315 1316 static int 1317 mxge_change_throttle(SYSCTL_HANDLER_ARGS) 1318 { 1319 mxge_cmd_t cmd; 1320 mxge_softc_t *sc; 1321 int err; 1322 unsigned int throttle; 1323 1324 sc = arg1; 1325 throttle = sc->throttle; 1326 err = sysctl_handle_int(oidp, &throttle, arg2, req); 1327 if (err != 0) { 1328 return err; 1329 } 1330 1331 if (throttle == sc->throttle) 1332 return 0; 1333 1334 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE) 1335 return EINVAL; 1336 1337 mtx_lock(&sc->driver_mtx); 1338 cmd.data0 = throttle; 1339 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd); 1340 if (err == 0) 1341 sc->throttle = throttle; 1342 mtx_unlock(&sc->driver_mtx); 1343 return err; 1344 } 1345 1346 static int 1347 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1348 { 1349 mxge_softc_t *sc; 1350 unsigned int intr_coal_delay; 
1351 int err; 1352 1353 sc = arg1; 1354 intr_coal_delay = sc->intr_coal_delay; 1355 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1356 if (err != 0) { 1357 return err; 1358 } 1359 if (intr_coal_delay == sc->intr_coal_delay) 1360 return 0; 1361 1362 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1363 return EINVAL; 1364 1365 mtx_lock(&sc->driver_mtx); 1366 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1367 sc->intr_coal_delay = intr_coal_delay; 1368 1369 mtx_unlock(&sc->driver_mtx); 1370 return err; 1371 } 1372 1373 static int 1374 mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1375 { 1376 mxge_softc_t *sc; 1377 unsigned int enabled; 1378 int err; 1379 1380 sc = arg1; 1381 enabled = sc->pause; 1382 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1383 if (err != 0) { 1384 return err; 1385 } 1386 if (enabled == sc->pause) 1387 return 0; 1388 1389 mtx_lock(&sc->driver_mtx); 1390 err = mxge_change_pause(sc, enabled); 1391 mtx_unlock(&sc->driver_mtx); 1392 return err; 1393 } 1394 1395 static int 1396 mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1397 { 1398 int err; 1399 1400 if (arg1 == NULL) 1401 return EFAULT; 1402 arg2 = be32toh(*(int *)arg1); 1403 arg1 = NULL; 1404 err = sysctl_handle_int(oidp, arg1, arg2, req); 1405 1406 return err; 1407 } 1408 1409 static void 1410 mxge_rem_sysctls(mxge_softc_t *sc) 1411 { 1412 struct mxge_slice_state *ss; 1413 int slice; 1414 1415 if (sc->slice_sysctl_tree == NULL) 1416 return; 1417 1418 for (slice = 0; slice < sc->num_slices; slice++) { 1419 ss = &sc->ss[slice]; 1420 if (ss == NULL || ss->sysctl_tree == NULL) 1421 continue; 1422 sysctl_ctx_free(&ss->sysctl_ctx); 1423 ss->sysctl_tree = NULL; 1424 } 1425 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1426 sc->slice_sysctl_tree = NULL; 1427 } 1428 1429 static void 1430 mxge_add_sysctls(mxge_softc_t *sc) 1431 { 1432 struct sysctl_ctx_list *ctx; 1433 struct sysctl_oid_list *children; 1434 mcp_irq_data_t *fw; 1435 struct mxge_slice_state *ss; 1436 int slice; 1437 
char slice_num[8]; 1438 1439 ctx = device_get_sysctl_ctx(sc->dev); 1440 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1441 fw = sc->ss[0].fw_stats; 1442 1443 /* random information */ 1444 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1445 "firmware_version", 1446 CTLFLAG_RD, sc->fw_version, 1447 0, "firmware version"); 1448 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1449 "serial_number", 1450 CTLFLAG_RD, sc->serial_number_string, 1451 0, "serial number"); 1452 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1453 "product_code", 1454 CTLFLAG_RD, sc->product_code_string, 1455 0, "product_code"); 1456 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1457 "pcie_link_width", 1458 CTLFLAG_RD, &sc->link_width, 1459 0, "tx_boundary"); 1460 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1461 "tx_boundary", 1462 CTLFLAG_RD, &sc->tx_boundary, 1463 0, "tx_boundary"); 1464 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1465 "write_combine", 1466 CTLFLAG_RD, &sc->wc, 1467 0, "write combining PIO?"); 1468 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1469 "read_dma_MBs", 1470 CTLFLAG_RD, &sc->read_dma, 1471 0, "DMA Read speed in MB/s"); 1472 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1473 "write_dma_MBs", 1474 CTLFLAG_RD, &sc->write_dma, 1475 0, "DMA Write speed in MB/s"); 1476 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1477 "read_write_dma_MBs", 1478 CTLFLAG_RD, &sc->read_write_dma, 1479 0, "DMA concurrent Read/Write speed in MB/s"); 1480 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1481 "watchdog_resets", 1482 CTLFLAG_RD, &sc->watchdog_resets, 1483 0, "Number of times NIC was reset"); 1484 1485 /* performance related tunables */ 1486 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1487 "intr_coal_delay", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 1488 sc, 0, mxge_change_intr_coal, "I", 1489 "interrupt coalescing delay in usecs"); 1490 1491 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1492 "throttle", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 1493 mxge_change_throttle, "I", "transmit throttling"); 1494 1495 
SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1496 "flow_control_enabled", 1497 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 1498 mxge_change_flow_control, "I", 1499 "interrupt coalescing delay in usecs"); 1500 1501 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1502 "deassert_wait", 1503 CTLFLAG_RW, &mxge_deassert_wait, 1504 0, "Wait for IRQ line to go low in ihandler"); 1505 1506 /* stats block from firmware is in network byte order. 1507 Need to swap it */ 1508 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1509 "link_up", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1510 &fw->link_up, 0, mxge_handle_be32, "I", "link up"); 1511 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1512 "rdma_tags_available", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1513 &fw->rdma_tags_available, 0, mxge_handle_be32, "I", 1514 "rdma_tags_available"); 1515 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1516 "dropped_bad_crc32", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1517 &fw->dropped_bad_crc32, 0, mxge_handle_be32, "I", 1518 "dropped_bad_crc32"); 1519 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1520 "dropped_bad_phy", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1521 &fw->dropped_bad_phy, 0, mxge_handle_be32, "I", "dropped_bad_phy"); 1522 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1523 "dropped_link_error_or_filtered", 1524 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1525 &fw->dropped_link_error_or_filtered, 0, mxge_handle_be32, "I", 1526 "dropped_link_error_or_filtered"); 1527 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1528 "dropped_link_overflow", 1529 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1530 &fw->dropped_link_overflow, 0, mxge_handle_be32, "I", 1531 "dropped_link_overflow"); 1532 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1533 "dropped_multicast_filtered", 1534 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1535 &fw->dropped_multicast_filtered, 0, mxge_handle_be32, "I", 1536 "dropped_multicast_filtered"); 1537 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1538 "dropped_no_big_buffer", 1539 CTLTYPE_INT | CTLFLAG_RD | 
CTLFLAG_MPSAFE, 1540 &fw->dropped_no_big_buffer, 0, mxge_handle_be32, "I", 1541 "dropped_no_big_buffer"); 1542 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1543 "dropped_no_small_buffer", 1544 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1545 &fw->dropped_no_small_buffer, 0, mxge_handle_be32, "I", 1546 "dropped_no_small_buffer"); 1547 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1548 "dropped_overrun", 1549 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1550 &fw->dropped_overrun, 0, mxge_handle_be32, "I", 1551 "dropped_overrun"); 1552 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1553 "dropped_pause", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1554 &fw->dropped_pause, 0, mxge_handle_be32, "I", "dropped_pause"); 1555 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1556 "dropped_runt", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1557 &fw->dropped_runt, 0, mxge_handle_be32, "I", "dropped_runt"); 1558 1559 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1560 "dropped_unicast_filtered", 1561 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1562 &fw->dropped_unicast_filtered, 0, mxge_handle_be32, "I", 1563 "dropped_unicast_filtered"); 1564 1565 /* verbose printing? 
*/ 1566 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1567 "verbose", 1568 CTLFLAG_RW, &mxge_verbose, 1569 0, "verbose printing"); 1570 1571 /* add counters exported for debugging from all slices */ 1572 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1573 sc->slice_sysctl_tree = 1574 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO, 1575 "slice", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 1576 1577 for (slice = 0; slice < sc->num_slices; slice++) { 1578 ss = &sc->ss[slice]; 1579 sysctl_ctx_init(&ss->sysctl_ctx); 1580 ctx = &ss->sysctl_ctx; 1581 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1582 sprintf(slice_num, "%d", slice); 1583 ss->sysctl_tree = 1584 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num, 1585 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 1586 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1587 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1588 "rx_small_cnt", 1589 CTLFLAG_RD, &ss->rx_small.cnt, 1590 0, "rx_small_cnt"); 1591 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1592 "rx_big_cnt", 1593 CTLFLAG_RD, &ss->rx_big.cnt, 1594 0, "rx_small_cnt"); 1595 SYSCTL_ADD_U64(ctx, children, OID_AUTO, 1596 "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed, 1597 0, "number of lro merge queues flushed"); 1598 1599 SYSCTL_ADD_U64(ctx, children, OID_AUTO, 1600 "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum, 1601 0, "number of bad csums preventing LRO"); 1602 1603 SYSCTL_ADD_U64(ctx, children, OID_AUTO, 1604 "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued, 1605 0, "number of frames appended to lro merge" 1606 "queues"); 1607 1608 #ifndef IFNET_BUF_RING 1609 /* only transmit from slice 0 for now */ 1610 if (slice > 0) 1611 continue; 1612 #endif 1613 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1614 "tx_req", 1615 CTLFLAG_RD, &ss->tx.req, 1616 0, "tx_req"); 1617 1618 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1619 "tx_done", 1620 CTLFLAG_RD, &ss->tx.done, 1621 0, "tx_done"); 1622 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1623 "tx_pkt_done", 1624 CTLFLAG_RD, &ss->tx.pkt_done, 1625 0, "tx_done"); 1626 
SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1627 "tx_stall", 1628 CTLFLAG_RD, &ss->tx.stall, 1629 0, "tx_stall"); 1630 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1631 "tx_wake", 1632 CTLFLAG_RD, &ss->tx.wake, 1633 0, "tx_wake"); 1634 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1635 "tx_defrag", 1636 CTLFLAG_RD, &ss->tx.defrag, 1637 0, "tx_defrag"); 1638 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1639 "tx_queue_active", 1640 CTLFLAG_RD, &ss->tx.queue_active, 1641 0, "tx_queue_active"); 1642 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1643 "tx_activate", 1644 CTLFLAG_RD, &ss->tx.activate, 1645 0, "tx_activate"); 1646 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1647 "tx_deactivate", 1648 CTLFLAG_RD, &ss->tx.deactivate, 1649 0, "tx_deactivate"); 1650 } 1651 } 1652 1653 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1654 backwards one at a time and handle ring wraps */ 1655 1656 static inline void 1657 mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1658 mcp_kreq_ether_send_t *src, int cnt) 1659 { 1660 int idx, starting_slot; 1661 starting_slot = tx->req; 1662 while (cnt > 1) { 1663 cnt--; 1664 idx = (starting_slot + cnt) & tx->mask; 1665 mxge_pio_copy(&tx->lanai[idx], 1666 &src[cnt], sizeof(*src)); 1667 wmb(); 1668 } 1669 } 1670 1671 /* 1672 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1673 * at most 32 bytes at a time, so as to avoid involving the software 1674 * pio handler in the nic. 
We re-write the first segment's flags 1675 * to mark them valid only after writing the entire chain 1676 */ 1677 1678 static inline void 1679 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 1680 int cnt) 1681 { 1682 int idx, i; 1683 uint32_t *src_ints; 1684 volatile uint32_t *dst_ints; 1685 mcp_kreq_ether_send_t *srcp; 1686 volatile mcp_kreq_ether_send_t *dstp, *dst; 1687 uint8_t last_flags; 1688 1689 idx = tx->req & tx->mask; 1690 1691 last_flags = src->flags; 1692 src->flags = 0; 1693 wmb(); 1694 dst = dstp = &tx->lanai[idx]; 1695 srcp = src; 1696 1697 if ((idx + cnt) < tx->mask) { 1698 for (i = 0; i < (cnt - 1); i += 2) { 1699 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1700 wmb(); /* force write every 32 bytes */ 1701 srcp += 2; 1702 dstp += 2; 1703 } 1704 } else { 1705 /* submit all but the first request, and ensure 1706 that it is submitted below */ 1707 mxge_submit_req_backwards(tx, src, cnt); 1708 i = 0; 1709 } 1710 if (i < cnt) { 1711 /* submit the first request */ 1712 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1713 wmb(); /* barrier before setting valid flag */ 1714 } 1715 1716 /* re-write the last 32-bits with the valid flags */ 1717 src->flags = last_flags; 1718 src_ints = (uint32_t *)src; 1719 src_ints+=3; 1720 dst_ints = (volatile uint32_t *)dst; 1721 dst_ints+=3; 1722 *dst_ints = *src_ints; 1723 tx->req += cnt; 1724 wmb(); 1725 } 1726 1727 static int 1728 mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m, 1729 struct mxge_pkt_info *pi) 1730 { 1731 struct ether_vlan_header *eh; 1732 uint16_t etype; 1733 int tso = m->m_pkthdr.csum_flags & (CSUM_TSO); 1734 #if IFCAP_TSO6 && defined(INET6) 1735 int nxt; 1736 #endif 1737 1738 eh = mtod(m, struct ether_vlan_header *); 1739 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 1740 etype = ntohs(eh->evl_proto); 1741 pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 1742 } else { 1743 etype = ntohs(eh->evl_encap_proto); 1744 pi->ip_off = ETHER_HDR_LEN; 1745 } 1746 1747 switch (etype) { 
1748 case ETHERTYPE_IP: 1749 /* 1750 * ensure ip header is in first mbuf, copy it to a 1751 * scratch buffer if not 1752 */ 1753 pi->ip = (struct ip *)(m->m_data + pi->ip_off); 1754 pi->ip6 = NULL; 1755 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) { 1756 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip), 1757 ss->scratch); 1758 pi->ip = (struct ip *)(ss->scratch + pi->ip_off); 1759 } 1760 pi->ip_hlen = pi->ip->ip_hl << 2; 1761 if (!tso) 1762 return 0; 1763 1764 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen + 1765 sizeof(struct tcphdr))) { 1766 m_copydata(m, 0, pi->ip_off + pi->ip_hlen + 1767 sizeof(struct tcphdr), ss->scratch); 1768 pi->ip = (struct ip *)(ss->scratch + pi->ip_off); 1769 } 1770 pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen); 1771 break; 1772 #if IFCAP_TSO6 && defined(INET6) 1773 case ETHERTYPE_IPV6: 1774 pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off); 1775 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) { 1776 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6), 1777 ss->scratch); 1778 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off); 1779 } 1780 nxt = 0; 1781 pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt); 1782 pi->ip_hlen -= pi->ip_off; 1783 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) 1784 return EINVAL; 1785 1786 if (!tso) 1787 return 0; 1788 1789 if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen) 1790 return EINVAL; 1791 1792 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen + 1793 sizeof(struct tcphdr))) { 1794 m_copydata(m, 0, pi->ip_off + pi->ip_hlen + 1795 sizeof(struct tcphdr), ss->scratch); 1796 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off); 1797 } 1798 pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen); 1799 break; 1800 #endif 1801 default: 1802 return EINVAL; 1803 } 1804 return 0; 1805 } 1806 1807 #if IFCAP_TSO4 1808 1809 static void 1810 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m, 1811 int busdma_seg_cnt, struct 
mxge_pkt_info *pi) 1812 { 1813 mxge_tx_ring_t *tx; 1814 mcp_kreq_ether_send_t *req; 1815 bus_dma_segment_t *seg; 1816 uint32_t low, high_swapped; 1817 int len, seglen, cum_len, cum_len_next; 1818 int next_is_first, chop, cnt, rdma_count, small; 1819 uint16_t pseudo_hdr_offset, cksum_offset, mss, sum; 1820 uint8_t flags, flags_next; 1821 static int once; 1822 1823 mss = m->m_pkthdr.tso_segsz; 1824 1825 /* negative cum_len signifies to the 1826 * send loop that we are still in the 1827 * header portion of the TSO packet. 1828 */ 1829 1830 cksum_offset = pi->ip_off + pi->ip_hlen; 1831 cum_len = -(cksum_offset + (pi->tcp->th_off << 2)); 1832 1833 /* TSO implies checksum offload on this hardware */ 1834 if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) { 1835 /* 1836 * If packet has full TCP csum, replace it with pseudo hdr 1837 * sum that the NIC expects, otherwise the NIC will emit 1838 * packets with bad TCP checksums. 1839 */ 1840 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); 1841 if (pi->ip6) { 1842 #if (CSUM_TCP_IPV6 != 0) && defined(INET6) 1843 m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6; 1844 sum = in6_cksum_pseudo(pi->ip6, 1845 m->m_pkthdr.len - cksum_offset, 1846 IPPROTO_TCP, 0); 1847 #endif 1848 } else { 1849 #ifdef INET 1850 m->m_pkthdr.csum_flags |= CSUM_TCP; 1851 sum = in_pseudo(pi->ip->ip_src.s_addr, 1852 pi->ip->ip_dst.s_addr, 1853 htons(IPPROTO_TCP + (m->m_pkthdr.len - 1854 cksum_offset))); 1855 #endif 1856 } 1857 m_copyback(m, offsetof(struct tcphdr, th_sum) + 1858 cksum_offset, sizeof(sum), (caddr_t)&sum); 1859 } 1860 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1861 1862 /* for TSO, pseudo_hdr_offset holds mss. 1863 * The firmware figures out where to put 1864 * the checksum by parsing the header. 
*/ 1865 pseudo_hdr_offset = htobe16(mss); 1866 1867 if (pi->ip6) { 1868 /* 1869 * for IPv6 TSO, the "checksum offset" is re-purposed 1870 * to store the TCP header len 1871 */ 1872 cksum_offset = (pi->tcp->th_off << 2); 1873 } 1874 1875 tx = &ss->tx; 1876 req = tx->req_list; 1877 seg = tx->seg_list; 1878 cnt = 0; 1879 rdma_count = 0; 1880 /* "rdma_count" is the number of RDMAs belonging to the 1881 * current packet BEFORE the current send request. For 1882 * non-TSO packets, this is equal to "count". 1883 * For TSO packets, rdma_count needs to be reset 1884 * to 0 after a segment cut. 1885 * 1886 * The rdma_count field of the send request is 1887 * the number of RDMAs of the packet starting at 1888 * that request. For TSO send requests with one ore more cuts 1889 * in the middle, this is the number of RDMAs starting 1890 * after the last cut in the request. All previous 1891 * segments before the last cut implicitly have 1 RDMA. 1892 * 1893 * Since the number of RDMAs is not known beforehand, 1894 * it must be filled-in retroactively - after each 1895 * segmentation cut or at the end of the entire packet. 
1896 */ 1897 1898 while (busdma_seg_cnt) { 1899 /* Break the busdma segment up into pieces*/ 1900 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1901 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1902 len = seg->ds_len; 1903 1904 while (len) { 1905 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1906 seglen = len; 1907 cum_len_next = cum_len + seglen; 1908 (req-rdma_count)->rdma_count = rdma_count + 1; 1909 if (__predict_true(cum_len >= 0)) { 1910 /* payload */ 1911 chop = (cum_len_next > mss); 1912 cum_len_next = cum_len_next % mss; 1913 next_is_first = (cum_len_next == 0); 1914 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1915 flags_next |= next_is_first * 1916 MXGEFW_FLAGS_FIRST; 1917 rdma_count |= -(chop | next_is_first); 1918 rdma_count += chop & !next_is_first; 1919 } else if (cum_len_next >= 0) { 1920 /* header ends */ 1921 rdma_count = -1; 1922 cum_len_next = 0; 1923 seglen = -cum_len; 1924 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1925 flags_next = MXGEFW_FLAGS_TSO_PLD | 1926 MXGEFW_FLAGS_FIRST | 1927 (small * MXGEFW_FLAGS_SMALL); 1928 } 1929 1930 req->addr_high = high_swapped; 1931 req->addr_low = htobe32(low); 1932 req->pseudo_hdr_offset = pseudo_hdr_offset; 1933 req->pad = 0; 1934 req->rdma_count = 1; 1935 req->length = htobe16(seglen); 1936 req->cksum_offset = cksum_offset; 1937 req->flags = flags | ((cum_len & 1) * 1938 MXGEFW_FLAGS_ALIGN_ODD); 1939 low += seglen; 1940 len -= seglen; 1941 cum_len = cum_len_next; 1942 flags = flags_next; 1943 req++; 1944 cnt++; 1945 rdma_count++; 1946 if (cksum_offset != 0 && !pi->ip6) { 1947 if (__predict_false(cksum_offset > seglen)) 1948 cksum_offset -= seglen; 1949 else 1950 cksum_offset = 0; 1951 } 1952 if (__predict_false(cnt > tx->max_desc)) 1953 goto drop; 1954 } 1955 busdma_seg_cnt--; 1956 seg++; 1957 } 1958 (req-rdma_count)->rdma_count = rdma_count; 1959 1960 do { 1961 req--; 1962 req->flags |= MXGEFW_FLAGS_TSO_LAST; 1963 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 1964 1965 
tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1966 mxge_submit_req(tx, tx->req_list, cnt); 1967 #ifdef IFNET_BUF_RING 1968 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 1969 /* tell the NIC to start polling this slice */ 1970 *tx->send_go = 1; 1971 tx->queue_active = 1; 1972 tx->activate++; 1973 wmb(); 1974 } 1975 #endif 1976 return; 1977 1978 drop: 1979 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 1980 m_freem(m); 1981 ss->oerrors++; 1982 if (!once) { 1983 printf("tx->max_desc exceeded via TSO!\n"); 1984 printf("mss = %d, %ld, %d!\n", mss, 1985 (long)seg - (long)tx->seg_list, tx->max_desc); 1986 once = 1; 1987 } 1988 return; 1989 1990 } 1991 1992 #endif /* IFCAP_TSO4 */ 1993 1994 #ifdef MXGE_NEW_VLAN_API 1995 /* 1996 * We reproduce the software vlan tag insertion from 1997 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" 1998 * vlan tag insertion. We need to advertise this in order to have the 1999 * vlan interface respect our csum offload flags. 2000 */ 2001 static struct mbuf * 2002 mxge_vlan_tag_insert(struct mbuf *m) 2003 { 2004 struct ether_vlan_header *evl; 2005 2006 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT); 2007 if (__predict_false(m == NULL)) 2008 return NULL; 2009 if (m->m_len < sizeof(*evl)) { 2010 m = m_pullup(m, sizeof(*evl)); 2011 if (__predict_false(m == NULL)) 2012 return NULL; 2013 } 2014 /* 2015 * Transform the Ethernet header into an Ethernet header 2016 * with 802.1Q encapsulation. 
/*
 * Map one outbound frame for DMA, translate its busdma segments into
 * firmware send requests and submit them on this slice's tx ring.
 *
 * The mbuf is consumed on every path: on success it is recorded in
 * tx->info[] (mxge_tx_done() frees it later), on failure it is freed
 * here and ss->oerrors is bumped.  TSO frames are mapped here but
 * their request list is built by mxge_encap_tso().
 */
static void
mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
{
	struct mxge_pkt_info pi = {0,0,0,0};
	mxge_softc_t *sc;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct mbuf *m_tmp;
	mxge_tx_ring_t *tx;
	int cnt, cum_len, err, i, idx, odd_flag;
	uint16_t pseudo_hdr_offset;
	uint8_t flags, cksum_offset;

	sc = ss->sc;
	tx = &ss->tx;

#ifdef MXGE_NEW_VLAN_API
	/* insert the vlan tag in software; NULL means there is no mbuf
	   left to free */
	if (m->m_flags & M_VLANTAG) {
		m = mxge_vlan_tag_insert(m);
		if (__predict_false(m == NULL))
			goto drop_without_m;
	}
#endif
	/* header offsets are only needed when an offload was requested */
	if (m->m_pkthdr.csum_flags &
	    (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
		if (mxge_parse_tx(ss, m, &pi))
			goto drop;
	}

	/* (try to) map the frame for DMA */
	idx = tx->req & tx->mask;
	err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
				      m, tx->seg_list, &cnt,
				      BUS_DMA_NOWAIT);
	if (__predict_false(err == EFBIG)) {
		/* Too many segments in the chain.  Try
		   to defrag */
		m_tmp = m_defrag(m, M_NOWAIT);
		if (m_tmp == NULL) {
			goto drop;
		}
		ss->tx.defrag++;
		m = m_tmp;
		err = bus_dmamap_load_mbuf_sg(tx->dmat,
					      tx->info[idx].map,
					      m, tx->seg_list, &cnt,
					      BUS_DMA_NOWAIT);
	}
	if (__predict_false(err != 0)) {
		device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
			      " packet len = %d\n", err, m->m_pkthdr.len);
		goto drop;
	}
	bus_dmamap_sync(tx->dmat, tx->info[idx].map,
			BUS_DMASYNC_PREWRITE);
	/* mbuf is attached to the slot of the packet's first request */
	tx->info[idx].m = m;

#if IFCAP_TSO4
	/* TSO is different enough, we handle it in another routine */
	if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
		mxge_encap_tso(ss, m, cnt, &pi);
		return;
	}
#endif

	req = tx->req_list;
	cksum_offset = 0;
	pseudo_hdr_offset = 0;
	flags = MXGEFW_FLAGS_NO_TSO;

	/* checksum offloading? */
	if (m->m_pkthdr.csum_flags &
	    (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
		/* checksumming starts right after the IP header, using
		   the offsets mxge_parse_tx() stored in pi */
		cksum_offset = pi.ip_off + pi.ip_hlen;
		pseudo_hdr_offset = cksum_offset +  m->m_pkthdr.csum_data;
		pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
		req->cksum_offset = cksum_offset;
		flags |= MXGEFW_FLAGS_CKSUM;
		odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
	} else {
		odd_flag = 0;
	}
	if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
		flags |= MXGEFW_FLAGS_SMALL;

	/* convert segments into a request list */
	cum_len = 0;
	seg = tx->seg_list;
	req->flags = MXGEFW_FLAGS_FIRST;
	for (i = 0; i < cnt; i++) {
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		req->length = htobe16(seg->ds_len);
		req->cksum_offset = cksum_offset;
		/* the offset is relative to each segment: consume this
		   segment's length, or clear it once it has been reached */
		if (cksum_offset > seg->ds_len)
			cksum_offset -= seg->ds_len;
		else
			cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cum_len += seg->ds_len;
		seg++;
		req++;
		/* pre-clear the next request so the |= above (and in the
		   runt padding below) starts from zero */
		req->flags = 0;
	}
	req--;
	/* pad runts to 60 bytes */
	if (cum_len < 60) {
		/* append one extra request pointing at the zero pad buffer */
		req++;
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->length = htobe16(60 - cum_len);
		req->cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cnt++;
	}

	/* the first request carries the request count for the packet */
	tx->req_list[0].rdma_count = cnt;
#if 0
	/* print what the firmware will see */
	for (i = 0; i < cnt; i++) {
		printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
		    "cso:%d, flags:0x%x, rdma:%d\n",
		    i, (int)ntohl(tx->req_list[i].addr_high),
		    (int)ntohl(tx->req_list[i].addr_low),
		    (int)ntohs(tx->req_list[i].length),
		    (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
		    tx->req_list[i].cksum_offset, tx->req_list[i].flags,
		    tx->req_list[i].rdma_count);
	}
	printf("--------------\n");
#endif
	/* flag the slot of the last request; mxge_tx_done() counts a
	   completed packet each time it retires a flagged slot */
	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	m_freem(m);
drop_without_m:
	ss->oerrors++;
	return;
}
#ifdef IFNET_BUF_RING
/*
 * if_qflush method: free every mbuf still sitting in each slice's
 * buf_ring (under that ring's tx mutex), then flush the ifnet queue.
 */
static void
mxge_qflush(struct ifnet *ifp)
{
	mxge_softc_t *sc = ifp->if_softc;
	mxge_tx_ring_t *ring;
	struct mbuf *pkt;
	int s;

	for (s = 0; s < sc->num_slices; s++) {
		ring = &sc->ss[s].tx;
		mtx_lock(&ring->mtx);
		for (;;) {
			pkt = buf_ring_dequeue_sc(ring->br);
			if (pkt == NULL)
				break;
			m_freem(pkt);
		}
		mtx_unlock(&ring->mtx);
	}
	if_qflush(ifp);
}

/*
 * Drain the slice's buf_ring into the NIC while more than
 * tx->max_desc descriptors are free.  Called with tx->mtx held.
 * If we stop for lack of descriptors with packets still queued, the
 * slice is marked IFF_DRV_OACTIVE and the stall is counted.
 */
static inline void
mxge_start_locked(struct mxge_slice_state *ss)
{
	struct ifnet *ifp = ss->sc->ifp;
	mxge_tx_ring_t *tx = &ss->tx;
	struct mbuf *pkt;

	while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
		pkt = drbr_dequeue(ifp, tx->br);
		if (pkt == NULL)
			return;		/* ring drained */

		/* let BPF see it */
		BPF_MTAP(ifp, pkt);

		/* give it to the nic */
		mxge_encap(ss, pkt);
	}

	/* ran out of transmit slots */
	if ((ss->if_drv_flags & IFF_DRV_OACTIVE) != 0)
		return;
	if (drbr_empty(ifp, tx->br))
		return;
	ss->if_drv_flags |= IFF_DRV_OACTIVE;
	tx->stall++;
}

/*
 * Transmit (or queue) one mbuf on a slice; tx->mtx must be held.
 *
 * When the slice is not running, or is already stalled, the mbuf is
 * only enqueued.  Otherwise it goes straight to the NIC if nothing is
 * queued ahead of it and there is descriptor space; if not it is
 * enqueued and the ring is drained as far as possible.
 *
 * Returns 0, or the drbr_enqueue() error.
 */
static int
mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
{
	struct ifnet *ifp = ss->sc->ifp;
	mxge_tx_ring_t *tx = &ss->tx;
	int running_mask, error;

	running_mask = IFF_DRV_RUNNING | IFF_DRV_OACTIVE;
	if ((ss->if_drv_flags & running_mask) != IFF_DRV_RUNNING)
		return (drbr_enqueue(ifp, tx->br, m));

	if (!drbr_needs_enqueue(ifp, tx->br) &&
	    ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
		/* let BPF see it */
		BPF_MTAP(ifp, m);
		/* give it to the nic */
		mxge_encap(ss, m);
	} else {
		error = drbr_enqueue(ifp, tx->br, m);
		if (error != 0)
			return (error);
	}

	if (!drbr_empty(ifp, tx->br))
		mxge_start_locked(ss);
	return (0);
}

/*
 * if_transmit method.  The slice is picked from the mbuf's flowid.
 * If the slice's tx mutex is contended we do not wait for it; the
 * mbuf is just placed on that slice's buf_ring.
 */
static int
mxge_transmit(struct ifnet *ifp, struct mbuf *m)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct mxge_slice_state *ss;
	mxge_tx_ring_t *tx;
	int which;
	int error;

	/* num_slices always power of 2 */
	which = m->m_pkthdr.flowid;
	which &= (sc->num_slices - 1);

	ss = &sc->ss[which];
	tx = &ss->tx;

	if (!mtx_trylock(&tx->mtx))
		return (drbr_enqueue(ifp, tx->br, m));

	error = mxge_transmit_locked(ss, m);
	mtx_unlock(&tx->mtx);
	return (error);
}
#endif /* IFNET_BUF_RING */
(sc->num_slices - 1); /* num_slices always power of 2 */ 2284 2285 ss = &sc->ss[slice]; 2286 tx = &ss->tx; 2287 2288 if (mtx_trylock(&tx->mtx)) { 2289 err = mxge_transmit_locked(ss, m); 2290 mtx_unlock(&tx->mtx); 2291 } else { 2292 err = drbr_enqueue(ifp, tx->br, m); 2293 } 2294 2295 return (err); 2296 } 2297 2298 #else 2299 2300 static inline void 2301 mxge_start_locked(struct mxge_slice_state *ss) 2302 { 2303 mxge_softc_t *sc; 2304 struct mbuf *m; 2305 struct ifnet *ifp; 2306 mxge_tx_ring_t *tx; 2307 2308 sc = ss->sc; 2309 ifp = sc->ifp; 2310 tx = &ss->tx; 2311 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2312 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 2313 if (m == NULL) { 2314 return; 2315 } 2316 /* let BPF see it */ 2317 BPF_MTAP(ifp, m); 2318 2319 /* give it to the nic */ 2320 mxge_encap(ss, m); 2321 } 2322 /* ran out of transmit slots */ 2323 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 2324 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2325 tx->stall++; 2326 } 2327 } 2328 #endif 2329 static void 2330 mxge_start(struct ifnet *ifp) 2331 { 2332 mxge_softc_t *sc = ifp->if_softc; 2333 struct mxge_slice_state *ss; 2334 2335 /* only use the first slice for now */ 2336 ss = &sc->ss[0]; 2337 mtx_lock(&ss->tx.mtx); 2338 mxge_start_locked(ss); 2339 mtx_unlock(&ss->tx.mtx); 2340 } 2341 2342 /* 2343 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2344 * at most 32 bytes at a time, so as to avoid involving the software 2345 * pio handler in the nic. 
We re-write the first segment's low 2346 * DMA address to mark it valid only after we write the entire chunk 2347 * in a burst 2348 */ 2349 static inline void 2350 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2351 mcp_kreq_ether_recv_t *src) 2352 { 2353 uint32_t low; 2354 2355 low = src->addr_low; 2356 src->addr_low = 0xffffffff; 2357 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2358 wmb(); 2359 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2360 wmb(); 2361 src->addr_low = low; 2362 dst->addr_low = low; 2363 wmb(); 2364 } 2365 2366 static int 2367 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2368 { 2369 bus_dma_segment_t seg; 2370 struct mbuf *m; 2371 mxge_rx_ring_t *rx = &ss->rx_small; 2372 int cnt, err; 2373 2374 m = m_gethdr(M_NOWAIT, MT_DATA); 2375 if (m == NULL) { 2376 rx->alloc_fail++; 2377 err = ENOBUFS; 2378 goto done; 2379 } 2380 m->m_len = MHLEN; 2381 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2382 &seg, &cnt, BUS_DMA_NOWAIT); 2383 if (err != 0) { 2384 m_free(m); 2385 goto done; 2386 } 2387 rx->info[idx].m = m; 2388 rx->shadow[idx].addr_low = 2389 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2390 rx->shadow[idx].addr_high = 2391 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2392 2393 done: 2394 if ((idx & 7) == 7) 2395 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2396 return err; 2397 } 2398 2399 static int 2400 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2401 { 2402 bus_dma_segment_t seg[3]; 2403 struct mbuf *m; 2404 mxge_rx_ring_t *rx = &ss->rx_big; 2405 int cnt, err, i; 2406 2407 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2408 if (m == NULL) { 2409 rx->alloc_fail++; 2410 err = ENOBUFS; 2411 goto done; 2412 } 2413 m->m_len = rx->mlen; 2414 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2415 seg, &cnt, BUS_DMA_NOWAIT); 2416 if (err != 0) { 2417 m_free(m); 2418 goto done; 2419 } 2420 rx->info[idx].m = m; 2421 rx->shadow[idx].addr_low = 2422 
#ifdef INET6
/*
 * Sum the 16-bit words at raw, one word for every two bytes of len
 * (an odd len still consumes a whole final word), and fold the total
 * into 16 bits with end-around carry.  A len <= 0 yields 0.
 *
 * The words are added exactly as stored in memory and the result is
 * not complemented.
 */
static uint16_t
mxge_csum_generic(uint16_t *raw, int len)
{
	uint32_t sum = 0;
	int remaining;

	for (remaining = len; remaining > 0; remaining -= 2)
		sum += *raw++;

	/* two folds: the first can itself produce a carry */
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}
#endif /* INET6 */
2487 */ 2488 2489 partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset - 2490 ETHER_HDR_LEN); 2491 csum += ~partial; 2492 csum += (csum < ~partial); 2493 csum = (csum >> 16) + (csum & 0xFFFF); 2494 csum = (csum >> 16) + (csum & 0xFFFF); 2495 c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt, 2496 csum); 2497 c ^= 0xffff; 2498 return (c); 2499 } 2500 #endif /* INET6 */ 2501 /* 2502 * Myri10GE hardware checksums are not valid if the sender 2503 * padded the frame with non-zero padding. This is because 2504 * the firmware just does a simple 16-bit 1s complement 2505 * checksum across the entire frame, excluding the first 14 2506 * bytes. It is best to simply to check the checksum and 2507 * tell the stack about it only if the checksum is good 2508 */ 2509 2510 static inline uint16_t 2511 mxge_rx_csum(struct mbuf *m, int csum) 2512 { 2513 struct ether_header *eh; 2514 #ifdef INET 2515 struct ip *ip; 2516 #endif 2517 #if defined(INET) || defined(INET6) 2518 int cap = m->m_pkthdr.rcvif->if_capenable; 2519 #endif 2520 uint16_t c, etype; 2521 2522 eh = mtod(m, struct ether_header *); 2523 etype = ntohs(eh->ether_type); 2524 switch (etype) { 2525 #ifdef INET 2526 case ETHERTYPE_IP: 2527 if ((cap & IFCAP_RXCSUM) == 0) 2528 return (1); 2529 ip = (struct ip *)(eh + 1); 2530 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP) 2531 return (1); 2532 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2533 htonl(ntohs(csum) + ntohs(ip->ip_len) - 2534 (ip->ip_hl << 2) + ip->ip_p)); 2535 c ^= 0xffff; 2536 break; 2537 #endif 2538 #ifdef INET6 2539 case ETHERTYPE_IPV6: 2540 if ((cap & IFCAP_RXCSUM_IPV6) == 0) 2541 return (1); 2542 c = mxge_rx_csum6((eh + 1), m, csum); 2543 break; 2544 #endif 2545 default: 2546 c = 1; 2547 } 2548 return (c); 2549 } 2550 2551 static void 2552 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2553 { 2554 struct ether_vlan_header *evl; 2555 uint32_t partial; 2556 2557 evl = mtod(m, struct ether_vlan_header *); 2558 2559 /* 2560 * 
fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes 2561 * after what the firmware thought was the end of the ethernet 2562 * header. 2563 */ 2564 2565 /* put checksum into host byte order */ 2566 *csum = ntohs(*csum); 2567 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2568 (*csum) += ~partial; 2569 (*csum) += ((*csum) < ~partial); 2570 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2571 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2572 2573 /* restore checksum to network byte order; 2574 later consumers expect this */ 2575 *csum = htons(*csum); 2576 2577 /* save the tag */ 2578 #ifdef MXGE_NEW_VLAN_API 2579 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); 2580 #else 2581 { 2582 struct m_tag *mtag; 2583 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), 2584 M_NOWAIT); 2585 if (mtag == NULL) 2586 return; 2587 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); 2588 m_tag_prepend(m, mtag); 2589 } 2590 2591 #endif 2592 m->m_flags |= M_VLANTAG; 2593 2594 /* 2595 * Remove the 802.1q header by copying the Ethernet 2596 * addresses over it and adjusting the beginning of 2597 * the data in the mbuf. The encapsulated Ethernet 2598 * type field is already in place. 
2599 */ 2600 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2601 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2602 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2603 } 2604 2605 static inline void 2606 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, 2607 uint32_t csum, int lro) 2608 { 2609 mxge_softc_t *sc; 2610 struct ifnet *ifp; 2611 struct mbuf *m; 2612 struct ether_header *eh; 2613 mxge_rx_ring_t *rx; 2614 bus_dmamap_t old_map; 2615 int idx; 2616 2617 sc = ss->sc; 2618 ifp = sc->ifp; 2619 rx = &ss->rx_big; 2620 idx = rx->cnt & rx->mask; 2621 rx->cnt += rx->nbufs; 2622 /* save a pointer to the received mbuf */ 2623 m = rx->info[idx].m; 2624 /* try to replace the received mbuf */ 2625 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2626 /* drop the frame -- the old mbuf is re-cycled */ 2627 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 2628 return; 2629 } 2630 2631 /* unmap the received buffer */ 2632 old_map = rx->info[idx].map; 2633 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2634 bus_dmamap_unload(rx->dmat, old_map); 2635 2636 /* swap the bus_dmamap_t's */ 2637 rx->info[idx].map = rx->extra_map; 2638 rx->extra_map = old_map; 2639 2640 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2641 * aligned */ 2642 m->m_data += MXGEFW_PAD; 2643 2644 m->m_pkthdr.rcvif = ifp; 2645 m->m_len = m->m_pkthdr.len = len; 2646 ss->ipackets++; 2647 eh = mtod(m, struct ether_header *); 2648 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2649 mxge_vlan_tag_remove(m, &csum); 2650 } 2651 /* flowid only valid if RSS hashing is enabled */ 2652 if (sc->num_slices > 1) { 2653 m->m_pkthdr.flowid = (ss - sc->ss); 2654 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 2655 } 2656 /* if the checksum is valid, mark it in the mbuf header */ 2657 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && 2658 (0 == mxge_rx_csum(m, csum))) { 2659 /* Tell the stack that the checksum is good */ 2660 m->m_pkthdr.csum_data = 0xffff; 2661 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2662 
CSUM_DATA_VALID; 2663 2664 #if defined(INET) || defined (INET6) 2665 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0))) 2666 return; 2667 #endif 2668 } 2669 /* pass the frame up the stack */ 2670 (*ifp->if_input)(ifp, m); 2671 } 2672 2673 static inline void 2674 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, 2675 uint32_t csum, int lro) 2676 { 2677 mxge_softc_t *sc; 2678 struct ifnet *ifp; 2679 struct ether_header *eh; 2680 struct mbuf *m; 2681 mxge_rx_ring_t *rx; 2682 bus_dmamap_t old_map; 2683 int idx; 2684 2685 sc = ss->sc; 2686 ifp = sc->ifp; 2687 rx = &ss->rx_small; 2688 idx = rx->cnt & rx->mask; 2689 rx->cnt++; 2690 /* save a pointer to the received mbuf */ 2691 m = rx->info[idx].m; 2692 /* try to replace the received mbuf */ 2693 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2694 /* drop the frame -- the old mbuf is re-cycled */ 2695 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 2696 return; 2697 } 2698 2699 /* unmap the received buffer */ 2700 old_map = rx->info[idx].map; 2701 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2702 bus_dmamap_unload(rx->dmat, old_map); 2703 2704 /* swap the bus_dmamap_t's */ 2705 rx->info[idx].map = rx->extra_map; 2706 rx->extra_map = old_map; 2707 2708 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2709 * aligned */ 2710 m->m_data += MXGEFW_PAD; 2711 2712 m->m_pkthdr.rcvif = ifp; 2713 m->m_len = m->m_pkthdr.len = len; 2714 ss->ipackets++; 2715 eh = mtod(m, struct ether_header *); 2716 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2717 mxge_vlan_tag_remove(m, &csum); 2718 } 2719 /* flowid only valid if RSS hashing is enabled */ 2720 if (sc->num_slices > 1) { 2721 m->m_pkthdr.flowid = (ss - sc->ss); 2722 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 2723 } 2724 /* if the checksum is valid, mark it in the mbuf header */ 2725 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && 2726 (0 == mxge_rx_csum(m, csum))) { 2727 /* Tell the stack that the checksum is good */ 2728 
m->m_pkthdr.csum_data = 0xffff; 2729 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2730 CSUM_DATA_VALID; 2731 2732 #if defined(INET) || defined (INET6) 2733 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum))) 2734 return; 2735 #endif 2736 } 2737 /* pass the frame up the stack */ 2738 (*ifp->if_input)(ifp, m); 2739 } 2740 2741 static inline void 2742 mxge_clean_rx_done(struct mxge_slice_state *ss) 2743 { 2744 mxge_rx_done_t *rx_done = &ss->rx_done; 2745 int limit = 0; 2746 uint16_t length; 2747 uint16_t checksum; 2748 int lro; 2749 2750 lro = ss->sc->ifp->if_capenable & IFCAP_LRO; 2751 while (rx_done->entry[rx_done->idx].length != 0) { 2752 length = ntohs(rx_done->entry[rx_done->idx].length); 2753 rx_done->entry[rx_done->idx].length = 0; 2754 checksum = rx_done->entry[rx_done->idx].checksum; 2755 if (length <= (MHLEN - MXGEFW_PAD)) 2756 mxge_rx_done_small(ss, length, checksum, lro); 2757 else 2758 mxge_rx_done_big(ss, length, checksum, lro); 2759 rx_done->cnt++; 2760 rx_done->idx = rx_done->cnt & rx_done->mask; 2761 2762 /* limit potential for livelock */ 2763 if (__predict_false(++limit > rx_done->mask / 2)) 2764 break; 2765 } 2766 #if defined(INET) || defined (INET6) 2767 tcp_lro_flush_all(&ss->lc); 2768 #endif 2769 } 2770 2771 static inline void 2772 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2773 { 2774 struct ifnet *ifp __unused; 2775 mxge_tx_ring_t *tx; 2776 struct mbuf *m; 2777 bus_dmamap_t map; 2778 int idx; 2779 int *flags; 2780 2781 tx = &ss->tx; 2782 ifp = ss->sc->ifp; 2783 while (tx->pkt_done != mcp_idx) { 2784 idx = tx->done & tx->mask; 2785 tx->done++; 2786 m = tx->info[idx].m; 2787 /* mbuf and DMA map only attached to the first 2788 segment per-mbuf */ 2789 if (m != NULL) { 2790 ss->obytes += m->m_pkthdr.len; 2791 if (m->m_flags & M_MCAST) 2792 ss->omcasts++; 2793 ss->opackets++; 2794 tx->info[idx].m = NULL; 2795 map = tx->info[idx].map; 2796 bus_dmamap_unload(tx->dmat, map); 2797 m_freem(m); 2798 } 2799 if (tx->info[idx].flag) { 2800 
tx->info[idx].flag = 0; 2801 tx->pkt_done++; 2802 } 2803 } 2804 2805 /* If we have space, clear IFF_OACTIVE to tell the stack that 2806 its OK to send packets */ 2807 #ifdef IFNET_BUF_RING 2808 flags = &ss->if_drv_flags; 2809 #else 2810 flags = &ifp->if_drv_flags; 2811 #endif 2812 mtx_lock(&ss->tx.mtx); 2813 if ((*flags) & IFF_DRV_OACTIVE && 2814 tx->req - tx->done < (tx->mask + 1)/4) { 2815 *(flags) &= ~IFF_DRV_OACTIVE; 2816 ss->tx.wake++; 2817 mxge_start_locked(ss); 2818 } 2819 #ifdef IFNET_BUF_RING 2820 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { 2821 /* let the NIC stop polling this queue, since there 2822 * are no more transmits pending */ 2823 if (tx->req == tx->done) { 2824 *tx->send_stop = 1; 2825 tx->queue_active = 0; 2826 tx->deactivate++; 2827 wmb(); 2828 } 2829 } 2830 #endif 2831 mtx_unlock(&ss->tx.mtx); 2832 2833 } 2834 2835 static struct mxge_media_type mxge_xfp_media_types[] = 2836 { 2837 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2838 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2839 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2840 {0, (1 << 5), "10GBASE-ER"}, 2841 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2842 {0, (1 << 3), "10GBASE-SW"}, 2843 {0, (1 << 2), "10GBASE-LW"}, 2844 {0, (1 << 1), "10GBASE-EW"}, 2845 {0, (1 << 0), "Reserved"} 2846 }; 2847 static struct mxge_media_type mxge_sfp_media_types[] = 2848 { 2849 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"}, 2850 {0, (1 << 7), "Reserved"}, 2851 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2852 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2853 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}, 2854 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"} 2855 }; 2856 2857 static void 2858 mxge_media_set(mxge_softc_t *sc, int media_type) 2859 { 2860 2861 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type, 2862 0, NULL); 2863 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type); 2864 sc->current_media = media_type; 2865 sc->media.ifm_media = sc->media.ifm_cur->ifm_media; 2866 } 2867 2868 static void 2869 mxge_media_init(mxge_softc_t 
*sc) 2870 { 2871 char *ptr; 2872 int i; 2873 2874 ifmedia_removeall(&sc->media); 2875 mxge_media_set(sc, IFM_AUTO); 2876 2877 /* 2878 * parse the product code to deterimine the interface type 2879 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2880 * after the 3rd dash in the driver's cached copy of the 2881 * EEPROM's product code string. 2882 */ 2883 ptr = sc->product_code_string; 2884 if (ptr == NULL) { 2885 device_printf(sc->dev, "Missing product code\n"); 2886 return; 2887 } 2888 2889 for (i = 0; i < 3; i++, ptr++) { 2890 ptr = strchr(ptr, '-'); 2891 if (ptr == NULL) { 2892 device_printf(sc->dev, 2893 "only %d dashes in PC?!?\n", i); 2894 return; 2895 } 2896 } 2897 if (*ptr == 'C' || *(ptr +1) == 'C') { 2898 /* -C is CX4 */ 2899 sc->connector = MXGE_CX4; 2900 mxge_media_set(sc, IFM_10G_CX4); 2901 } else if (*ptr == 'Q') { 2902 /* -Q is Quad Ribbon Fiber */ 2903 sc->connector = MXGE_QRF; 2904 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2905 /* FreeBSD has no media type for Quad ribbon fiber */ 2906 } else if (*ptr == 'R') { 2907 /* -R is XFP */ 2908 sc->connector = MXGE_XFP; 2909 } else if (*ptr == 'S' || *(ptr +1) == 'S') { 2910 /* -S or -2S is SFP+ */ 2911 sc->connector = MXGE_SFP; 2912 } else { 2913 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2914 } 2915 } 2916 2917 /* 2918 * Determine the media type for a NIC. Some XFPs will identify 2919 * themselves only when their link is up, so this is initiated via a 2920 * link up interrupt. However, this can potentially take up to 2921 * several milliseconds, so it is run via the watchdog routine, rather 2922 * than in the interrupt handler itself. 
2923 */ 2924 static void 2925 mxge_media_probe(mxge_softc_t *sc) 2926 { 2927 mxge_cmd_t cmd; 2928 char *cage_type; 2929 2930 struct mxge_media_type *mxge_media_types = NULL; 2931 int i, err, ms, mxge_media_type_entries; 2932 uint32_t byte; 2933 2934 sc->need_media_probe = 0; 2935 2936 if (sc->connector == MXGE_XFP) { 2937 /* -R is XFP */ 2938 mxge_media_types = mxge_xfp_media_types; 2939 mxge_media_type_entries = 2940 nitems(mxge_xfp_media_types); 2941 byte = MXGE_XFP_COMPLIANCE_BYTE; 2942 cage_type = "XFP"; 2943 } else if (sc->connector == MXGE_SFP) { 2944 /* -S or -2S is SFP+ */ 2945 mxge_media_types = mxge_sfp_media_types; 2946 mxge_media_type_entries = 2947 nitems(mxge_sfp_media_types); 2948 cage_type = "SFP+"; 2949 byte = 3; 2950 } else { 2951 /* nothing to do; media type cannot change */ 2952 return; 2953 } 2954 2955 /* 2956 * At this point we know the NIC has an XFP cage, so now we 2957 * try to determine what is in the cage by using the 2958 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2959 * register. 
/*
 * Interrupt handler for one slice; arg is the slice state.
 *
 * Processes transmit completions and received frames until the
 * firmware's stats block is no longer marked valid, then (first slice
 * only) picks up link state, RDMA tag and link-down statistics, and
 * finally writes the irq claim words back to the NIC.
 */
static void
mxge_intr(void *arg)
{
	struct mxge_slice_state *ss = arg;
	mxge_softc_t *sc = ss->sc;
	mcp_irq_data_t *stats = ss->fw_stats;
	mxge_tx_ring_t *tx = &ss->tx;
	mxge_rx_done_t *rx_done = &ss->rx_done;
	uint32_t send_done_count;
	uint8_t valid;

#ifndef IFNET_BUF_RING
	/* an interrupt on a non-zero slice is implicitly valid
	   since MSI-X irqs are not shared */
	if (ss != sc->ss) {
		mxge_clean_rx_done(ss);
		*ss->irq_claim = be32toh(3);
		return;
	}
#endif

	/* make sure the DMA has finished */
	if (!stats->valid) {
		return;
	}
	/* keep the original value; it decides which claim word is
	   written at the end */
	valid = stats->valid;

	if (sc->legacy_irq) {
		/* lower legacy IRQ  */
		*sc->irq_deassert = 0;
		if (!mxge_deassert_wait)
			/* don't wait for conf. that irq is low */
			stats->valid = 0;
	} else {
		stats->valid = 0;
	}

	/* loop while waiting for legacy irq deassertion */
	do {
		/* check for transmit completes and receives */
		send_done_count = be32toh(stats->send_done_count);
		while ((send_done_count != tx->pkt_done) ||
		       (rx_done->entry[rx_done->idx].length != 0)) {
			if (send_done_count != tx->pkt_done)
				mxge_tx_done(ss, (int)send_done_count);
			mxge_clean_rx_done(ss);
			/* re-read: more sends may have completed meanwhile */
			send_done_count = be32toh(stats->send_done_count);
		}
		if (sc->legacy_irq && mxge_deassert_wait)
			wmb();
	/* volatile read so the flag is re-fetched from memory each pass */
	} while (*((volatile uint8_t *) &stats->valid));

	/* fw link & error stats meaningful only on the first slice */
	if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
		if (sc->link_state != stats->link_up) {
			sc->link_state = stats->link_up;
			if (sc->link_state) {
				if_link_state_change(sc->ifp, LINK_STATE_UP);
				if (mxge_verbose)
					device_printf(sc->dev, "link up\n");
			} else {
				if_link_state_change(sc->ifp, LINK_STATE_DOWN);
				if (mxge_verbose)
					device_printf(sc->dev, "link down\n");
			}
			/* have the media re-probed after a link change */
			sc->need_media_probe = 1;
		}
		if (sc->rdma_tags_available !=
		    be32toh(stats->rdma_tags_available)) {
			sc->rdma_tags_available =
				be32toh(stats->rdma_tags_available);
			device_printf(sc->dev, "RDMA timed out! %d tags "
				      "left\n", sc->rdma_tags_available);
		}

		if (stats->link_down) {
			sc->down_cnt += stats->link_down;
			sc->link_state = 0;
			if_link_state_change(sc->ifp, LINK_STATE_DOWN);
		}
	}

	/* check to see if we have rx token to pass back */
	if (valid & 0x1)
		*ss->irq_claim = be32toh(3);
	/* the second claim word is written unconditionally */
	*(ss->irq_claim + 1) = be32toh(3);
}
%d tags " 3093 "left\n", sc->rdma_tags_available); 3094 } 3095 3096 if (stats->link_down) { 3097 sc->down_cnt += stats->link_down; 3098 sc->link_state = 0; 3099 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3100 } 3101 } 3102 3103 /* check to see if we have rx token to pass back */ 3104 if (valid & 0x1) 3105 *ss->irq_claim = be32toh(3); 3106 *(ss->irq_claim + 1) = be32toh(3); 3107 } 3108 3109 static void 3110 mxge_init(void *arg) 3111 { 3112 mxge_softc_t *sc = arg; 3113 struct ifnet *ifp = sc->ifp; 3114 3115 mtx_lock(&sc->driver_mtx); 3116 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 3117 (void) mxge_open(sc); 3118 mtx_unlock(&sc->driver_mtx); 3119 } 3120 3121 static void 3122 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 3123 { 3124 int i; 3125 3126 #if defined(INET) || defined(INET6) 3127 tcp_lro_free(&ss->lc); 3128 #endif 3129 for (i = 0; i <= ss->rx_big.mask; i++) { 3130 if (ss->rx_big.info[i].m == NULL) 3131 continue; 3132 bus_dmamap_unload(ss->rx_big.dmat, 3133 ss->rx_big.info[i].map); 3134 m_freem(ss->rx_big.info[i].m); 3135 ss->rx_big.info[i].m = NULL; 3136 } 3137 3138 for (i = 0; i <= ss->rx_small.mask; i++) { 3139 if (ss->rx_small.info[i].m == NULL) 3140 continue; 3141 bus_dmamap_unload(ss->rx_small.dmat, 3142 ss->rx_small.info[i].map); 3143 m_freem(ss->rx_small.info[i].m); 3144 ss->rx_small.info[i].m = NULL; 3145 } 3146 3147 /* transmit ring used only on the first slice */ 3148 if (ss->tx.info == NULL) 3149 return; 3150 3151 for (i = 0; i <= ss->tx.mask; i++) { 3152 ss->tx.info[i].flag = 0; 3153 if (ss->tx.info[i].m == NULL) 3154 continue; 3155 bus_dmamap_unload(ss->tx.dmat, 3156 ss->tx.info[i].map); 3157 m_freem(ss->tx.info[i].m); 3158 ss->tx.info[i].m = NULL; 3159 } 3160 } 3161 3162 static void 3163 mxge_free_mbufs(mxge_softc_t *sc) 3164 { 3165 int slice; 3166 3167 for (slice = 0; slice < sc->num_slices; slice++) 3168 mxge_free_slice_mbufs(&sc->ss[slice]); 3169 } 3170 3171 static void 3172 mxge_free_slice_rings(struct mxge_slice_state *ss) 
3173 { 3174 int i; 3175 3176 if (ss->rx_done.entry != NULL) 3177 mxge_dma_free(&ss->rx_done.dma); 3178 ss->rx_done.entry = NULL; 3179 3180 if (ss->tx.req_bytes != NULL) 3181 free(ss->tx.req_bytes, M_DEVBUF); 3182 ss->tx.req_bytes = NULL; 3183 3184 if (ss->tx.seg_list != NULL) 3185 free(ss->tx.seg_list, M_DEVBUF); 3186 ss->tx.seg_list = NULL; 3187 3188 if (ss->rx_small.shadow != NULL) 3189 free(ss->rx_small.shadow, M_DEVBUF); 3190 ss->rx_small.shadow = NULL; 3191 3192 if (ss->rx_big.shadow != NULL) 3193 free(ss->rx_big.shadow, M_DEVBUF); 3194 ss->rx_big.shadow = NULL; 3195 3196 if (ss->tx.info != NULL) { 3197 if (ss->tx.dmat != NULL) { 3198 for (i = 0; i <= ss->tx.mask; i++) { 3199 bus_dmamap_destroy(ss->tx.dmat, 3200 ss->tx.info[i].map); 3201 } 3202 bus_dma_tag_destroy(ss->tx.dmat); 3203 } 3204 free(ss->tx.info, M_DEVBUF); 3205 } 3206 ss->tx.info = NULL; 3207 3208 if (ss->rx_small.info != NULL) { 3209 if (ss->rx_small.dmat != NULL) { 3210 for (i = 0; i <= ss->rx_small.mask; i++) { 3211 bus_dmamap_destroy(ss->rx_small.dmat, 3212 ss->rx_small.info[i].map); 3213 } 3214 bus_dmamap_destroy(ss->rx_small.dmat, 3215 ss->rx_small.extra_map); 3216 bus_dma_tag_destroy(ss->rx_small.dmat); 3217 } 3218 free(ss->rx_small.info, M_DEVBUF); 3219 } 3220 ss->rx_small.info = NULL; 3221 3222 if (ss->rx_big.info != NULL) { 3223 if (ss->rx_big.dmat != NULL) { 3224 for (i = 0; i <= ss->rx_big.mask; i++) { 3225 bus_dmamap_destroy(ss->rx_big.dmat, 3226 ss->rx_big.info[i].map); 3227 } 3228 bus_dmamap_destroy(ss->rx_big.dmat, 3229 ss->rx_big.extra_map); 3230 bus_dma_tag_destroy(ss->rx_big.dmat); 3231 } 3232 free(ss->rx_big.info, M_DEVBUF); 3233 } 3234 ss->rx_big.info = NULL; 3235 } 3236 3237 static void 3238 mxge_free_rings(mxge_softc_t *sc) 3239 { 3240 int slice; 3241 3242 for (slice = 0; slice < sc->num_slices; slice++) 3243 mxge_free_slice_rings(&sc->ss[slice]); 3244 } 3245 3246 static int 3247 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3248 int 
tx_ring_entries) 3249 { 3250 mxge_softc_t *sc = ss->sc; 3251 size_t bytes; 3252 int err, i; 3253 3254 /* allocate per-slice receive resources */ 3255 3256 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 3257 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 3258 3259 /* allocate the rx shadow rings */ 3260 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 3261 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3262 3263 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 3264 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3265 3266 /* allocate the rx host info rings */ 3267 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 3268 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3269 3270 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 3271 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3272 3273 /* allocate the rx busdma resources */ 3274 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3275 1, /* alignment */ 3276 4096, /* boundary */ 3277 BUS_SPACE_MAXADDR, /* low */ 3278 BUS_SPACE_MAXADDR, /* high */ 3279 NULL, NULL, /* filter */ 3280 MHLEN, /* maxsize */ 3281 1, /* num segs */ 3282 MHLEN, /* maxsegsize */ 3283 BUS_DMA_ALLOCNOW, /* flags */ 3284 NULL, NULL, /* lock */ 3285 &ss->rx_small.dmat); /* tag */ 3286 if (err != 0) { 3287 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3288 err); 3289 return err; 3290 } 3291 3292 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3293 1, /* alignment */ 3294 #if MXGE_VIRT_JUMBOS 3295 4096, /* boundary */ 3296 #else 3297 0, /* boundary */ 3298 #endif 3299 BUS_SPACE_MAXADDR, /* low */ 3300 BUS_SPACE_MAXADDR, /* high */ 3301 NULL, NULL, /* filter */ 3302 3*4096, /* maxsize */ 3303 #if MXGE_VIRT_JUMBOS 3304 3, /* num segs */ 3305 4096, /* maxsegsize*/ 3306 #else 3307 1, /* num segs */ 3308 MJUM9BYTES, /* maxsegsize*/ 3309 #endif 3310 BUS_DMA_ALLOCNOW, /* flags */ 3311 NULL, NULL, /* lock */ 3312 &ss->rx_big.dmat); /* tag */ 
3313 if (err != 0) { 3314 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3315 err); 3316 return err; 3317 } 3318 for (i = 0; i <= ss->rx_small.mask; i++) { 3319 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3320 &ss->rx_small.info[i].map); 3321 if (err != 0) { 3322 device_printf(sc->dev, "Err %d rx_small dmamap\n", 3323 err); 3324 return err; 3325 } 3326 } 3327 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3328 &ss->rx_small.extra_map); 3329 if (err != 0) { 3330 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 3331 err); 3332 return err; 3333 } 3334 3335 for (i = 0; i <= ss->rx_big.mask; i++) { 3336 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3337 &ss->rx_big.info[i].map); 3338 if (err != 0) { 3339 device_printf(sc->dev, "Err %d rx_big dmamap\n", 3340 err); 3341 return err; 3342 } 3343 } 3344 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3345 &ss->rx_big.extra_map); 3346 if (err != 0) { 3347 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 3348 err); 3349 return err; 3350 } 3351 3352 /* now allocate TX resources */ 3353 3354 #ifndef IFNET_BUF_RING 3355 /* only use a single TX ring for now */ 3356 if (ss != ss->sc->ss) 3357 return 0; 3358 #endif 3359 3360 ss->tx.mask = tx_ring_entries - 1; 3361 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3362 3363 /* allocate the tx request copy block */ 3364 bytes = 8 + 3365 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3366 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 3367 /* ensure req_list entries are aligned to 8 bytes */ 3368 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3369 ((uintptr_t)(ss->tx.req_bytes + 7) & ~7UL); 3370 3371 /* allocate the tx busdma segment list */ 3372 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3373 ss->tx.seg_list = (bus_dma_segment_t *) 3374 malloc(bytes, M_DEVBUF, M_WAITOK); 3375 3376 /* allocate the tx host info ring */ 3377 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3378 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 
3379 3380 /* allocate the tx busdma resources */ 3381 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3382 1, /* alignment */ 3383 sc->tx_boundary, /* boundary */ 3384 BUS_SPACE_MAXADDR, /* low */ 3385 BUS_SPACE_MAXADDR, /* high */ 3386 NULL, NULL, /* filter */ 3387 65536 + 256, /* maxsize */ 3388 ss->tx.max_desc - 2, /* num segs */ 3389 sc->tx_boundary, /* maxsegsz */ 3390 BUS_DMA_ALLOCNOW, /* flags */ 3391 NULL, NULL, /* lock */ 3392 &ss->tx.dmat); /* tag */ 3393 3394 if (err != 0) { 3395 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3396 err); 3397 return err; 3398 } 3399 3400 /* now use these tags to setup dmamaps for each slot 3401 in the ring */ 3402 for (i = 0; i <= ss->tx.mask; i++) { 3403 err = bus_dmamap_create(ss->tx.dmat, 0, 3404 &ss->tx.info[i].map); 3405 if (err != 0) { 3406 device_printf(sc->dev, "Err %d tx dmamap\n", 3407 err); 3408 return err; 3409 } 3410 } 3411 return 0; 3412 3413 } 3414 3415 static int 3416 mxge_alloc_rings(mxge_softc_t *sc) 3417 { 3418 mxge_cmd_t cmd; 3419 int tx_ring_size; 3420 int tx_ring_entries, rx_ring_entries; 3421 int err, slice; 3422 3423 /* get ring sizes */ 3424 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3425 tx_ring_size = cmd.data0; 3426 if (err != 0) { 3427 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3428 goto abort; 3429 } 3430 3431 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3432 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3433 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 3434 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 3435 IFQ_SET_READY(&sc->ifp->if_snd); 3436 3437 for (slice = 0; slice < sc->num_slices; slice++) { 3438 err = mxge_alloc_slice_rings(&sc->ss[slice], 3439 rx_ring_entries, 3440 tx_ring_entries); 3441 if (err != 0) 3442 goto abort; 3443 } 3444 return 0; 3445 3446 abort: 3447 mxge_free_rings(sc); 3448 return err; 3449 3450 } 3451 3452 static void 3453 mxge_choose_params(int mtu, int 
*big_buf_size, int *cl_size, int *nbufs) 3454 { 3455 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3456 3457 if (bufsize < MCLBYTES) { 3458 /* easy, everything fits in a single buffer */ 3459 *big_buf_size = MCLBYTES; 3460 *cl_size = MCLBYTES; 3461 *nbufs = 1; 3462 return; 3463 } 3464 3465 if (bufsize < MJUMPAGESIZE) { 3466 /* still easy, everything still fits in a single buffer */ 3467 *big_buf_size = MJUMPAGESIZE; 3468 *cl_size = MJUMPAGESIZE; 3469 *nbufs = 1; 3470 return; 3471 } 3472 #if MXGE_VIRT_JUMBOS 3473 /* now we need to use virtually contiguous buffers */ 3474 *cl_size = MJUM9BYTES; 3475 *big_buf_size = 4096; 3476 *nbufs = mtu / 4096 + 1; 3477 /* needs to be a power of two, so round up */ 3478 if (*nbufs == 3) 3479 *nbufs = 4; 3480 #else 3481 *cl_size = MJUM9BYTES; 3482 *big_buf_size = MJUM9BYTES; 3483 *nbufs = 1; 3484 #endif 3485 } 3486 3487 static int 3488 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3489 { 3490 mxge_softc_t *sc; 3491 mxge_cmd_t cmd; 3492 bus_dmamap_t map; 3493 int err, i, slice; 3494 3495 sc = ss->sc; 3496 slice = ss - sc->ss; 3497 3498 #if defined(INET) || defined(INET6) 3499 (void)tcp_lro_init(&ss->lc); 3500 #endif 3501 ss->lc.ifp = sc->ifp; 3502 3503 /* get the lanai pointers to the send and receive rings */ 3504 3505 err = 0; 3506 #ifndef IFNET_BUF_RING 3507 /* We currently only send from the first slice */ 3508 if (slice == 0) { 3509 #endif 3510 cmd.data0 = slice; 3511 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3512 ss->tx.lanai = 3513 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3514 ss->tx.send_go = (volatile uint32_t *) 3515 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3516 ss->tx.send_stop = (volatile uint32_t *) 3517 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3518 #ifndef IFNET_BUF_RING 3519 } 3520 #endif 3521 cmd.data0 = slice; 3522 err |= mxge_send_cmd(sc, 3523 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3524 ss->rx_small.lanai = 3525 (volatile 
mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3526 cmd.data0 = slice; 3527 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3528 ss->rx_big.lanai = 3529 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3530 3531 if (err != 0) { 3532 device_printf(sc->dev, 3533 "failed to get ring sizes or locations\n"); 3534 return EIO; 3535 } 3536 3537 /* stock receive rings */ 3538 for (i = 0; i <= ss->rx_small.mask; i++) { 3539 map = ss->rx_small.info[i].map; 3540 err = mxge_get_buf_small(ss, map, i); 3541 if (err) { 3542 device_printf(sc->dev, "alloced %d/%d smalls\n", 3543 i, ss->rx_small.mask + 1); 3544 return ENOMEM; 3545 } 3546 } 3547 for (i = 0; i <= ss->rx_big.mask; i++) { 3548 ss->rx_big.shadow[i].addr_low = 0xffffffff; 3549 ss->rx_big.shadow[i].addr_high = 0xffffffff; 3550 } 3551 ss->rx_big.nbufs = nbufs; 3552 ss->rx_big.cl_size = cl_size; 3553 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN + 3554 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3555 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { 3556 map = ss->rx_big.info[i].map; 3557 err = mxge_get_buf_big(ss, map, i); 3558 if (err) { 3559 device_printf(sc->dev, "alloced %d/%d bigs\n", 3560 i, ss->rx_big.mask + 1); 3561 return ENOMEM; 3562 } 3563 } 3564 return 0; 3565 } 3566 3567 static int 3568 mxge_open(mxge_softc_t *sc) 3569 { 3570 mxge_cmd_t cmd; 3571 int err, big_bytes, nbufs, slice, cl_size, i; 3572 bus_addr_t bus; 3573 volatile uint8_t *itable; 3574 struct mxge_slice_state *ss; 3575 3576 /* Copy the MAC address in case it was overridden */ 3577 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 3578 3579 err = mxge_reset(sc, 1); 3580 if (err != 0) { 3581 device_printf(sc->dev, "failed to reset\n"); 3582 return EIO; 3583 } 3584 3585 if (sc->num_slices > 1) { 3586 /* setup the indirection table */ 3587 cmd.data0 = sc->num_slices; 3588 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 3589 &cmd); 3590 3591 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 3592 &cmd); 
3593 if (err != 0) { 3594 device_printf(sc->dev, 3595 "failed to setup rss tables\n"); 3596 return err; 3597 } 3598 3599 /* just enable an identity mapping */ 3600 itable = sc->sram + cmd.data0; 3601 for (i = 0; i < sc->num_slices; i++) 3602 itable[i] = (uint8_t)i; 3603 3604 cmd.data0 = 1; 3605 cmd.data1 = mxge_rss_hash_type; 3606 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3607 if (err != 0) { 3608 device_printf(sc->dev, "failed to enable slices\n"); 3609 return err; 3610 } 3611 } 3612 3613 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); 3614 3615 cmd.data0 = nbufs; 3616 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 3617 &cmd); 3618 /* error is only meaningful if we're trying to set 3619 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 3620 if (err && nbufs > 1) { 3621 device_printf(sc->dev, 3622 "Failed to set alway-use-n to %d\n", 3623 nbufs); 3624 return EIO; 3625 } 3626 /* Give the firmware the mtu and the big and small buffer 3627 sizes. The firmware wants the big buf size to be a power 3628 of two. 
Luckily, FreeBSD's clusters are powers of two */ 3629 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3630 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3631 cmd.data0 = MHLEN - MXGEFW_PAD; 3632 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3633 &cmd); 3634 cmd.data0 = big_bytes; 3635 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3636 3637 if (err != 0) { 3638 device_printf(sc->dev, "failed to setup params\n"); 3639 goto abort; 3640 } 3641 3642 /* Now give him the pointer to the stats block */ 3643 for (slice = 0; 3644 #ifdef IFNET_BUF_RING 3645 slice < sc->num_slices; 3646 #else 3647 slice < 1; 3648 #endif 3649 slice++) { 3650 ss = &sc->ss[slice]; 3651 cmd.data0 = 3652 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3653 cmd.data1 = 3654 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3655 cmd.data2 = sizeof(struct mcp_irq_data); 3656 cmd.data2 |= (slice << 16); 3657 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3658 } 3659 3660 if (err != 0) { 3661 bus = sc->ss->fw_stats_dma.bus_addr; 3662 bus += offsetof(struct mcp_irq_data, send_done_count); 3663 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3664 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3665 err = mxge_send_cmd(sc, 3666 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3667 &cmd); 3668 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3669 sc->fw_multicast_support = 0; 3670 } else { 3671 sc->fw_multicast_support = 1; 3672 } 3673 3674 if (err != 0) { 3675 device_printf(sc->dev, "failed to setup params\n"); 3676 goto abort; 3677 } 3678 3679 for (slice = 0; slice < sc->num_slices; slice++) { 3680 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3681 if (err != 0) { 3682 device_printf(sc->dev, "couldn't open slice %d\n", 3683 slice); 3684 goto abort; 3685 } 3686 } 3687 3688 /* Finally, start the firmware running */ 3689 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3690 if (err) { 3691 device_printf(sc->dev, "Couldn't bring up link\n"); 3692 
goto abort; 3693 } 3694 #ifdef IFNET_BUF_RING 3695 for (slice = 0; slice < sc->num_slices; slice++) { 3696 ss = &sc->ss[slice]; 3697 ss->if_drv_flags |= IFF_DRV_RUNNING; 3698 ss->if_drv_flags &= ~IFF_DRV_OACTIVE; 3699 } 3700 #endif 3701 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3702 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3703 3704 return 0; 3705 3706 abort: 3707 mxge_free_mbufs(sc); 3708 3709 return err; 3710 } 3711 3712 static int 3713 mxge_close(mxge_softc_t *sc, int down) 3714 { 3715 mxge_cmd_t cmd; 3716 int err, old_down_cnt; 3717 #ifdef IFNET_BUF_RING 3718 struct mxge_slice_state *ss; 3719 int slice; 3720 #endif 3721 3722 #ifdef IFNET_BUF_RING 3723 for (slice = 0; slice < sc->num_slices; slice++) { 3724 ss = &sc->ss[slice]; 3725 ss->if_drv_flags &= ~IFF_DRV_RUNNING; 3726 } 3727 #endif 3728 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3729 if (!down) { 3730 old_down_cnt = sc->down_cnt; 3731 wmb(); 3732 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3733 if (err) { 3734 device_printf(sc->dev, 3735 "Couldn't bring down link\n"); 3736 } 3737 if (old_down_cnt == sc->down_cnt) { 3738 /* wait for down irq */ 3739 DELAY(10 * sc->intr_coal_delay); 3740 } 3741 wmb(); 3742 if (old_down_cnt == sc->down_cnt) { 3743 device_printf(sc->dev, "never got down irq\n"); 3744 } 3745 } 3746 mxge_free_mbufs(sc); 3747 3748 return 0; 3749 } 3750 3751 static void 3752 mxge_setup_cfg_space(mxge_softc_t *sc) 3753 { 3754 device_t dev = sc->dev; 3755 int reg; 3756 uint16_t lnk, pectl; 3757 3758 /* find the PCIe link width and set max read request to 4KB*/ 3759 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { 3760 lnk = pci_read_config(dev, reg + 0x12, 2); 3761 sc->link_width = (lnk >> 4) & 0x3f; 3762 3763 if (sc->pectl == 0) { 3764 pectl = pci_read_config(dev, reg + 0x8, 2); 3765 pectl = (pectl & ~0x7000) | (5 << 12); 3766 pci_write_config(dev, reg + 0x8, pectl, 2); 3767 sc->pectl = pectl; 3768 } else { 3769 /* restore saved pectl after watchdog reset */ 3770 pci_write_config(dev, 
reg + 0x8, sc->pectl, 2); 3771 } 3772 } 3773 3774 /* Enable DMA and Memory space access */ 3775 pci_enable_busmaster(dev); 3776 } 3777 3778 static uint32_t 3779 mxge_read_reboot(mxge_softc_t *sc) 3780 { 3781 device_t dev = sc->dev; 3782 uint32_t vs; 3783 3784 /* find the vendor specific offset */ 3785 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) { 3786 device_printf(sc->dev, 3787 "could not find vendor specific offset\n"); 3788 return (uint32_t)-1; 3789 } 3790 /* enable read32 mode */ 3791 pci_write_config(dev, vs + 0x10, 0x3, 1); 3792 /* tell NIC which register to read */ 3793 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3794 return (pci_read_config(dev, vs + 0x14, 4)); 3795 } 3796 3797 static void 3798 mxge_watchdog_reset(mxge_softc_t *sc) 3799 { 3800 struct pci_devinfo *dinfo; 3801 struct mxge_slice_state *ss; 3802 int err, running, s, num_tx_slices = 1; 3803 uint32_t reboot; 3804 uint16_t cmd; 3805 3806 err = ENXIO; 3807 3808 device_printf(sc->dev, "Watchdog reset!\n"); 3809 3810 /* 3811 * check to see if the NIC rebooted. If it did, then all of 3812 * PCI config space has been reset, and things like the 3813 * busmaster bit will be zero. If this is the case, then we 3814 * must restore PCI config space before the NIC can be used 3815 * again 3816 */ 3817 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3818 if (cmd == 0xffff) { 3819 /* 3820 * maybe the watchdog caught the NIC rebooting; wait 3821 * up to 100ms for it to finish. 
If it does not come 3822 * back, then give up 3823 */ 3824 DELAY(1000*100); 3825 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3826 if (cmd == 0xffff) { 3827 device_printf(sc->dev, "NIC disappeared!\n"); 3828 } 3829 } 3830 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3831 /* print the reboot status */ 3832 reboot = mxge_read_reboot(sc); 3833 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3834 reboot); 3835 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 3836 if (running) { 3837 /* 3838 * quiesce NIC so that TX routines will not try to 3839 * xmit after restoration of BAR 3840 */ 3841 3842 /* Mark the link as down */ 3843 if (sc->link_state) { 3844 sc->link_state = 0; 3845 if_link_state_change(sc->ifp, 3846 LINK_STATE_DOWN); 3847 } 3848 #ifdef IFNET_BUF_RING 3849 num_tx_slices = sc->num_slices; 3850 #endif 3851 /* grab all TX locks to ensure no tx */ 3852 for (s = 0; s < num_tx_slices; s++) { 3853 ss = &sc->ss[s]; 3854 mtx_lock(&ss->tx.mtx); 3855 } 3856 mxge_close(sc, 1); 3857 } 3858 /* restore PCI configuration space */ 3859 dinfo = device_get_ivars(sc->dev); 3860 pci_cfg_restore(sc->dev, dinfo); 3861 3862 /* and redo any changes we made to our config space */ 3863 mxge_setup_cfg_space(sc); 3864 3865 /* reload f/w */ 3866 err = mxge_load_firmware(sc, 0); 3867 if (err) { 3868 device_printf(sc->dev, 3869 "Unable to re-load f/w\n"); 3870 } 3871 if (running) { 3872 if (!err) 3873 err = mxge_open(sc); 3874 /* release all TX locks */ 3875 for (s = 0; s < num_tx_slices; s++) { 3876 ss = &sc->ss[s]; 3877 #ifdef IFNET_BUF_RING 3878 mxge_start_locked(ss); 3879 #endif 3880 mtx_unlock(&ss->tx.mtx); 3881 } 3882 } 3883 sc->watchdog_resets++; 3884 } else { 3885 device_printf(sc->dev, 3886 "NIC did not reboot, not resetting\n"); 3887 err = 0; 3888 } 3889 if (err) { 3890 device_printf(sc->dev, "watchdog reset failed\n"); 3891 } else { 3892 if (sc->dying == 2) 3893 sc->dying = 0; 3894 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3895 } 3896 } 3897 3898 static 
void 3899 mxge_watchdog_task(void *arg, int pending) 3900 { 3901 mxge_softc_t *sc = arg; 3902 3903 mtx_lock(&sc->driver_mtx); 3904 mxge_watchdog_reset(sc); 3905 mtx_unlock(&sc->driver_mtx); 3906 } 3907 3908 static void 3909 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice) 3910 { 3911 tx = &sc->ss[slice].tx; 3912 device_printf(sc->dev, "slice %d struck? ring state:\n", slice); 3913 device_printf(sc->dev, 3914 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3915 tx->req, tx->done, tx->queue_active); 3916 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3917 tx->activate, tx->deactivate); 3918 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3919 tx->pkt_done, 3920 be32toh(sc->ss->fw_stats->send_done_count)); 3921 } 3922 3923 static int 3924 mxge_watchdog(mxge_softc_t *sc) 3925 { 3926 mxge_tx_ring_t *tx; 3927 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3928 int i, err = 0; 3929 3930 /* see if we have outstanding transmits, which 3931 have been pending for more than mxge_ticks */ 3932 for (i = 0; 3933 #ifdef IFNET_BUF_RING 3934 (i < sc->num_slices) && (err == 0); 3935 #else 3936 (i < 1) && (err == 0); 3937 #endif 3938 i++) { 3939 tx = &sc->ss[i].tx; 3940 if (tx->req != tx->done && 3941 tx->watchdog_req != tx->watchdog_done && 3942 tx->done == tx->watchdog_done) { 3943 /* check for pause blocking before resetting */ 3944 if (tx->watchdog_rx_pause == rx_pause) { 3945 mxge_warn_stuck(sc, tx, i); 3946 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 3947 return (ENXIO); 3948 } 3949 else 3950 device_printf(sc->dev, "Flow control blocking " 3951 "xmits, check link partner\n"); 3952 } 3953 3954 tx->watchdog_req = tx->req; 3955 tx->watchdog_done = tx->done; 3956 tx->watchdog_rx_pause = rx_pause; 3957 } 3958 3959 if (sc->need_media_probe) 3960 mxge_media_probe(sc); 3961 return (err); 3962 } 3963 3964 static uint64_t 3965 mxge_get_counter(struct ifnet *ifp, ift_counter cnt) 3966 { 3967 struct mxge_softc *sc; 3968 uint64_t rv; 3969 3970 sc = 
if_getsoftc(ifp); 3971 rv = 0; 3972 3973 switch (cnt) { 3974 case IFCOUNTER_IPACKETS: 3975 for (int s = 0; s < sc->num_slices; s++) 3976 rv += sc->ss[s].ipackets; 3977 return (rv); 3978 case IFCOUNTER_OPACKETS: 3979 for (int s = 0; s < sc->num_slices; s++) 3980 rv += sc->ss[s].opackets; 3981 return (rv); 3982 case IFCOUNTER_OERRORS: 3983 for (int s = 0; s < sc->num_slices; s++) 3984 rv += sc->ss[s].oerrors; 3985 return (rv); 3986 #ifdef IFNET_BUF_RING 3987 case IFCOUNTER_OBYTES: 3988 for (int s = 0; s < sc->num_slices; s++) 3989 rv += sc->ss[s].obytes; 3990 return (rv); 3991 case IFCOUNTER_OMCASTS: 3992 for (int s = 0; s < sc->num_slices; s++) 3993 rv += sc->ss[s].omcasts; 3994 return (rv); 3995 case IFCOUNTER_OQDROPS: 3996 for (int s = 0; s < sc->num_slices; s++) 3997 rv += sc->ss[s].tx.br->br_drops; 3998 return (rv); 3999 #endif 4000 default: 4001 return (if_get_counter_default(ifp, cnt)); 4002 } 4003 } 4004 4005 static void 4006 mxge_tick(void *arg) 4007 { 4008 mxge_softc_t *sc = arg; 4009 u_long pkts = 0; 4010 int err = 0; 4011 int running, ticks; 4012 uint16_t cmd; 4013 4014 ticks = mxge_ticks; 4015 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 4016 if (running) { 4017 if (!sc->watchdog_countdown) { 4018 err = mxge_watchdog(sc); 4019 sc->watchdog_countdown = 4; 4020 } 4021 sc->watchdog_countdown--; 4022 } 4023 if (pkts == 0) { 4024 /* ensure NIC did not suffer h/w fault while idle */ 4025 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 4026 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 4027 sc->dying = 2; 4028 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 4029 err = ENXIO; 4030 } 4031 /* look less often if NIC is idle */ 4032 ticks *= 4; 4033 } 4034 4035 if (err == 0) 4036 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 4037 4038 } 4039 4040 static int 4041 mxge_media_change(struct ifnet *ifp) 4042 { 4043 return EINVAL; 4044 } 4045 4046 static int 4047 mxge_change_mtu(mxge_softc_t *sc, int mtu) 4048 { 4049 struct ifnet *ifp = sc->ifp; 4050 int real_mtu, 
old_mtu; 4051 int err = 0; 4052 4053 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 4054 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 4055 return EINVAL; 4056 mtx_lock(&sc->driver_mtx); 4057 old_mtu = ifp->if_mtu; 4058 ifp->if_mtu = mtu; 4059 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4060 mxge_close(sc, 0); 4061 err = mxge_open(sc); 4062 if (err != 0) { 4063 ifp->if_mtu = old_mtu; 4064 mxge_close(sc, 0); 4065 (void) mxge_open(sc); 4066 } 4067 } 4068 mtx_unlock(&sc->driver_mtx); 4069 return err; 4070 } 4071 4072 static void 4073 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 4074 { 4075 mxge_softc_t *sc = ifp->if_softc; 4076 4077 if (sc == NULL) 4078 return; 4079 ifmr->ifm_status = IFM_AVALID; 4080 ifmr->ifm_active = IFM_ETHER | IFM_FDX; 4081 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 4082 ifmr->ifm_active |= sc->current_media; 4083 } 4084 4085 static int 4086 mxge_fetch_i2c(mxge_softc_t *sc, struct ifi2creq *i2c) 4087 { 4088 mxge_cmd_t cmd; 4089 uint32_t i2c_args; 4090 int i, ms, err; 4091 4092 if (i2c->dev_addr != 0xA0 && 4093 i2c->dev_addr != 0xA2) 4094 return (EINVAL); 4095 if (i2c->len > sizeof(i2c->data)) 4096 return (EINVAL); 4097 4098 for (i = 0; i < i2c->len; i++) { 4099 i2c_args = i2c->dev_addr << 0x8; 4100 i2c_args |= i2c->offset + i; 4101 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 4102 cmd.data1 = i2c_args; 4103 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 4104 4105 if (err != MXGEFW_CMD_OK) 4106 return (EIO); 4107 /* now we wait for the data to be cached */ 4108 cmd.data0 = i2c_args & 0xff; 4109 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 4110 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 4111 cmd.data0 = i2c_args & 0xff; 4112 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 4113 if (err == EBUSY) 4114 DELAY(1000); 4115 } 4116 if (err != MXGEFW_CMD_OK) 4117 return (EIO); 4118 i2c->data[i] = cmd.data0; 4119 } 4120 return (0); 4121 } 4122 4123 static int 4124 mxge_ioctl(struct ifnet *ifp, 
u_long command, caddr_t data) 4125 { 4126 mxge_softc_t *sc = ifp->if_softc; 4127 struct ifreq *ifr = (struct ifreq *)data; 4128 struct ifi2creq i2c; 4129 int err, mask; 4130 4131 err = 0; 4132 switch (command) { 4133 case SIOCSIFMTU: 4134 err = mxge_change_mtu(sc, ifr->ifr_mtu); 4135 break; 4136 4137 case SIOCSIFFLAGS: 4138 mtx_lock(&sc->driver_mtx); 4139 if (sc->dying) { 4140 mtx_unlock(&sc->driver_mtx); 4141 return EINVAL; 4142 } 4143 if (ifp->if_flags & IFF_UP) { 4144 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 4145 err = mxge_open(sc); 4146 } else { 4147 /* take care of promis can allmulti 4148 flag chages */ 4149 mxge_change_promisc(sc, 4150 ifp->if_flags & IFF_PROMISC); 4151 mxge_set_multicast_list(sc); 4152 } 4153 } else { 4154 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4155 mxge_close(sc, 0); 4156 } 4157 } 4158 mtx_unlock(&sc->driver_mtx); 4159 break; 4160 4161 case SIOCADDMULTI: 4162 case SIOCDELMULTI: 4163 mtx_lock(&sc->driver_mtx); 4164 if (sc->dying) { 4165 mtx_unlock(&sc->driver_mtx); 4166 return (EINVAL); 4167 } 4168 mxge_set_multicast_list(sc); 4169 mtx_unlock(&sc->driver_mtx); 4170 break; 4171 4172 case SIOCSIFCAP: 4173 mtx_lock(&sc->driver_mtx); 4174 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 4175 if (mask & IFCAP_TXCSUM) { 4176 if (IFCAP_TXCSUM & ifp->if_capenable) { 4177 mask &= ~IFCAP_TSO4; 4178 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 4179 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP); 4180 } else { 4181 ifp->if_capenable |= IFCAP_TXCSUM; 4182 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 4183 } 4184 } 4185 if (mask & IFCAP_RXCSUM) { 4186 if (IFCAP_RXCSUM & ifp->if_capenable) { 4187 ifp->if_capenable &= ~IFCAP_RXCSUM; 4188 } else { 4189 ifp->if_capenable |= IFCAP_RXCSUM; 4190 } 4191 } 4192 if (mask & IFCAP_TSO4) { 4193 if (IFCAP_TSO4 & ifp->if_capenable) { 4194 ifp->if_capenable &= ~IFCAP_TSO4; 4195 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 4196 ifp->if_capenable |= IFCAP_TSO4; 4197 ifp->if_hwassist |= CSUM_TSO; 4198 } else { 4199 
printf("mxge requires tx checksum offload" 4200 " be enabled to use TSO\n"); 4201 err = EINVAL; 4202 } 4203 } 4204 #if IFCAP_TSO6 4205 if (mask & IFCAP_TXCSUM_IPV6) { 4206 if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) { 4207 mask &= ~IFCAP_TSO6; 4208 ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 4209 | IFCAP_TSO6); 4210 ifp->if_hwassist &= ~(CSUM_TCP_IPV6 4211 | CSUM_UDP); 4212 } else { 4213 ifp->if_capenable |= IFCAP_TXCSUM_IPV6; 4214 ifp->if_hwassist |= (CSUM_TCP_IPV6 4215 | CSUM_UDP_IPV6); 4216 } 4217 } 4218 if (mask & IFCAP_RXCSUM_IPV6) { 4219 if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) { 4220 ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6; 4221 } else { 4222 ifp->if_capenable |= IFCAP_RXCSUM_IPV6; 4223 } 4224 } 4225 if (mask & IFCAP_TSO6) { 4226 if (IFCAP_TSO6 & ifp->if_capenable) { 4227 ifp->if_capenable &= ~IFCAP_TSO6; 4228 } else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) { 4229 ifp->if_capenable |= IFCAP_TSO6; 4230 ifp->if_hwassist |= CSUM_TSO; 4231 } else { 4232 printf("mxge requires tx checksum offload" 4233 " be enabled to use TSO\n"); 4234 err = EINVAL; 4235 } 4236 } 4237 #endif /*IFCAP_TSO6 */ 4238 4239 if (mask & IFCAP_LRO) 4240 ifp->if_capenable ^= IFCAP_LRO; 4241 if (mask & IFCAP_VLAN_HWTAGGING) 4242 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 4243 if (mask & IFCAP_VLAN_HWTSO) 4244 ifp->if_capenable ^= IFCAP_VLAN_HWTSO; 4245 4246 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) || 4247 !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING)) 4248 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO; 4249 4250 mtx_unlock(&sc->driver_mtx); 4251 VLAN_CAPABILITIES(ifp); 4252 4253 break; 4254 4255 case SIOCGIFMEDIA: 4256 mtx_lock(&sc->driver_mtx); 4257 if (sc->dying) { 4258 mtx_unlock(&sc->driver_mtx); 4259 return (EINVAL); 4260 } 4261 mxge_media_probe(sc); 4262 mtx_unlock(&sc->driver_mtx); 4263 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 4264 &sc->media, command); 4265 break; 4266 4267 case SIOCGI2C: 4268 if (sc->connector != MXGE_XFP && 4269 sc->connector != MXGE_SFP) { 4270 err = ENXIO; 4271 
break; 4272 } 4273 err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); 4274 if (err != 0) 4275 break; 4276 mtx_lock(&sc->driver_mtx); 4277 if (sc->dying) { 4278 mtx_unlock(&sc->driver_mtx); 4279 return (EINVAL); 4280 } 4281 err = mxge_fetch_i2c(sc, &i2c); 4282 mtx_unlock(&sc->driver_mtx); 4283 if (err == 0) 4284 err = copyout(&i2c, ifr_data_get_ptr(ifr), 4285 sizeof(i2c)); 4286 break; 4287 default: 4288 err = ether_ioctl(ifp, command, data); 4289 break; 4290 } 4291 return err; 4292 } 4293 4294 static void 4295 mxge_fetch_tunables(mxge_softc_t *sc) 4296 { 4297 4298 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); 4299 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 4300 &mxge_flow_control); 4301 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 4302 &mxge_intr_coal_delay); 4303 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 4304 &mxge_nvidia_ecrc_enable); 4305 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 4306 &mxge_force_firmware); 4307 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 4308 &mxge_deassert_wait); 4309 TUNABLE_INT_FETCH("hw.mxge.verbose", 4310 &mxge_verbose); 4311 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 4312 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); 4313 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); 4314 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type); 4315 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu); 4316 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle); 4317 4318 if (bootverbose) 4319 mxge_verbose = 1; 4320 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 4321 mxge_intr_coal_delay = 30; 4322 if (mxge_ticks == 0) 4323 mxge_ticks = hz / 2; 4324 sc->pause = mxge_flow_control; 4325 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 4326 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) { 4327 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 4328 } 4329 if (mxge_initial_mtu > ETHERMTU_JUMBO || 4330 mxge_initial_mtu < ETHER_MIN_LEN) 4331 
mxge_initial_mtu = ETHERMTU_JUMBO; 4332 4333 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE) 4334 mxge_throttle = MXGE_MAX_THROTTLE; 4335 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE) 4336 mxge_throttle = MXGE_MIN_THROTTLE; 4337 sc->throttle = mxge_throttle; 4338 } 4339 4340 static void 4341 mxge_free_slices(mxge_softc_t *sc) 4342 { 4343 struct mxge_slice_state *ss; 4344 int i; 4345 4346 if (sc->ss == NULL) 4347 return; 4348 4349 for (i = 0; i < sc->num_slices; i++) { 4350 ss = &sc->ss[i]; 4351 if (ss->fw_stats != NULL) { 4352 mxge_dma_free(&ss->fw_stats_dma); 4353 ss->fw_stats = NULL; 4354 #ifdef IFNET_BUF_RING 4355 if (ss->tx.br != NULL) { 4356 drbr_free(ss->tx.br, M_DEVBUF); 4357 ss->tx.br = NULL; 4358 } 4359 #endif 4360 mtx_destroy(&ss->tx.mtx); 4361 } 4362 if (ss->rx_done.entry != NULL) { 4363 mxge_dma_free(&ss->rx_done.dma); 4364 ss->rx_done.entry = NULL; 4365 } 4366 } 4367 free(sc->ss, M_DEVBUF); 4368 sc->ss = NULL; 4369 } 4370 4371 static int 4372 mxge_alloc_slices(mxge_softc_t *sc) 4373 { 4374 mxge_cmd_t cmd; 4375 struct mxge_slice_state *ss; 4376 size_t bytes; 4377 int err, i, max_intr_slots; 4378 4379 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4380 if (err != 0) { 4381 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4382 return err; 4383 } 4384 sc->rx_ring_size = cmd.data0; 4385 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); 4386 4387 bytes = sizeof (*sc->ss) * sc->num_slices; 4388 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO); 4389 if (sc->ss == NULL) 4390 return (ENOMEM); 4391 for (i = 0; i < sc->num_slices; i++) { 4392 ss = &sc->ss[i]; 4393 4394 ss->sc = sc; 4395 4396 /* allocate per-slice rx interrupt queues */ 4397 4398 bytes = max_intr_slots * sizeof (*ss->rx_done.entry); 4399 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096); 4400 if (err != 0) 4401 goto abort; 4402 ss->rx_done.entry = ss->rx_done.dma.addr; 4403 bzero(ss->rx_done.entry, bytes); 4404 4405 /* 4406 * 
allocate the per-slice firmware stats; stats 4407 * (including tx) are used used only on the first 4408 * slice for now 4409 */ 4410 #ifndef IFNET_BUF_RING 4411 if (i > 0) 4412 continue; 4413 #endif 4414 4415 bytes = sizeof (*ss->fw_stats); 4416 err = mxge_dma_alloc(sc, &ss->fw_stats_dma, 4417 sizeof (*ss->fw_stats), 64); 4418 if (err != 0) 4419 goto abort; 4420 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr; 4421 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name), 4422 "%s:tx(%d)", device_get_nameunit(sc->dev), i); 4423 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF); 4424 #ifdef IFNET_BUF_RING 4425 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK, 4426 &ss->tx.mtx); 4427 #endif 4428 } 4429 4430 return (0); 4431 4432 abort: 4433 mxge_free_slices(sc); 4434 return (ENOMEM); 4435 } 4436 4437 static void 4438 mxge_slice_probe(mxge_softc_t *sc) 4439 { 4440 mxge_cmd_t cmd; 4441 char *old_fw; 4442 int msix_cnt, status, max_intr_slots; 4443 4444 sc->num_slices = 1; 4445 /* 4446 * don't enable multiple slices if they are not enabled, 4447 * or if this is not an SMP system 4448 */ 4449 4450 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2) 4451 return; 4452 4453 /* see how many MSI-X interrupts are available */ 4454 msix_cnt = pci_msix_count(sc->dev); 4455 if (msix_cnt < 2) 4456 return; 4457 4458 /* now load the slice aware firmware see what it supports */ 4459 old_fw = sc->fw_name; 4460 if (old_fw == mxge_fw_aligned) 4461 sc->fw_name = mxge_fw_rss_aligned; 4462 else 4463 sc->fw_name = mxge_fw_rss_unaligned; 4464 status = mxge_load_firmware(sc, 0); 4465 if (status != 0) { 4466 device_printf(sc->dev, "Falling back to a single slice\n"); 4467 return; 4468 } 4469 4470 /* try to send a reset command to the card to see if it 4471 is alive */ 4472 memset(&cmd, 0, sizeof (cmd)); 4473 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 4474 if (status != 0) { 4475 device_printf(sc->dev, "failed reset\n"); 4476 goto abort_with_fw; 4477 } 4478 4479 /* 
get rx ring size */ 4480 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4481 if (status != 0) { 4482 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4483 goto abort_with_fw; 4484 } 4485 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t)); 4486 4487 /* tell it the size of the interrupt queues */ 4488 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot); 4489 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 4490 if (status != 0) { 4491 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 4492 goto abort_with_fw; 4493 } 4494 4495 /* ask the maximum number of slices it supports */ 4496 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 4497 if (status != 0) { 4498 device_printf(sc->dev, 4499 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); 4500 goto abort_with_fw; 4501 } 4502 sc->num_slices = cmd.data0; 4503 if (sc->num_slices > msix_cnt) 4504 sc->num_slices = msix_cnt; 4505 4506 if (mxge_max_slices == -1) { 4507 /* cap to number of CPUs in system */ 4508 if (sc->num_slices > mp_ncpus) 4509 sc->num_slices = mp_ncpus; 4510 } else { 4511 if (sc->num_slices > mxge_max_slices) 4512 sc->num_slices = mxge_max_slices; 4513 } 4514 /* make sure it is a power of two */ 4515 while (sc->num_slices & (sc->num_slices - 1)) 4516 sc->num_slices--; 4517 4518 if (mxge_verbose) 4519 device_printf(sc->dev, "using %d slices\n", 4520 sc->num_slices); 4521 4522 return; 4523 4524 abort_with_fw: 4525 sc->fw_name = old_fw; 4526 (void) mxge_load_firmware(sc, 0); 4527 } 4528 4529 static int 4530 mxge_add_msix_irqs(mxge_softc_t *sc) 4531 { 4532 size_t bytes; 4533 int count, err, i, rid; 4534 4535 rid = PCIR_BAR(2); 4536 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 4537 &rid, RF_ACTIVE); 4538 4539 if (sc->msix_table_res == NULL) { 4540 device_printf(sc->dev, "couldn't alloc MSIX table res\n"); 4541 return ENXIO; 4542 } 4543 4544 count = sc->num_slices; 4545 err = pci_alloc_msix(sc->dev, &count); 4546 if (err != 
0) { 4547 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d" 4548 "err = %d \n", sc->num_slices, err); 4549 goto abort_with_msix_table; 4550 } 4551 if (count < sc->num_slices) { 4552 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n", 4553 count, sc->num_slices); 4554 device_printf(sc->dev, 4555 "Try setting hw.mxge.max_slices to %d\n", 4556 count); 4557 err = ENOSPC; 4558 goto abort_with_msix; 4559 } 4560 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices; 4561 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4562 if (sc->msix_irq_res == NULL) { 4563 err = ENOMEM; 4564 goto abort_with_msix; 4565 } 4566 4567 for (i = 0; i < sc->num_slices; i++) { 4568 rid = i + 1; 4569 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev, 4570 SYS_RES_IRQ, 4571 &rid, RF_ACTIVE); 4572 if (sc->msix_irq_res[i] == NULL) { 4573 device_printf(sc->dev, "couldn't allocate IRQ res" 4574 " for message %d\n", i); 4575 err = ENXIO; 4576 goto abort_with_res; 4577 } 4578 } 4579 4580 bytes = sizeof (*sc->msix_ih) * sc->num_slices; 4581 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4582 4583 for (i = 0; i < sc->num_slices; i++) { 4584 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i], 4585 INTR_TYPE_NET | INTR_MPSAFE, 4586 #if __FreeBSD_version > 700030 4587 NULL, 4588 #endif 4589 mxge_intr, &sc->ss[i], &sc->msix_ih[i]); 4590 if (err != 0) { 4591 device_printf(sc->dev, "couldn't setup intr for " 4592 "message %d\n", i); 4593 goto abort_with_intr; 4594 } 4595 bus_describe_intr(sc->dev, sc->msix_irq_res[i], 4596 sc->msix_ih[i], "s%d", i); 4597 } 4598 4599 if (mxge_verbose) { 4600 device_printf(sc->dev, "using %d msix IRQs:", 4601 sc->num_slices); 4602 for (i = 0; i < sc->num_slices; i++) 4603 printf(" %jd", rman_get_start(sc->msix_irq_res[i])); 4604 printf("\n"); 4605 } 4606 return (0); 4607 4608 abort_with_intr: 4609 for (i = 0; i < sc->num_slices; i++) { 4610 if (sc->msix_ih[i] != NULL) { 4611 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4612 
sc->msix_ih[i]); 4613 sc->msix_ih[i] = NULL; 4614 } 4615 } 4616 free(sc->msix_ih, M_DEVBUF); 4617 4618 abort_with_res: 4619 for (i = 0; i < sc->num_slices; i++) { 4620 rid = i + 1; 4621 if (sc->msix_irq_res[i] != NULL) 4622 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4623 sc->msix_irq_res[i]); 4624 sc->msix_irq_res[i] = NULL; 4625 } 4626 free(sc->msix_irq_res, M_DEVBUF); 4627 4628 abort_with_msix: 4629 pci_release_msi(sc->dev); 4630 4631 abort_with_msix_table: 4632 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4633 sc->msix_table_res); 4634 4635 return err; 4636 } 4637 4638 static int 4639 mxge_add_single_irq(mxge_softc_t *sc) 4640 { 4641 int count, err, rid; 4642 4643 count = pci_msi_count(sc->dev); 4644 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) { 4645 rid = 1; 4646 } else { 4647 rid = 0; 4648 sc->legacy_irq = 1; 4649 } 4650 sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid, 4651 RF_SHAREABLE | RF_ACTIVE); 4652 if (sc->irq_res == NULL) { 4653 device_printf(sc->dev, "could not alloc interrupt\n"); 4654 return ENXIO; 4655 } 4656 if (mxge_verbose) 4657 device_printf(sc->dev, "using %s irq %jd\n", 4658 sc->legacy_irq ? "INTx" : "MSI", 4659 rman_get_start(sc->irq_res)); 4660 err = bus_setup_intr(sc->dev, sc->irq_res, 4661 INTR_TYPE_NET | INTR_MPSAFE, 4662 #if __FreeBSD_version > 700030 4663 NULL, 4664 #endif 4665 mxge_intr, &sc->ss[0], &sc->ih); 4666 if (err != 0) { 4667 bus_release_resource(sc->dev, SYS_RES_IRQ, 4668 sc->legacy_irq ? 
0 : 1, sc->irq_res); 4669 if (!sc->legacy_irq) 4670 pci_release_msi(sc->dev); 4671 } 4672 return err; 4673 } 4674 4675 static void 4676 mxge_rem_msix_irqs(mxge_softc_t *sc) 4677 { 4678 int i, rid; 4679 4680 for (i = 0; i < sc->num_slices; i++) { 4681 if (sc->msix_ih[i] != NULL) { 4682 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4683 sc->msix_ih[i]); 4684 sc->msix_ih[i] = NULL; 4685 } 4686 } 4687 free(sc->msix_ih, M_DEVBUF); 4688 4689 for (i = 0; i < sc->num_slices; i++) { 4690 rid = i + 1; 4691 if (sc->msix_irq_res[i] != NULL) 4692 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4693 sc->msix_irq_res[i]); 4694 sc->msix_irq_res[i] = NULL; 4695 } 4696 free(sc->msix_irq_res, M_DEVBUF); 4697 4698 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4699 sc->msix_table_res); 4700 4701 pci_release_msi(sc->dev); 4702 return; 4703 } 4704 4705 static void 4706 mxge_rem_single_irq(mxge_softc_t *sc) 4707 { 4708 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 4709 bus_release_resource(sc->dev, SYS_RES_IRQ, 4710 sc->legacy_irq ? 
0 : 1, sc->irq_res); 4711 if (!sc->legacy_irq) 4712 pci_release_msi(sc->dev); 4713 } 4714 4715 static void 4716 mxge_rem_irq(mxge_softc_t *sc) 4717 { 4718 if (sc->num_slices > 1) 4719 mxge_rem_msix_irqs(sc); 4720 else 4721 mxge_rem_single_irq(sc); 4722 } 4723 4724 static int 4725 mxge_add_irq(mxge_softc_t *sc) 4726 { 4727 int err; 4728 4729 if (sc->num_slices > 1) 4730 err = mxge_add_msix_irqs(sc); 4731 else 4732 err = mxge_add_single_irq(sc); 4733 4734 if (0 && err == 0 && sc->num_slices > 1) { 4735 mxge_rem_msix_irqs(sc); 4736 err = mxge_add_msix_irqs(sc); 4737 } 4738 return err; 4739 } 4740 4741 static int 4742 mxge_attach(device_t dev) 4743 { 4744 mxge_cmd_t cmd; 4745 mxge_softc_t *sc = device_get_softc(dev); 4746 struct ifnet *ifp; 4747 int err, rid; 4748 4749 sc->dev = dev; 4750 mxge_fetch_tunables(sc); 4751 4752 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc); 4753 sc->tq = taskqueue_create("mxge_taskq", M_WAITOK, 4754 taskqueue_thread_enqueue, &sc->tq); 4755 if (sc->tq == NULL) { 4756 err = ENOMEM; 4757 goto abort_with_nothing; 4758 } 4759 4760 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 4761 1, /* alignment */ 4762 0, /* boundary */ 4763 BUS_SPACE_MAXADDR, /* low */ 4764 BUS_SPACE_MAXADDR, /* high */ 4765 NULL, NULL, /* filter */ 4766 65536 + 256, /* maxsize */ 4767 MXGE_MAX_SEND_DESC, /* num segs */ 4768 65536, /* maxsegsize */ 4769 0, /* flags */ 4770 NULL, NULL, /* lock */ 4771 &sc->parent_dmat); /* tag */ 4772 4773 if (err != 0) { 4774 device_printf(sc->dev, "Err %d allocating parent dmat\n", 4775 err); 4776 goto abort_with_tq; 4777 } 4778 4779 ifp = sc->ifp = if_alloc(IFT_ETHER); 4780 if (ifp == NULL) { 4781 device_printf(dev, "can not if_alloc()\n"); 4782 err = ENOSPC; 4783 goto abort_with_parent_dmat; 4784 } 4785 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4786 4787 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", 4788 device_get_nameunit(dev)); 4789 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, 
NULL, MTX_DEF); 4790 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), 4791 "%s:drv", device_get_nameunit(dev)); 4792 mtx_init(&sc->driver_mtx, sc->driver_mtx_name, 4793 MTX_NETWORK_LOCK, MTX_DEF); 4794 4795 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); 4796 4797 mxge_setup_cfg_space(sc); 4798 4799 /* Map the board into the kernel */ 4800 rid = PCIR_BARS; 4801 sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, 4802 RF_ACTIVE); 4803 if (sc->mem_res == NULL) { 4804 device_printf(dev, "could not map memory\n"); 4805 err = ENXIO; 4806 goto abort_with_lock; 4807 } 4808 sc->sram = rman_get_virtual(sc->mem_res); 4809 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4810 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4811 device_printf(dev, "impossible memory region size %jd\n", 4812 rman_get_size(sc->mem_res)); 4813 err = ENXIO; 4814 goto abort_with_mem_res; 4815 } 4816 4817 /* make NULL terminated copy of the EEPROM strings section of 4818 lanai SRAM */ 4819 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4820 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4821 rman_get_bushandle(sc->mem_res), 4822 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4823 sc->eeprom_strings, 4824 MXGE_EEPROM_STRINGS_SIZE - 2); 4825 err = mxge_parse_strings(sc); 4826 if (err != 0) 4827 goto abort_with_mem_res; 4828 4829 /* Enable write combining for efficient use of PCIe bus */ 4830 mxge_enable_wc(sc); 4831 4832 /* Allocate the out of band dma memory */ 4833 err = mxge_dma_alloc(sc, &sc->cmd_dma, 4834 sizeof (mxge_cmd_t), 64); 4835 if (err != 0) 4836 goto abort_with_mem_res; 4837 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 4838 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4839 if (err != 0) 4840 goto abort_with_cmd_dma; 4841 4842 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4843 if (err != 0) 4844 goto abort_with_zeropad_dma; 4845 4846 /* select & load the firmware */ 4847 err = mxge_select_firmware(sc); 4848 if (err 
!= 0) 4849 goto abort_with_dmabench; 4850 sc->intr_coal_delay = mxge_intr_coal_delay; 4851 4852 mxge_slice_probe(sc); 4853 err = mxge_alloc_slices(sc); 4854 if (err != 0) 4855 goto abort_with_dmabench; 4856 4857 err = mxge_reset(sc, 0); 4858 if (err != 0) 4859 goto abort_with_slices; 4860 4861 err = mxge_alloc_rings(sc); 4862 if (err != 0) { 4863 device_printf(sc->dev, "failed to allocate rings\n"); 4864 goto abort_with_slices; 4865 } 4866 4867 err = mxge_add_irq(sc); 4868 if (err != 0) { 4869 device_printf(sc->dev, "failed to add irq\n"); 4870 goto abort_with_rings; 4871 } 4872 4873 ifp->if_baudrate = IF_Gbps(10); 4874 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 4875 IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 | 4876 IFCAP_RXCSUM_IPV6; 4877 #if defined(INET) || defined(INET6) 4878 ifp->if_capabilities |= IFCAP_LRO; 4879 #endif 4880 4881 #ifdef MXGE_NEW_VLAN_API 4882 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; 4883 4884 /* Only FW 1.4.32 and newer can do TSO over vlans */ 4885 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 4886 sc->fw_ver_tiny >= 32) 4887 ifp->if_capabilities |= IFCAP_VLAN_HWTSO; 4888 #endif 4889 sc->max_mtu = mxge_max_mtu(sc); 4890 if (sc->max_mtu >= 9000) 4891 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 4892 else 4893 device_printf(dev, "MTU limited to %d. 
Install " 4894 "latest firmware for 9000 byte jumbo support\n", 4895 sc->max_mtu - ETHER_HDR_LEN); 4896 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4897 ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6; 4898 /* check to see if f/w supports TSO for IPv6 */ 4899 if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) { 4900 if (CSUM_TCP_IPV6) 4901 ifp->if_capabilities |= IFCAP_TSO6; 4902 sc->max_tso6_hlen = min(cmd.data0, 4903 sizeof (sc->ss[0].scratch)); 4904 } 4905 ifp->if_capenable = ifp->if_capabilities; 4906 if (sc->lro_cnt == 0) 4907 ifp->if_capenable &= ~IFCAP_LRO; 4908 ifp->if_init = mxge_init; 4909 ifp->if_softc = sc; 4910 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4911 ifp->if_ioctl = mxge_ioctl; 4912 ifp->if_start = mxge_start; 4913 ifp->if_get_counter = mxge_get_counter; 4914 ifp->if_hw_tsomax = IP_MAXPACKET - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); 4915 ifp->if_hw_tsomaxsegcount = sc->ss[0].tx.max_desc; 4916 ifp->if_hw_tsomaxsegsize = IP_MAXPACKET; 4917 /* Initialise the ifmedia structure */ 4918 ifmedia_init(&sc->media, 0, mxge_media_change, 4919 mxge_media_status); 4920 mxge_media_init(sc); 4921 mxge_media_probe(sc); 4922 sc->dying = 0; 4923 ether_ifattach(ifp, sc->mac_addr); 4924 /* ether_ifattach sets mtu to ETHERMTU */ 4925 if (mxge_initial_mtu != ETHERMTU) 4926 mxge_change_mtu(sc, mxge_initial_mtu); 4927 4928 mxge_add_sysctls(sc); 4929 #ifdef IFNET_BUF_RING 4930 ifp->if_transmit = mxge_transmit; 4931 ifp->if_qflush = mxge_qflush; 4932 #endif 4933 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq", 4934 device_get_nameunit(sc->dev)); 4935 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 4936 return 0; 4937 4938 abort_with_rings: 4939 mxge_free_rings(sc); 4940 abort_with_slices: 4941 mxge_free_slices(sc); 4942 abort_with_dmabench: 4943 mxge_dma_free(&sc->dmabench_dma); 4944 abort_with_zeropad_dma: 4945 mxge_dma_free(&sc->zeropad_dma); 4946 abort_with_cmd_dma: 4947 mxge_dma_free(&sc->cmd_dma); 4948 
abort_with_mem_res: 4949 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4950 abort_with_lock: 4951 pci_disable_busmaster(dev); 4952 mtx_destroy(&sc->cmd_mtx); 4953 mtx_destroy(&sc->driver_mtx); 4954 if_free(ifp); 4955 abort_with_parent_dmat: 4956 bus_dma_tag_destroy(sc->parent_dmat); 4957 abort_with_tq: 4958 if (sc->tq != NULL) { 4959 taskqueue_drain(sc->tq, &sc->watchdog_task); 4960 taskqueue_free(sc->tq); 4961 sc->tq = NULL; 4962 } 4963 abort_with_nothing: 4964 return err; 4965 } 4966 4967 static int 4968 mxge_detach(device_t dev) 4969 { 4970 mxge_softc_t *sc = device_get_softc(dev); 4971 4972 if (mxge_vlans_active(sc)) { 4973 device_printf(sc->dev, 4974 "Detach vlans before removing module\n"); 4975 return EBUSY; 4976 } 4977 mtx_lock(&sc->driver_mtx); 4978 sc->dying = 1; 4979 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 4980 mxge_close(sc, 0); 4981 mtx_unlock(&sc->driver_mtx); 4982 ether_ifdetach(sc->ifp); 4983 if (sc->tq != NULL) { 4984 taskqueue_drain(sc->tq, &sc->watchdog_task); 4985 taskqueue_free(sc->tq); 4986 sc->tq = NULL; 4987 } 4988 callout_drain(&sc->co_hdl); 4989 ifmedia_removeall(&sc->media); 4990 mxge_dummy_rdma(sc, 0); 4991 mxge_rem_sysctls(sc); 4992 mxge_rem_irq(sc); 4993 mxge_free_rings(sc); 4994 mxge_free_slices(sc); 4995 mxge_dma_free(&sc->dmabench_dma); 4996 mxge_dma_free(&sc->zeropad_dma); 4997 mxge_dma_free(&sc->cmd_dma); 4998 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4999 pci_disable_busmaster(dev); 5000 mtx_destroy(&sc->cmd_mtx); 5001 mtx_destroy(&sc->driver_mtx); 5002 if_free(sc->ifp); 5003 bus_dma_tag_destroy(sc->parent_dmat); 5004 return 0; 5005 } 5006 5007 static int 5008 mxge_shutdown(device_t dev) 5009 { 5010 return 0; 5011 } 5012 5013 /* 5014 This file uses Myri10GE driver indentation. 5015 5016 Local Variables: 5017 c-file-style:"linux" 5018 tab-width:8 5019 End: 5020 */ 5021