/******************************************************************************
SPDX-License-Identifier: BSD-2-Clause-FreeBSD

Copyright (c) 2006-2013, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>
#include <contrib/zlib/zlib.h>
#include <dev/zlib/zcalloc.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet6/ip6_var.h>

#include <machine/bus.h>
#include <machine/in_cksum.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>
#include <sys/smp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */

#include <vm/vm.h>		/* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif

#include <dev/mxge/mxge_mcp.h>
#include <dev/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/mxge/if_mxge_var.h>
#ifdef IFNET_BUF_RING
#include <sys/buf_ring.h>
#endif

#include "opt_inet.h"
#include "opt_inet6.h"

/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
static int mxge_always_promisc = 0;
static int mxge_initial_mtu = ETHERMTU_JUMBO;
static int mxge_throttle = 0;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";

static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),

	DEVMETHOD_END
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus. */
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc, int down);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);

static int
mxge_probe(device_t dev)
{
	int rev;

	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		rev = pci_get_revid(dev);
		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n",
				      rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}

static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386) || defined(__amd64)
	vm_offset_t len;
	int err;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram,
			       len, PAT_WRITE_COMBINING);
	if (err != 0) {
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
			      err);
		sc->wc = 0;
	}
#endif
}

/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	if (error == 0) {
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}
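/*
 * A note on the allocation strategy below (a sketch of the intent,
 * inferred from the code rather than stated anywhere): a page-aligned
 * allocation larger than 4KB is created with no boundary constraint so
 * it can span pages contiguously, while everything else is constrained
 * so that no segment crosses a 4KB boundary.
 */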
static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}

static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */

static int
mxge_parse_strings(mxge_softc_t *sc)
{
	char *ptr;
	int i, found_mac, found_sn2;
	char *endptr;

	ptr = sc->eeprom_strings;
	found_mac = 0;
	found_sn2 = 0;
	while (*ptr != '\0') {
		if (strncmp(ptr, "MAC=", 4) == 0) {
			ptr += 4;
			for (i = 0;;) {
				sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
				if (endptr - ptr != 2)
					goto abort;
				ptr = endptr;
				if (++i == 6)
					break;
				if (*ptr++ != ':')
					goto abort;
			}
			found_mac = 1;
		} else if (strncmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strlcpy(sc->product_code_string, ptr,
				sizeof(sc->product_code_string));
		} else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
			ptr += 3;
			strlcpy(sc->serial_number_string, ptr,
				sizeof(sc->serial_number_string));
		} else if (strncmp(ptr, "SN2=", 4) == 0) {
			/* SN2 takes precedence over SN */
			ptr += 4;
			found_sn2 = 1;
			strlcpy(sc->serial_number_string, ptr,
				sizeof(sc->serial_number_string));
		}
		while (*ptr++ != '\0') {}
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}

#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function. Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves. This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	off = base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);

	if (va == NULL) {
		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif

static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";

	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests. The
	 * results are returned in cmd.data0. The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */
	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}

/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary. Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */

static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
				      pectl);
			sc->tx_boundary = 2048;
		}
	}

	/*
	 * load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0) {
		return status;
	}

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen. Not required on Z8ES or newer.
	 */
	if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
		return 0;
	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS)
		device_printf(dev, "Falling back to ethp! "
			      "Please install up to date fw\n");
	return status;
}

static int
mxge_select_firmware(mxge_softc_t *sc)
{
	int aligned = 0;
	int force_firmware = mxge_force_firmware;

	if (sc->throttle)
		force_firmware = sc->throttle;

	if (force_firmware != 0) {
		if (force_firmware == 1)
			aligned = 1;
		else
			aligned = 0;
		if (mxge_verbose)
			device_printf(sc->dev,
				      "Assuming %s completions (forced)\n",
				      aligned ? "aligned" : "unaligned");
		goto abort;
	}

	/* if the PCIe link width is 4 or less, we can use the aligned
	   firmware and skip any checks */
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev,
			      "PCIe x%d Link, expect reduced performance\n",
			      sc->link_width);
		aligned = 1;
		goto abort;
	}

	if (0 == mxge_firmware_probe(sc))
		return 0;

abort:
	if (aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx_boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
	}
	return (mxge_load_firmware(sc, 0));
}

static int
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{

	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
			      be32toh(hdr->mcp_type));
		return EIO;
	}

	/* save firmware version for sysctl */
	strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version));
	if (mxge_verbose)
		device_printf(sc->dev, "firmware id: %s\n", hdr->version);

	sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
	       &sc->fw_ver_minor, &sc->fw_ver_tiny);

	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
	      && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		device_printf(sc->dev, "Found firmware version %s\n",
			      sc->fw_version);
		device_printf(sc->dev, "Driver needs %d.%d\n",
			      MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
		return EINVAL;
	}
	return 0;

}

static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
	z_stream zs;
	char *inflate_buffer;
	const struct firmware *fw;
	const mcp_gen_header_t *hdr;
	unsigned hdr_offset;
	int status;
	unsigned int i;
	char dummy;
	size_t fw_len;

	fw = firmware_get(sc->fw_name);
	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n",
			      sc->fw_name);
		return ENOENT;
	}

	/* setup zlib and decompress f/w */
	bzero(&zs, sizeof (zs));
	zs.zalloc = zcalloc_nowait;
	zs.zfree = zcfree;
	status = inflateInit(&zs);
	if (status != Z_OK) {
		status = EIO;
		goto abort_with_fw;
	}

	/* the uncompressed size is stored as the firmware version,
	   which would otherwise go unused */
	fw_len = (size_t) fw->version;
	inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
	if (inflate_buffer == NULL)
		goto abort_with_zs;
	zs.avail_in = fw->datasize;
	zs.next_in = __DECONST(char *, fw->data);
	zs.avail_out = fw_len;
	zs.next_out = inflate_buffer;
	status = inflate(&zs, Z_FINISH);
	if (status != Z_STREAM_END) {
		device_printf(sc->dev, "zlib %d\n", status);
		status = EIO;
		goto abort_with_buffer;
	}
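	/*
	 * Layout of the inflated image, as the code below assumes it
	 * (a sketch inferred from this function, not from a firmware
	 * spec): a 32-bit big-endian pointer stored at
	 * MCP_HEADER_PTR_OFFSET gives the offset of the
	 * mcp_gen_header_t within the image; that header must be
	 * 4-byte aligned and must lie entirely within fw_len bytes.
	 */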
	/* check id */
	hdr_offset = htobe32(*(const uint32_t *)
			     (inflate_buffer + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
		device_printf(sc->dev, "Bad firmware file");
		status = EIO;
		goto abort_with_buffer;
	}
	hdr = (const void*)(inflate_buffer + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_buffer;

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < fw_len; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
			      inflate_buffer + i,
			      min(256U, (unsigned)(fw_len - i)));
		wmb();
		dummy = *sc->sram;
		wmb();
	}

	*limit = fw_len;
	status = 0;
abort_with_buffer:
	free(inflate_buffer, M_TEMP);
abort_with_zs:
	inflateEnd(&zs);
abort_with_fw:
	firmware_put(fw, FIRMWARE_UNLOAD);
	return status;
}

/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address. The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high);	/* dummy addr MSW */
	buf[4] = htobe32(dma_low);	/* dummy addr LSW */
	buf[5] = htobe32(enable);	/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}

static int
mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof(*buf) + 8];
	volatile mcp_cmd_response_t *response = sc->cmd;
	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
	uint32_t dma_low, dma_high;
	int err, sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	buf->data0 = htobe32(data->data0);
	buf->data1 = htobe32(data->data1);
	buf->data2 = htobe32(data->data2);
	buf->cmd = htobe32(cmd);
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf->response_addr.low = htobe32(dma_low);
	buf->response_addr.high = htobe32(dma_high);
	mtx_lock(&sc->cmd_mtx);
	response->result = 0xffffffff;
	wmb();
	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	err = EAGAIN;
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
		wmb();
		switch (be32toh(response->result)) {
		case 0:
			data->data0 = be32toh(response->data);
			err = 0;
			break;
		case 0xffffffff:
			DELAY(1000);
			break;
		case MXGEFW_CMD_UNKNOWN:
			err = ENOSYS;
			break;
		case MXGEFW_CMD_ERROR_UNALIGNED:
			err = E2BIG;
			break;
		case MXGEFW_CMD_ERROR_BUSY:
			err = EBUSY;
			break;
		case MXGEFW_CMD_ERROR_I2C_ABSENT:
			err = ENXIO;
			break;
		default:
			device_printf(sc->dev,
				      "mxge: command %d "
				      "failed, result = %d\n",
				      cmd, be32toh(response->result));
			err = ENXIO;
			break;
		}
		if (err != EAGAIN)
			break;
	}
	if (err == EAGAIN)
		device_printf(sc->dev, "mxge: command %d timed out, "
			      "result = %d\n",
			      cmd, be32toh(response->result));
	mtx_unlock(&sc->cmd_mtx);
	return err;
}

static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not malloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	free(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}

	return status;
}
static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
				      "Using firmware currently running on NIC"
				      ". For optimal\n");
			device_printf(sc->dev,
				      "performance consider loading optimized "
				      "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address. The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
	/* where the code starts*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8);	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev,"handoff failed (%p = 0x%x)",
			      confirm, *confirm);

		return ENXIO;
	}
	return 0;
}

static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}

static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set promisc mode\n");
	}
}

static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */

	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */

	if_maddr_rlock(ifp);
	CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		      &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		      &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "Failed "
				      "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
				      "%d\n", err);
			/* abort, leaving multicast filtering off */
			if_maddr_runlock(ifp);
			return;
		}
	}
	if_maddr_runlock(ifp);
	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}

static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}

static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
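	/*
	 * Errors from several of the commands issued below are
	 * accumulated by OR-ing them into "status", which is checked
	 * only once, after the interrupt offsets have been fetched.
	 */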
	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0. It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);

	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lc.lro_bad_csum = 0;
		ss->lc.lro_queued = 0;
		ss->lc.lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			bzero(ss->fw_stats, sizeof *ss->fw_stats);
		}
	}
	sc->rdma_tags_available = 15;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	if (sc->throttle) {
		cmd.data0 = sc->throttle;
		if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
				  &cmd)) {
			device_printf(sc->dev,
				      "can't enable throttle\n");
		}
	}
	return status;
}
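/*
 * The throttle factor can also be changed at runtime through the
 * "throttle" sysctl registered in mxge_add_sysctls() below, e.g.
 * (illustrative example; the unit number depends on the system):
 *
 *	sysctl dev.mxge.0.throttle=416
 *
 * Values outside [MXGE_MIN_THROTTLE, MXGE_MAX_THROTTLE] are rejected.
 */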
static int
mxge_change_throttle(SYSCTL_HANDLER_ARGS)
{
	mxge_cmd_t cmd;
	mxge_softc_t *sc;
	int err;
	unsigned int throttle;

	sc = arg1;
	throttle = sc->throttle;
	err = sysctl_handle_int(oidp, &throttle, arg2, req);
	if (err != 0) {
		return err;
	}

	if (throttle == sc->throttle)
		return 0;

	if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	cmd.data0 = throttle;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
	if (err == 0)
		sc->throttle = throttle;
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0) {
		return err;
	}
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0) {
		return err;
	}
	if (enabled == sc->pause)
		return 0;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_pause(sc, enabled);
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}

static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int slice;

	if (sc->slice_sysctl_tree == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		if (ss == NULL || ss->sysctl_tree == NULL)
			continue;
		sysctl_ctx_free(&ss->sysctl_ctx);
		ss->sysctl_tree = NULL;
	}
	sysctl_ctx_free(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = NULL;
}

static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
	fw = sc->ss[0].fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "firmware_version",
			  CTLFLAG_RD, sc->fw_version,
			  0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "serial_number",
			  CTLFLAG_RD, sc->serial_number_string,
			  0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "product_code",
			  CTLFLAG_RD, sc->product_code_string,
			  0, "product_code");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "pcie_link_width",
		       CTLFLAG_RD, &sc->link_width,
		       0, "PCIe link width");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_boundary",
		       CTLFLAG_RD, &sc->tx_boundary,
		       0, "tx_boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_combine",
		       CTLFLAG_RD, &sc->wc,
		       0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_dma_MBs",
		       CTLFLAG_RD, &sc->read_dma,
		       0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_dma_MBs",
		       CTLFLAG_RD, &sc->write_dma,
		       0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_write_dma_MBs",
		       CTLFLAG_RD, &sc->read_write_dma,
		       0, "DMA concurrent Read/Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "watchdog_resets",
		       CTLFLAG_RD, &sc->watchdog_resets,
		       0, "Number of times NIC was reset");

	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"intr_coal_delay",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_intr_coal,
			"I", "interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"throttle",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_throttle,
			"I", "transmit throttling");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"flow_control_enabled",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_flow_control,
			"I", "flow control enabled");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "deassert_wait",
		       CTLFLAG_RW, &mxge_deassert_wait,
		       0, "Wait for IRQ line to go low in ihandler");

	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"link_up",
			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
			0, mxge_handle_be32,
			"I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"rdma_tags_available",
			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
			0, mxge_handle_be32,
			"I", "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_crc32",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_crc32,
			0, mxge_handle_be32,
			"I", "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_phy",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_phy,
			0, mxge_handle_be32,
			"I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_error_or_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_link_error_or_filtered,
			0, mxge_handle_be32,
			"I", "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_overflow",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
			0, mxge_handle_be32,
			"I", "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_multicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_multicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_big_buffer",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_small_buffer",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_no_small_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_overrun",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
			0, mxge_handle_be32,
			"I", "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_pause",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_pause,
			0, mxge_handle_be32,
			"I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_runt",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
			0, mxge_handle_be32,
			"I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_unicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_unicast_filtered");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "verbose",
		       CTLFLAG_RW, &mxge_verbose,
		       0, "verbose printing");

	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree =
		SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
				"slice", CTLFLAG_RD, 0, "");

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		sprintf(slice_num, "%d", slice);
		ss->sysctl_tree =
			SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
					CTLFLAG_RD, 0, "");
		children = SYSCTL_CHILDREN(ss->sysctl_tree);
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_small_cnt",
			       CTLFLAG_RD, &ss->rx_small.cnt,
			       0, "rx_small_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_big_cnt",
			       CTLFLAG_RD, &ss->rx_big.cnt,
			       0, "rx_big_cnt");
		SYSCTL_ADD_U64(ctx, children, OID_AUTO,
			       "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed,
			       0, "number of lro merge queues flushed");

		SYSCTL_ADD_U64(ctx, children, OID_AUTO,
			       "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum,
			       0, "number of bad csums preventing LRO");

		SYSCTL_ADD_U64(ctx, children, OID_AUTO,
			       "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued,
			       0, "number of frames appended to lro merge "
			       "queues");

#ifndef IFNET_BUF_RING
		/* only transmit from slice 0 for now */
		if (slice > 0)
			continue;
#endif
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_req",
			       CTLFLAG_RD, &ss->tx.req,
			       0, "tx_req");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_done",
			       CTLFLAG_RD, &ss->tx.done,
			       0, "tx_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_pkt_done",
			       CTLFLAG_RD, &ss->tx.pkt_done,
			       0, "tx_pkt_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_stall",
			       CTLFLAG_RD, &ss->tx.stall,
			       0, "tx_stall");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_wake",
			       CTLFLAG_RD, &ss->tx.wake,
			       0, "tx_wake");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_defrag",
			       CTLFLAG_RD, &ss->tx.defrag,
			       0, "tx_defrag");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_queue_active",
			       CTLFLAG_RD, &ss->tx.queue_active,
			       0, "tx_queue_active");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_activate",
			       CTLFLAG_RD, &ss->tx.activate,
			       0, "tx_activate");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_deactivate",
			       CTLFLAG_RD, &ss->tx.deactivate,
			       0, "tx_deactivate");
	}
}
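/*
 * The per-slice counters registered above land under a tree of the
 * form dev.mxge.<unit>.slice.<n>, so for example (illustrative only;
 * unit and slice numbers depend on the system):
 *
 *	sysctl dev.mxge.0.slice.0.tx_req
 *	sysctl dev.mxge.0.slice.1.rx_small_cnt
 */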
/* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
   backwards one at a time and handle ring wraps */

static inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
			  mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
		wmb();
	}
}

/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */

static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
		int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}

static int
mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m,
	      struct mxge_pkt_info *pi)
{
	struct ether_vlan_header *eh;
	uint16_t etype;
	int tso = m->m_pkthdr.csum_flags & (CSUM_TSO);
#if IFCAP_TSO6 && defined(INET6)
	int nxt;
#endif

	eh = mtod(m, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		pi->ip_off = ETHER_HDR_LEN;
	}

	switch (etype) {
	case ETHERTYPE_IP:
		/*
		 * ensure ip header is in first mbuf, copy it to a
		 * scratch buffer if not
		 */
		pi->ip = (struct ip *)(m->m_data + pi->ip_off);
		pi->ip6 = NULL;
		if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) {
			m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip),
				   ss->scratch);
			pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
		}
		pi->ip_hlen = pi->ip->ip_hl << 2;
		if (!tso)
			return 0;

		if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
				    sizeof(struct tcphdr))) {
			m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
				   sizeof(struct tcphdr), ss->scratch);
			pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
		}
		pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen);
		break;
#if IFCAP_TSO6 && defined(INET6)
	case ETHERTYPE_IPV6:
		pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off);
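		/*
		 * as with IPv4 above, ensure the v6 header is in the
		 * first mbuf, copying it to the scratch buffer if not
		 */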
		if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) {
			m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6),
				   ss->scratch);
			pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
		}
		nxt = 0;
		pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt);
		pi->ip_hlen -= pi->ip_off;
		if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
			return EINVAL;

		if (!tso)
			return 0;

		if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen)
			return EINVAL;

		if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
				    sizeof(struct tcphdr))) {
			m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
				   sizeof(struct tcphdr), ss->scratch);
			pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
		}
		pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen);
		break;
#endif
	default:
		return EINVAL;
	}
	return 0;
}

#if IFCAP_TSO4

static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, struct mxge_pkt_info *pi)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss, sum;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	cksum_offset = pi->ip_off + pi->ip_hlen;
	cum_len = -(cksum_offset + (pi->tcp->th_off << 2));

	/* TSO implies checksum offload on this hardware */
	if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) {
		/*
		 * If packet has full TCP csum, replace it with pseudo hdr
		 * sum that the NIC expects, otherwise the NIC will emit
		 * packets with bad TCP checksums.
		 */
		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
		if (pi->ip6) {
#if (CSUM_TCP_IPV6 != 0) && defined(INET6)
			m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
			sum = in6_cksum_pseudo(pi->ip6,
					       m->m_pkthdr.len - cksum_offset,
					       IPPROTO_TCP, 0);
#endif
		} else {
#ifdef INET
			m->m_pkthdr.csum_flags |= CSUM_TCP;
			sum = in_pseudo(pi->ip->ip_src.s_addr,
					pi->ip->ip_dst.s_addr,
					htons(IPPROTO_TCP + (m->m_pkthdr.len -
							     cksum_offset)));
#endif
		}
		m_copyback(m, offsetof(struct tcphdr, th_sum) +
			   cksum_offset, sizeof(sum), (caddr_t)&sum);
	}
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	if (pi->ip6) {
		/*
		 * for IPv6 TSO, the "checksum offset" is re-purposed
		 * to store the TCP header len
		 */
		cksum_offset = (pi->tcp->th_off << 2);
	}

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */
	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces*/
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (cksum_offset != 0 && !pi->ip6) {
				if (__predict_false(cksum_offset > seglen))
					cksum_offset -= seglen;
				else
					cksum_offset = 0;
			}
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req-rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	ss->oerrors++;
	if (!once) {
		printf("tx->max_desc exceeded via TSO!\n");
		printf("mss = %d, %ld, %d!\n", mss,
		       (long)seg - (long)tx->seg_list, tx->max_desc);
		once = 1;
	}
	return;

}

#endif /* IFCAP_TSO4 */
2032 */ 2033 static struct mbuf * 2034 mxge_vlan_tag_insert(struct mbuf *m) 2035 { 2036 struct ether_vlan_header *evl; 2037 2038 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT); 2039 if (__predict_false(m == NULL)) 2040 return NULL; 2041 if (m->m_len < sizeof(*evl)) { 2042 m = m_pullup(m, sizeof(*evl)); 2043 if (__predict_false(m == NULL)) 2044 return NULL; 2045 } 2046 /* 2047 * Transform the Ethernet header into an Ethernet header 2048 * with 802.1Q encapsulation. 2049 */ 2050 evl = mtod(m, struct ether_vlan_header *); 2051 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 2052 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 2053 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 2054 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 2055 m->m_flags &= ~M_VLANTAG; 2056 return m; 2057 } 2058 #endif /* MXGE_NEW_VLAN_API */ 2059 2060 static void 2061 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 2062 { 2063 struct mxge_pkt_info pi = {0,0,0,0}; 2064 mxge_softc_t *sc; 2065 mcp_kreq_ether_send_t *req; 2066 bus_dma_segment_t *seg; 2067 struct mbuf *m_tmp; 2068 struct ifnet *ifp; 2069 mxge_tx_ring_t *tx; 2070 int cnt, cum_len, err, i, idx, odd_flag; 2071 uint16_t pseudo_hdr_offset; 2072 uint8_t flags, cksum_offset; 2073 2074 2075 sc = ss->sc; 2076 ifp = sc->ifp; 2077 tx = &ss->tx; 2078 2079 #ifdef MXGE_NEW_VLAN_API 2080 if (m->m_flags & M_VLANTAG) { 2081 m = mxge_vlan_tag_insert(m); 2082 if (__predict_false(m == NULL)) 2083 goto drop_without_m; 2084 } 2085 #endif 2086 if (m->m_pkthdr.csum_flags & 2087 (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { 2088 if (mxge_parse_tx(ss, m, &pi)) 2089 goto drop; 2090 } 2091 2092 /* (try to) map the frame for DMA */ 2093 idx = tx->req & tx->mask; 2094 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 2095 m, tx->seg_list, &cnt, 2096 BUS_DMA_NOWAIT); 2097 if (__predict_false(err == EFBIG)) { 2098 /* Too many segments in the chain. Try 2099 to defrag */ 2100 m_tmp = m_defrag(m, M_NOWAIT); 2101 if (m_tmp == NULL) { 2102 goto drop; 2103 } 2104 ss->tx.defrag++; 2105 m = m_tmp; 2106 err = bus_dmamap_load_mbuf_sg(tx->dmat, 2107 tx->info[idx].map, 2108 m, tx->seg_list, &cnt, 2109 BUS_DMA_NOWAIT); 2110 } 2111 if (__predict_false(err != 0)) { 2112 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 2113 " packet len = %d\n", err, m->m_pkthdr.len); 2114 goto drop; 2115 } 2116 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 2117 BUS_DMASYNC_PREWRITE); 2118 tx->info[idx].m = m; 2119 2120 #if IFCAP_TSO4 2121 /* TSO is different enough, we handle it in another routine */ 2122 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 2123 mxge_encap_tso(ss, m, cnt, &pi); 2124 return; 2125 } 2126 #endif 2127 2128 req = tx->req_list; 2129 cksum_offset = 0; 2130 pseudo_hdr_offset = 0; 2131 flags = MXGEFW_FLAGS_NO_TSO; 2132 2133 /* checksum offloading? 
*/ 2134 if (m->m_pkthdr.csum_flags & 2135 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { 2136 /* ensure ip header is in first mbuf, copy 2137 it to a scratch buffer if not */ 2138 cksum_offset = pi.ip_off + pi.ip_hlen; 2139 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2140 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2141 req->cksum_offset = cksum_offset; 2142 flags |= MXGEFW_FLAGS_CKSUM; 2143 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2144 } else { 2145 odd_flag = 0; 2146 } 2147 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2148 flags |= MXGEFW_FLAGS_SMALL; 2149 2150 /* convert segments into a request list */ 2151 cum_len = 0; 2152 seg = tx->seg_list; 2153 req->flags = MXGEFW_FLAGS_FIRST; 2154 for (i = 0; i < cnt; i++) { 2155 req->addr_low = 2156 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2157 req->addr_high = 2158 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2159 req->length = htobe16(seg->ds_len); 2160 req->cksum_offset = cksum_offset; 2161 if (cksum_offset > seg->ds_len) 2162 cksum_offset -= seg->ds_len; 2163 else 2164 cksum_offset = 0; 2165 req->pseudo_hdr_offset = pseudo_hdr_offset; 2166 req->pad = 0; /* complete solid 16-byte block */ 2167 req->rdma_count = 1; 2168 req->flags |= flags | ((cum_len & 1) * odd_flag); 2169 cum_len += seg->ds_len; 2170 seg++; 2171 req++; 2172 req->flags = 0; 2173 } 2174 req--; 2175 /* pad runts to 60 bytes */ 2176 if (cum_len < 60) { 2177 req++; 2178 req->addr_low = 2179 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2180 req->addr_high = 2181 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2182 req->length = htobe16(60 - cum_len); 2183 req->cksum_offset = 0; 2184 req->pseudo_hdr_offset = pseudo_hdr_offset; 2185 req->pad = 0; /* complete solid 16-byte block */ 2186 req->rdma_count = 1; 2187 req->flags |= flags | ((cum_len & 1) * odd_flag); 2188 cnt++; 2189 } 2190 2191 tx->req_list[0].rdma_count = cnt; 2192 #if 0 2193 /* print what the firmware will see */ 2194 for (i = 0; i < cnt; i++) { 2195 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2196 "cso:%d, flags:0x%x, rdma:%d\n", 2197 i, (int)ntohl(tx->req_list[i].addr_high), 2198 (int)ntohl(tx->req_list[i].addr_low), 2199 (int)ntohs(tx->req_list[i].length), 2200 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2201 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2202 tx->req_list[i].rdma_count); 2203 } 2204 printf("--------------\n"); 2205 #endif 2206 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2207 mxge_submit_req(tx, tx->req_list, cnt); 2208 #ifdef IFNET_BUF_RING 2209 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2210 /* tell the NIC to start polling this slice */ 2211 *tx->send_go = 1; 2212 tx->queue_active = 1; 2213 tx->activate++; 2214 wmb(); 2215 } 2216 #endif 2217 return; 2218 2219 drop: 2220 m_freem(m); 2221 drop_without_m: 2222 ss->oerrors++; 2223 return; 2224 } 2225 2226 #ifdef IFNET_BUF_RING 2227 static void 2228 mxge_qflush(struct ifnet *ifp) 2229 { 2230 mxge_softc_t *sc = ifp->if_softc; 2231 mxge_tx_ring_t *tx; 2232 struct mbuf *m; 2233 int slice; 2234 2235 for (slice = 0; slice < sc->num_slices; slice++) { 2236 tx = &sc->ss[slice].tx; 2237 mtx_lock(&tx->mtx); 2238 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2239 m_freem(m); 2240 mtx_unlock(&tx->mtx); 2241 } 2242 if_qflush(ifp); 2243 } 2244 2245 static inline void 2246 mxge_start_locked(struct mxge_slice_state *ss) 2247 { 2248 mxge_softc_t *sc; 2249 struct mbuf *m; 2250 struct ifnet *ifp; 2251 mxge_tx_ring_t *tx; 2252 2253 sc = ss->sc; 2254 ifp = sc->ifp; 2255 tx = &ss->tx; 2256 2257 while 
((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2258 m = drbr_dequeue(ifp, tx->br); 2259 if (m == NULL) { 2260 return; 2261 } 2262 /* let BPF see it */ 2263 BPF_MTAP(ifp, m); 2264 2265 /* give it to the nic */ 2266 mxge_encap(ss, m); 2267 } 2268 /* ran out of transmit slots */ 2269 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) 2270 && (!drbr_empty(ifp, tx->br))) { 2271 ss->if_drv_flags |= IFF_DRV_OACTIVE; 2272 tx->stall++; 2273 } 2274 } 2275 2276 static int 2277 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2278 { 2279 mxge_softc_t *sc; 2280 struct ifnet *ifp; 2281 mxge_tx_ring_t *tx; 2282 int err; 2283 2284 sc = ss->sc; 2285 ifp = sc->ifp; 2286 tx = &ss->tx; 2287 2288 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 2289 IFF_DRV_RUNNING) { 2290 err = drbr_enqueue(ifp, tx->br, m); 2291 return (err); 2292 } 2293 2294 if (!drbr_needs_enqueue(ifp, tx->br) && 2295 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2296 /* let BPF see it */ 2297 BPF_MTAP(ifp, m); 2298 /* give it to the nic */ 2299 mxge_encap(ss, m); 2300 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2301 return (err); 2302 } 2303 if (!drbr_empty(ifp, tx->br)) 2304 mxge_start_locked(ss); 2305 return (0); 2306 } 2307 2308 static int 2309 mxge_transmit(struct ifnet *ifp, struct mbuf *m) 2310 { 2311 mxge_softc_t *sc = ifp->if_softc; 2312 struct mxge_slice_state *ss; 2313 mxge_tx_ring_t *tx; 2314 int err = 0; 2315 int slice; 2316 2317 slice = m->m_pkthdr.flowid; 2318 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ 2319 2320 ss = &sc->ss[slice]; 2321 tx = &ss->tx; 2322 2323 if (mtx_trylock(&tx->mtx)) { 2324 err = mxge_transmit_locked(ss, m); 2325 mtx_unlock(&tx->mtx); 2326 } else { 2327 err = drbr_enqueue(ifp, tx->br, m); 2328 } 2329 2330 return (err); 2331 } 2332 2333 #else 2334 2335 static inline void 2336 mxge_start_locked(struct mxge_slice_state *ss) 2337 { 2338 mxge_softc_t *sc; 2339 struct mbuf *m; 2340 struct ifnet *ifp; 2341 mxge_tx_ring_t *tx; 2342 2343 sc = ss->sc; 2344 ifp = sc->ifp; 2345 tx = &ss->tx; 2346 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2347 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 2348 if (m == NULL) { 2349 return; 2350 } 2351 /* let BPF see it */ 2352 BPF_MTAP(ifp, m); 2353 2354 /* give it to the nic */ 2355 mxge_encap(ss, m); 2356 } 2357 /* ran out of transmit slots */ 2358 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 2359 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2360 tx->stall++; 2361 } 2362 } 2363 #endif 2364 static void 2365 mxge_start(struct ifnet *ifp) 2366 { 2367 mxge_softc_t *sc = ifp->if_softc; 2368 struct mxge_slice_state *ss; 2369 2370 /* only use the first slice for now */ 2371 ss = &sc->ss[0]; 2372 mtx_lock(&ss->tx.mtx); 2373 mxge_start_locked(ss); 2374 mtx_unlock(&ss->tx.mtx); 2375 } 2376 2377 /* 2378 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2379 * at most 32 bytes at a time, so as to avoid involving the software 2380 * pio handler in the nic. 
We re-write the first segment's low 2381 * DMA address to mark it valid only after we write the entire chunk 2382 * in a burst 2383 */ 2384 static inline void 2385 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2386 mcp_kreq_ether_recv_t *src) 2387 { 2388 uint32_t low; 2389 2390 low = src->addr_low; 2391 src->addr_low = 0xffffffff; 2392 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2393 wmb(); 2394 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2395 wmb(); 2396 src->addr_low = low; 2397 dst->addr_low = low; 2398 wmb(); 2399 } 2400 2401 static int 2402 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2403 { 2404 bus_dma_segment_t seg; 2405 struct mbuf *m; 2406 mxge_rx_ring_t *rx = &ss->rx_small; 2407 int cnt, err; 2408 2409 m = m_gethdr(M_NOWAIT, MT_DATA); 2410 if (m == NULL) { 2411 rx->alloc_fail++; 2412 err = ENOBUFS; 2413 goto done; 2414 } 2415 m->m_len = MHLEN; 2416 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2417 &seg, &cnt, BUS_DMA_NOWAIT); 2418 if (err != 0) { 2419 m_free(m); 2420 goto done; 2421 } 2422 rx->info[idx].m = m; 2423 rx->shadow[idx].addr_low = 2424 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2425 rx->shadow[idx].addr_high = 2426 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2427 2428 done: 2429 if ((idx & 7) == 7) 2430 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2431 return err; 2432 } 2433 2434 static int 2435 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2436 { 2437 bus_dma_segment_t seg[3]; 2438 struct mbuf *m; 2439 mxge_rx_ring_t *rx = &ss->rx_big; 2440 int cnt, err, i; 2441 2442 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2443 if (m == NULL) { 2444 rx->alloc_fail++; 2445 err = ENOBUFS; 2446 goto done; 2447 } 2448 m->m_len = rx->mlen; 2449 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2450 seg, &cnt, BUS_DMA_NOWAIT); 2451 if (err != 0) { 2452 m_free(m); 2453 goto done; 2454 } 2455 rx->info[idx].m = m; 2456 rx->shadow[idx].addr_low = 2457 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2458 rx->shadow[idx].addr_high = 2459 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2460 2461 #if MXGE_VIRT_JUMBOS 2462 for (i = 1; i < cnt; i++) { 2463 rx->shadow[idx + i].addr_low = 2464 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2465 rx->shadow[idx + i].addr_high = 2466 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2467 } 2468 #endif 2469 2470 done: 2471 for (i = 0; i < rx->nbufs; i++) { 2472 if ((idx & 7) == 7) { 2473 mxge_submit_8rx(&rx->lanai[idx - 7], 2474 &rx->shadow[idx - 7]); 2475 } 2476 idx++; 2477 } 2478 return err; 2479 } 2480 2481 #ifdef INET6 2482 2483 static uint16_t 2484 mxge_csum_generic(uint16_t *raw, int len) 2485 { 2486 uint32_t csum; 2487 2488 2489 csum = 0; 2490 while (len > 0) { 2491 csum += *raw; 2492 raw++; 2493 len -= 2; 2494 } 2495 csum = (csum >> 16) + (csum & 0xffff); 2496 csum = (csum >> 16) + (csum & 0xffff); 2497 return (uint16_t)csum; 2498 } 2499 2500 static inline uint16_t 2501 mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum) 2502 { 2503 uint32_t partial; 2504 int nxt, cksum_offset; 2505 struct ip6_hdr *ip6 = p; 2506 uint16_t c; 2507 2508 nxt = ip6->ip6_nxt; 2509 cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN; 2510 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) { 2511 cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN, 2512 IPPROTO_IPV6, &nxt); 2513 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) 2514 return (1); 2515 } 2516 2517 /* 2518 * IPv6 headers do not contain a checksum, and hence 2519 * do not checksum to zero, so they don't "fall out" 2520 * of the partial 
checksum calculation like IPv4
2521 * headers do. We need to fix the partial checksum by
2522 * subtracting the checksum of the IPv6 header.
2523 */
2524
2525 partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset -
2526 ETHER_HDR_LEN);
2527 csum += ~partial;
2528 csum += (csum < ~partial);
2529 csum = (csum >> 16) + (csum & 0xFFFF);
2530 csum = (csum >> 16) + (csum & 0xFFFF);
2531 c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt,
2532 csum);
2533 c ^= 0xffff;
2534 return (c);
2535 }
2536 #endif /* INET6 */
2537 /*
2538 * Myri10GE hardware checksums are not valid if the sender
2539 * padded the frame with non-zero padding. This is because
2540 * the firmware just does a simple 16-bit 1s complement
2541 * checksum across the entire frame, excluding the first 14
2542 * bytes. It is best to simply check the checksum and
2543 * tell the stack about it only if the checksum is good.
2544 */
2545
2546 static inline uint16_t
2547 mxge_rx_csum(struct mbuf *m, int csum)
2548 {
2549 struct ether_header *eh;
2550 #ifdef INET
2551 struct ip *ip;
2552 #endif
2553 #if defined(INET) || defined(INET6)
2554 int cap = m->m_pkthdr.rcvif->if_capenable;
2555 #endif
2556 uint16_t c, etype;
2557
2558
2559 eh = mtod(m, struct ether_header *);
2560 etype = ntohs(eh->ether_type);
2561 switch (etype) {
2562 #ifdef INET
2563 case ETHERTYPE_IP:
2564 if ((cap & IFCAP_RXCSUM) == 0)
2565 return (1);
2566 ip = (struct ip *)(eh + 1);
2567 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)
2568 return (1);
2569 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2570 htonl(ntohs(csum) + ntohs(ip->ip_len) -
2571 (ip->ip_hl << 2) + ip->ip_p));
2572 c ^= 0xffff;
2573 break;
2574 #endif
2575 #ifdef INET6
2576 case ETHERTYPE_IPV6:
2577 if ((cap & IFCAP_RXCSUM_IPV6) == 0)
2578 return (1);
2579 c = mxge_rx_csum6((eh + 1), m, csum);
2580 break;
2581 #endif
2582 default:
2583 c = 1;
2584 }
2585 return (c);
2586 }
2587
2588 static void
2589 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2590 {
2591 struct ether_vlan_header *evl;
2592 struct ether_header *eh;
2593 uint32_t partial;
2594
2595 evl = mtod(m, struct ether_vlan_header *);
2596 eh = mtod(m, struct ether_header *);
2597
2598 /*
2599 * fix the checksum by subtracting the ETHER_VLAN_ENCAP_LEN bytes
2600 * after what the firmware thought was the end of the ethernet
2601 * header.
2602 */
2603
2604 /* put checksum into host byte order */
2605 *csum = ntohs(*csum);
2606 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2607 (*csum) += ~partial;
2608 (*csum) += ((*csum) < ~partial);
2609 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2610 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2611
2612 /* restore checksum to network byte order;
2613 later consumers expect this */
2614 *csum = htons(*csum);
2615
2616 /* save the tag */
2617 #ifdef MXGE_NEW_VLAN_API
2618 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
2619 #else
2620 {
2621 struct m_tag *mtag;
2622 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2623 M_NOWAIT);
2624 if (mtag == NULL)
2625 return;
2626 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2627 m_tag_prepend(m, mtag);
2628 }
2629
2630 #endif
2631 m->m_flags |= M_VLANTAG;
2632
2633 /*
2634 * Remove the 802.1q header by copying the Ethernet
2635 * addresses over it and adjusting the beginning of
2636 * the data in the mbuf. The encapsulated Ethernet
2637 * type field is already in place.
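 *
 * Illustrative layout sketch: the bcopy() below copies the 12
 * dst/src address bytes forward over the 4-byte 802.1Q header, and
 * m_adj() then trims the 4 stale bytes off the front:
 *
 *	before: [dst][src][0x8100][tag][type][payload]
 *	after:  [dst][src][type][payload]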
2638 */ 2639 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2640 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2641 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2642 } 2643 2644 2645 static inline void 2646 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, 2647 uint32_t csum, int lro) 2648 { 2649 mxge_softc_t *sc; 2650 struct ifnet *ifp; 2651 struct mbuf *m; 2652 struct ether_header *eh; 2653 mxge_rx_ring_t *rx; 2654 bus_dmamap_t old_map; 2655 int idx; 2656 2657 sc = ss->sc; 2658 ifp = sc->ifp; 2659 rx = &ss->rx_big; 2660 idx = rx->cnt & rx->mask; 2661 rx->cnt += rx->nbufs; 2662 /* save a pointer to the received mbuf */ 2663 m = rx->info[idx].m; 2664 /* try to replace the received mbuf */ 2665 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2666 /* drop the frame -- the old mbuf is re-cycled */ 2667 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 2668 return; 2669 } 2670 2671 /* unmap the received buffer */ 2672 old_map = rx->info[idx].map; 2673 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2674 bus_dmamap_unload(rx->dmat, old_map); 2675 2676 /* swap the bus_dmamap_t's */ 2677 rx->info[idx].map = rx->extra_map; 2678 rx->extra_map = old_map; 2679 2680 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2681 * aligned */ 2682 m->m_data += MXGEFW_PAD; 2683 2684 m->m_pkthdr.rcvif = ifp; 2685 m->m_len = m->m_pkthdr.len = len; 2686 ss->ipackets++; 2687 eh = mtod(m, struct ether_header *); 2688 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2689 mxge_vlan_tag_remove(m, &csum); 2690 } 2691 /* flowid only valid if RSS hashing is enabled */ 2692 if (sc->num_slices > 1) { 2693 m->m_pkthdr.flowid = (ss - sc->ss); 2694 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 2695 } 2696 /* if the checksum is valid, mark it in the mbuf header */ 2697 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && 2698 (0 == mxge_rx_csum(m, csum))) { 2699 /* Tell the stack that the checksum is good */ 2700 m->m_pkthdr.csum_data = 0xffff; 2701 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2702 CSUM_DATA_VALID; 2703 2704 #if defined(INET) || defined (INET6) 2705 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0))) 2706 return; 2707 #endif 2708 } 2709 /* pass the frame up the stack */ 2710 (*ifp->if_input)(ifp, m); 2711 } 2712 2713 static inline void 2714 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, 2715 uint32_t csum, int lro) 2716 { 2717 mxge_softc_t *sc; 2718 struct ifnet *ifp; 2719 struct ether_header *eh; 2720 struct mbuf *m; 2721 mxge_rx_ring_t *rx; 2722 bus_dmamap_t old_map; 2723 int idx; 2724 2725 sc = ss->sc; 2726 ifp = sc->ifp; 2727 rx = &ss->rx_small; 2728 idx = rx->cnt & rx->mask; 2729 rx->cnt++; 2730 /* save a pointer to the received mbuf */ 2731 m = rx->info[idx].m; 2732 /* try to replace the received mbuf */ 2733 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2734 /* drop the frame -- the old mbuf is re-cycled */ 2735 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 2736 return; 2737 } 2738 2739 /* unmap the received buffer */ 2740 old_map = rx->info[idx].map; 2741 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2742 bus_dmamap_unload(rx->dmat, old_map); 2743 2744 /* swap the bus_dmamap_t's */ 2745 rx->info[idx].map = rx->extra_map; 2746 rx->extra_map = old_map; 2747 2748 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2749 * aligned */ 2750 m->m_data += MXGEFW_PAD; 2751 2752 m->m_pkthdr.rcvif = ifp; 2753 m->m_len = m->m_pkthdr.len = len; 2754 ss->ipackets++; 2755 eh = mtod(m, struct ether_header *); 2756 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2757 
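/* in-band 802.1Q tag: strip it and fix up the partial checksum */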
mxge_vlan_tag_remove(m, &csum);
2758 }
2759 /* flowid only valid if RSS hashing is enabled */
2760 if (sc->num_slices > 1) {
2761 m->m_pkthdr.flowid = (ss - sc->ss);
2762 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2763 }
2764 /* if the checksum is valid, mark it in the mbuf header */
2765 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2766 (0 == mxge_rx_csum(m, csum))) {
2767 /* Tell the stack that the checksum is good */
2768 m->m_pkthdr.csum_data = 0xffff;
2769 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2770 CSUM_DATA_VALID;
2771
2772 #if defined(INET) || defined (INET6)
2773 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum)))
2774 return;
2775 #endif
2776 }
2777 /* pass the frame up the stack */
2778 (*ifp->if_input)(ifp, m);
2779 }
2780
2781 static inline void
2782 mxge_clean_rx_done(struct mxge_slice_state *ss)
2783 {
2784 mxge_rx_done_t *rx_done = &ss->rx_done;
2785 int limit = 0;
2786 uint16_t length;
2787 uint16_t checksum;
2788 int lro;
2789
2790 lro = ss->sc->ifp->if_capenable & IFCAP_LRO;
2791 while (rx_done->entry[rx_done->idx].length != 0) {
2792 length = ntohs(rx_done->entry[rx_done->idx].length);
2793 rx_done->entry[rx_done->idx].length = 0;
2794 checksum = rx_done->entry[rx_done->idx].checksum;
2795 if (length <= (MHLEN - MXGEFW_PAD))
2796 mxge_rx_done_small(ss, length, checksum, lro);
2797 else
2798 mxge_rx_done_big(ss, length, checksum, lro);
2799 rx_done->cnt++;
2800 rx_done->idx = rx_done->cnt & rx_done->mask;
2801
2802 /* limit potential for livelock */
2803 if (__predict_false(++limit > rx_done->mask / 2))
2804 break;
2805 }
2806 #if defined(INET) || defined (INET6)
2807 tcp_lro_flush_all(&ss->lc);
2808 #endif
2809 }
2810
2811
2812 static inline void
2813 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2814 {
2815 struct ifnet *ifp;
2816 mxge_tx_ring_t *tx;
2817 struct mbuf *m;
2818 bus_dmamap_t map;
2819 int idx;
2820 int *flags;
2821
2822 tx = &ss->tx;
2823 ifp = ss->sc->ifp;
2824 while (tx->pkt_done != mcp_idx) {
2825 idx = tx->done & tx->mask;
2826 tx->done++;
2827 m = tx->info[idx].m;
2828 /* mbuf and DMA map only attached to the first
2829 segment per-mbuf */
2830 if (m != NULL) {
2831 ss->obytes += m->m_pkthdr.len;
2832 if (m->m_flags & M_MCAST)
2833 ss->omcasts++;
2834 ss->opackets++;
2835 tx->info[idx].m = NULL;
2836 map = tx->info[idx].map;
2837 bus_dmamap_unload(tx->dmat, map);
2838 m_freem(m);
2839 }
2840 if (tx->info[idx].flag) {
2841 tx->info[idx].flag = 0;
2842 tx->pkt_done++;
2843 }
2844 }
2845
2846 /* If we have space, clear IFF_OACTIVE to tell the stack that
2847 it's OK to send packets */
2848 #ifdef IFNET_BUF_RING
2849 flags = &ss->if_drv_flags;
2850 #else
2851 flags = &ifp->if_drv_flags;
2852 #endif
2853 mtx_lock(&ss->tx.mtx);
2854 if ((*flags) & IFF_DRV_OACTIVE &&
2855 tx->req - tx->done < (tx->mask + 1)/4) {
2856 *(flags) &= ~IFF_DRV_OACTIVE;
2857 ss->tx.wake++;
2858 mxge_start_locked(ss);
2859 }
2860 #ifdef IFNET_BUF_RING
2861 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2862 /* let the NIC stop polling this queue, since there
2863 * are no more transmits pending */
2864 if (tx->req == tx->done) {
2865 *tx->send_stop = 1;
2866 tx->queue_active = 0;
2867 tx->deactivate++;
2868 wmb();
2869 }
2870 }
2871 #endif
2872 mtx_unlock(&ss->tx.mtx);
2873
2874 }
2875
2876 static struct mxge_media_type mxge_xfp_media_types[] =
2877 {
2878 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2879 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2880 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2881 {0, (1 << 5), "10GBASE-ER"},
2882 {IFM_10G_LRM, (1 <<
4), "10GBASE-LRM"}, 2883 {0, (1 << 3), "10GBASE-SW"}, 2884 {0, (1 << 2), "10GBASE-LW"}, 2885 {0, (1 << 1), "10GBASE-EW"}, 2886 {0, (1 << 0), "Reserved"} 2887 }; 2888 static struct mxge_media_type mxge_sfp_media_types[] = 2889 { 2890 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"}, 2891 {0, (1 << 7), "Reserved"}, 2892 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2893 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2894 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}, 2895 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"} 2896 }; 2897 2898 static void 2899 mxge_media_set(mxge_softc_t *sc, int media_type) 2900 { 2901 2902 2903 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type, 2904 0, NULL); 2905 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type); 2906 sc->current_media = media_type; 2907 sc->media.ifm_media = sc->media.ifm_cur->ifm_media; 2908 } 2909 2910 static void 2911 mxge_media_init(mxge_softc_t *sc) 2912 { 2913 char *ptr; 2914 int i; 2915 2916 ifmedia_removeall(&sc->media); 2917 mxge_media_set(sc, IFM_AUTO); 2918 2919 /* 2920 * parse the product code to deterimine the interface type 2921 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2922 * after the 3rd dash in the driver's cached copy of the 2923 * EEPROM's product code string. 2924 */ 2925 ptr = sc->product_code_string; 2926 if (ptr == NULL) { 2927 device_printf(sc->dev, "Missing product code\n"); 2928 return; 2929 } 2930 2931 for (i = 0; i < 3; i++, ptr++) { 2932 ptr = strchr(ptr, '-'); 2933 if (ptr == NULL) { 2934 device_printf(sc->dev, 2935 "only %d dashes in PC?!?\n", i); 2936 return; 2937 } 2938 } 2939 if (*ptr == 'C' || *(ptr +1) == 'C') { 2940 /* -C is CX4 */ 2941 sc->connector = MXGE_CX4; 2942 mxge_media_set(sc, IFM_10G_CX4); 2943 } else if (*ptr == 'Q') { 2944 /* -Q is Quad Ribbon Fiber */ 2945 sc->connector = MXGE_QRF; 2946 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2947 /* FreeBSD has no media type for Quad ribbon fiber */ 2948 } else if (*ptr == 'R') { 2949 /* -R is XFP */ 2950 sc->connector = MXGE_XFP; 2951 } else if (*ptr == 'S' || *(ptr +1) == 'S') { 2952 /* -S or -2S is SFP+ */ 2953 sc->connector = MXGE_SFP; 2954 } else { 2955 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2956 } 2957 } 2958 2959 /* 2960 * Determine the media type for a NIC. Some XFPs will identify 2961 * themselves only when their link is up, so this is initiated via a 2962 * link up interrupt. However, this can potentially take up to 2963 * several milliseconds, so it is run via the watchdog routine, rather 2964 * than in the interrupt handler itself. 
2965 */
2966 static void
2967 mxge_media_probe(mxge_softc_t *sc)
2968 {
2969 mxge_cmd_t cmd;
2970 char *cage_type;
2971
2972 struct mxge_media_type *mxge_media_types = NULL;
2973 int i, err, ms, mxge_media_type_entries;
2974 uint32_t byte;
2975
2976 sc->need_media_probe = 0;
2977
2978 if (sc->connector == MXGE_XFP) {
2979 /* -R is XFP */
2980 mxge_media_types = mxge_xfp_media_types;
2981 mxge_media_type_entries =
2982 nitems(mxge_xfp_media_types);
2983 byte = MXGE_XFP_COMPLIANCE_BYTE;
2984 cage_type = "XFP";
2985 } else if (sc->connector == MXGE_SFP) {
2986 /* -S or -2S is SFP+ */
2987 mxge_media_types = mxge_sfp_media_types;
2988 mxge_media_type_entries =
2989 nitems(mxge_sfp_media_types);
2990 cage_type = "SFP+";
2991 byte = 3;
2992 } else {
2993 /* nothing to do; media type cannot change */
2994 return;
2995 }
2996
2997 /*
2998 * At this point we know the NIC has an XFP cage, so now we
2999 * try to determine what is in the cage by using the
3000 * firmware's XFP I2C commands to read the XFP 10GbE compliance
3001 * register. We read just one byte, which may take over
3002 * a millisecond.
3003 */
3004
3005 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
3006 cmd.data1 = byte;
3007 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
3008 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
3009 device_printf(sc->dev, "failed to read XFP\n");
3010 }
3011 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
3012 device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
3013 }
3014 if (err != MXGEFW_CMD_OK) {
3015 return;
3016 }
3017
3018 /* now we wait for the data to be cached */
3019 cmd.data0 = byte;
3020 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
3021 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
3022 DELAY(1000);
3023 cmd.data0 = byte;
3024 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
3025 }
3026 if (err != MXGEFW_CMD_OK) {
3027 device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
3028 cage_type, err, ms);
3029 return;
3030 }
3031
3032 if (cmd.data0 == mxge_media_types[0].bitmask) {
3033 if (mxge_verbose)
3034 device_printf(sc->dev, "%s:%s\n", cage_type,
3035 mxge_media_types[0].name);
3036 if (sc->current_media != mxge_media_types[0].flag) {
3037 mxge_media_init(sc);
3038 mxge_media_set(sc, mxge_media_types[0].flag);
3039 }
3040 return;
3041 }
3042 for (i = 1; i < mxge_media_type_entries; i++) {
3043 if (cmd.data0 & mxge_media_types[i].bitmask) {
3044 if (mxge_verbose)
3045 device_printf(sc->dev, "%s:%s\n",
3046 cage_type,
3047 mxge_media_types[i].name);
3048
3049 if (sc->current_media != mxge_media_types[i].flag) {
3050 mxge_media_init(sc);
3051 mxge_media_set(sc, mxge_media_types[i].flag);
3052 }
3053 return;
3054 }
3055 }
3056 if (mxge_verbose)
3057 device_printf(sc->dev, "%s media 0x%x unknown\n",
3058 cage_type, cmd.data0);
3059
3060 return;
3061 }
3062
3063 static void
3064 mxge_intr(void *arg)
3065 {
3066 struct mxge_slice_state *ss = arg;
3067 mxge_softc_t *sc = ss->sc;
3068 mcp_irq_data_t *stats = ss->fw_stats;
3069 mxge_tx_ring_t *tx = &ss->tx;
3070 mxge_rx_done_t *rx_done = &ss->rx_done;
3071 uint32_t send_done_count;
3072 uint8_t valid;
3073
3074
3075 #ifndef IFNET_BUF_RING
3076 /* an interrupt on a non-zero slice is implicitly valid
3077 since MSI-X irqs are not shared */
3078 if (ss != sc->ss) {
3079 mxge_clean_rx_done(ss);
3080 *ss->irq_claim = be32toh(3);
3081 return;
3082 }
3083 #endif
3084
3085 /* make sure the DMA has finished */
3086 if (!stats->valid) {
3087 return;
3088 }
3089 valid = stats->valid;
3090
3091 if (sc->legacy_irq) {
3092 /* lower legacy IRQ */
3093 *sc->irq_deassert = 0; 3094 if (!mxge_deassert_wait) 3095 /* don't wait for conf. that irq is low */ 3096 stats->valid = 0; 3097 } else { 3098 stats->valid = 0; 3099 } 3100 3101 /* loop while waiting for legacy irq deassertion */ 3102 do { 3103 /* check for transmit completes and receives */ 3104 send_done_count = be32toh(stats->send_done_count); 3105 while ((send_done_count != tx->pkt_done) || 3106 (rx_done->entry[rx_done->idx].length != 0)) { 3107 if (send_done_count != tx->pkt_done) 3108 mxge_tx_done(ss, (int)send_done_count); 3109 mxge_clean_rx_done(ss); 3110 send_done_count = be32toh(stats->send_done_count); 3111 } 3112 if (sc->legacy_irq && mxge_deassert_wait) 3113 wmb(); 3114 } while (*((volatile uint8_t *) &stats->valid)); 3115 3116 /* fw link & error stats meaningful only on the first slice */ 3117 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 3118 if (sc->link_state != stats->link_up) { 3119 sc->link_state = stats->link_up; 3120 if (sc->link_state) { 3121 if_link_state_change(sc->ifp, LINK_STATE_UP); 3122 if (mxge_verbose) 3123 device_printf(sc->dev, "link up\n"); 3124 } else { 3125 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3126 if (mxge_verbose) 3127 device_printf(sc->dev, "link down\n"); 3128 } 3129 sc->need_media_probe = 1; 3130 } 3131 if (sc->rdma_tags_available != 3132 be32toh(stats->rdma_tags_available)) { 3133 sc->rdma_tags_available = 3134 be32toh(stats->rdma_tags_available); 3135 device_printf(sc->dev, "RDMA timed out! %d tags " 3136 "left\n", sc->rdma_tags_available); 3137 } 3138 3139 if (stats->link_down) { 3140 sc->down_cnt += stats->link_down; 3141 sc->link_state = 0; 3142 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3143 } 3144 } 3145 3146 /* check to see if we have rx token to pass back */ 3147 if (valid & 0x1) 3148 *ss->irq_claim = be32toh(3); 3149 *(ss->irq_claim + 1) = be32toh(3); 3150 } 3151 3152 static void 3153 mxge_init(void *arg) 3154 { 3155 mxge_softc_t *sc = arg; 3156 struct ifnet *ifp = sc->ifp; 3157 3158 3159 mtx_lock(&sc->driver_mtx); 3160 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 3161 (void) mxge_open(sc); 3162 mtx_unlock(&sc->driver_mtx); 3163 } 3164 3165 3166 3167 static void 3168 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 3169 { 3170 int i; 3171 3172 #if defined(INET) || defined(INET6) 3173 tcp_lro_free(&ss->lc); 3174 #endif 3175 for (i = 0; i <= ss->rx_big.mask; i++) { 3176 if (ss->rx_big.info[i].m == NULL) 3177 continue; 3178 bus_dmamap_unload(ss->rx_big.dmat, 3179 ss->rx_big.info[i].map); 3180 m_freem(ss->rx_big.info[i].m); 3181 ss->rx_big.info[i].m = NULL; 3182 } 3183 3184 for (i = 0; i <= ss->rx_small.mask; i++) { 3185 if (ss->rx_small.info[i].m == NULL) 3186 continue; 3187 bus_dmamap_unload(ss->rx_small.dmat, 3188 ss->rx_small.info[i].map); 3189 m_freem(ss->rx_small.info[i].m); 3190 ss->rx_small.info[i].m = NULL; 3191 } 3192 3193 /* transmit ring used only on the first slice */ 3194 if (ss->tx.info == NULL) 3195 return; 3196 3197 for (i = 0; i <= ss->tx.mask; i++) { 3198 ss->tx.info[i].flag = 0; 3199 if (ss->tx.info[i].m == NULL) 3200 continue; 3201 bus_dmamap_unload(ss->tx.dmat, 3202 ss->tx.info[i].map); 3203 m_freem(ss->tx.info[i].m); 3204 ss->tx.info[i].m = NULL; 3205 } 3206 } 3207 3208 static void 3209 mxge_free_mbufs(mxge_softc_t *sc) 3210 { 3211 int slice; 3212 3213 for (slice = 0; slice < sc->num_slices; slice++) 3214 mxge_free_slice_mbufs(&sc->ss[slice]); 3215 } 3216 3217 static void 3218 mxge_free_slice_rings(struct mxge_slice_state *ss) 3219 { 3220 int i; 3221 3222 3223 if 
(ss->rx_done.entry != NULL) 3224 mxge_dma_free(&ss->rx_done.dma); 3225 ss->rx_done.entry = NULL; 3226 3227 if (ss->tx.req_bytes != NULL) 3228 free(ss->tx.req_bytes, M_DEVBUF); 3229 ss->tx.req_bytes = NULL; 3230 3231 if (ss->tx.seg_list != NULL) 3232 free(ss->tx.seg_list, M_DEVBUF); 3233 ss->tx.seg_list = NULL; 3234 3235 if (ss->rx_small.shadow != NULL) 3236 free(ss->rx_small.shadow, M_DEVBUF); 3237 ss->rx_small.shadow = NULL; 3238 3239 if (ss->rx_big.shadow != NULL) 3240 free(ss->rx_big.shadow, M_DEVBUF); 3241 ss->rx_big.shadow = NULL; 3242 3243 if (ss->tx.info != NULL) { 3244 if (ss->tx.dmat != NULL) { 3245 for (i = 0; i <= ss->tx.mask; i++) { 3246 bus_dmamap_destroy(ss->tx.dmat, 3247 ss->tx.info[i].map); 3248 } 3249 bus_dma_tag_destroy(ss->tx.dmat); 3250 } 3251 free(ss->tx.info, M_DEVBUF); 3252 } 3253 ss->tx.info = NULL; 3254 3255 if (ss->rx_small.info != NULL) { 3256 if (ss->rx_small.dmat != NULL) { 3257 for (i = 0; i <= ss->rx_small.mask; i++) { 3258 bus_dmamap_destroy(ss->rx_small.dmat, 3259 ss->rx_small.info[i].map); 3260 } 3261 bus_dmamap_destroy(ss->rx_small.dmat, 3262 ss->rx_small.extra_map); 3263 bus_dma_tag_destroy(ss->rx_small.dmat); 3264 } 3265 free(ss->rx_small.info, M_DEVBUF); 3266 } 3267 ss->rx_small.info = NULL; 3268 3269 if (ss->rx_big.info != NULL) { 3270 if (ss->rx_big.dmat != NULL) { 3271 for (i = 0; i <= ss->rx_big.mask; i++) { 3272 bus_dmamap_destroy(ss->rx_big.dmat, 3273 ss->rx_big.info[i].map); 3274 } 3275 bus_dmamap_destroy(ss->rx_big.dmat, 3276 ss->rx_big.extra_map); 3277 bus_dma_tag_destroy(ss->rx_big.dmat); 3278 } 3279 free(ss->rx_big.info, M_DEVBUF); 3280 } 3281 ss->rx_big.info = NULL; 3282 } 3283 3284 static void 3285 mxge_free_rings(mxge_softc_t *sc) 3286 { 3287 int slice; 3288 3289 for (slice = 0; slice < sc->num_slices; slice++) 3290 mxge_free_slice_rings(&sc->ss[slice]); 3291 } 3292 3293 static int 3294 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3295 int tx_ring_entries) 3296 { 3297 mxge_softc_t *sc = ss->sc; 3298 size_t bytes; 3299 int err, i; 3300 3301 /* allocate per-slice receive resources */ 3302 3303 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 3304 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 3305 3306 /* allocate the rx shadow rings */ 3307 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 3308 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3309 3310 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 3311 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3312 3313 /* allocate the rx host info rings */ 3314 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 3315 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3316 3317 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 3318 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3319 3320 /* allocate the rx busdma resources */ 3321 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3322 1, /* alignment */ 3323 4096, /* boundary */ 3324 BUS_SPACE_MAXADDR, /* low */ 3325 BUS_SPACE_MAXADDR, /* high */ 3326 NULL, NULL, /* filter */ 3327 MHLEN, /* maxsize */ 3328 1, /* num segs */ 3329 MHLEN, /* maxsegsize */ 3330 BUS_DMA_ALLOCNOW, /* flags */ 3331 NULL, NULL, /* lock */ 3332 &ss->rx_small.dmat); /* tag */ 3333 if (err != 0) { 3334 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3335 err); 3336 return err; 3337 } 3338 3339 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3340 1, /* alignment */ 3341 #if MXGE_VIRT_JUMBOS 3342 4096, /* boundary */ 3343 #else 3344 0, /* 
boundary */ 3345 #endif 3346 BUS_SPACE_MAXADDR, /* low */ 3347 BUS_SPACE_MAXADDR, /* high */ 3348 NULL, NULL, /* filter */ 3349 3*4096, /* maxsize */ 3350 #if MXGE_VIRT_JUMBOS 3351 3, /* num segs */ 3352 4096, /* maxsegsize*/ 3353 #else 3354 1, /* num segs */ 3355 MJUM9BYTES, /* maxsegsize*/ 3356 #endif 3357 BUS_DMA_ALLOCNOW, /* flags */ 3358 NULL, NULL, /* lock */ 3359 &ss->rx_big.dmat); /* tag */ 3360 if (err != 0) { 3361 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3362 err); 3363 return err; 3364 } 3365 for (i = 0; i <= ss->rx_small.mask; i++) { 3366 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3367 &ss->rx_small.info[i].map); 3368 if (err != 0) { 3369 device_printf(sc->dev, "Err %d rx_small dmamap\n", 3370 err); 3371 return err; 3372 } 3373 } 3374 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3375 &ss->rx_small.extra_map); 3376 if (err != 0) { 3377 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 3378 err); 3379 return err; 3380 } 3381 3382 for (i = 0; i <= ss->rx_big.mask; i++) { 3383 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3384 &ss->rx_big.info[i].map); 3385 if (err != 0) { 3386 device_printf(sc->dev, "Err %d rx_big dmamap\n", 3387 err); 3388 return err; 3389 } 3390 } 3391 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3392 &ss->rx_big.extra_map); 3393 if (err != 0) { 3394 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 3395 err); 3396 return err; 3397 } 3398 3399 /* now allocate TX resources */ 3400 3401 #ifndef IFNET_BUF_RING 3402 /* only use a single TX ring for now */ 3403 if (ss != ss->sc->ss) 3404 return 0; 3405 #endif 3406 3407 ss->tx.mask = tx_ring_entries - 1; 3408 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3409 3410 3411 /* allocate the tx request copy block */ 3412 bytes = 8 + 3413 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3414 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 3415 /* ensure req_list entries are aligned to 8 bytes */ 3416 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3417 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL); 3418 3419 /* allocate the tx busdma segment list */ 3420 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3421 ss->tx.seg_list = (bus_dma_segment_t *) 3422 malloc(bytes, M_DEVBUF, M_WAITOK); 3423 3424 /* allocate the tx host info ring */ 3425 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3426 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3427 3428 /* allocate the tx busdma resources */ 3429 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3430 1, /* alignment */ 3431 sc->tx_boundary, /* boundary */ 3432 BUS_SPACE_MAXADDR, /* low */ 3433 BUS_SPACE_MAXADDR, /* high */ 3434 NULL, NULL, /* filter */ 3435 65536 + 256, /* maxsize */ 3436 ss->tx.max_desc - 2, /* num segs */ 3437 sc->tx_boundary, /* maxsegsz */ 3438 BUS_DMA_ALLOCNOW, /* flags */ 3439 NULL, NULL, /* lock */ 3440 &ss->tx.dmat); /* tag */ 3441 3442 if (err != 0) { 3443 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3444 err); 3445 return err; 3446 } 3447 3448 /* now use these tags to setup dmamaps for each slot 3449 in the ring */ 3450 for (i = 0; i <= ss->tx.mask; i++) { 3451 err = bus_dmamap_create(ss->tx.dmat, 0, 3452 &ss->tx.info[i].map); 3453 if (err != 0) { 3454 device_printf(sc->dev, "Err %d tx dmamap\n", 3455 err); 3456 return err; 3457 } 3458 } 3459 return 0; 3460 3461 } 3462 3463 static int 3464 mxge_alloc_rings(mxge_softc_t *sc) 3465 { 3466 mxge_cmd_t cmd; 3467 int tx_ring_size; 3468 int tx_ring_entries, rx_ring_entries; 3469 int err, slice; 3470 3471 /* get ring sizes */ 3472 err = 
mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3473 tx_ring_size = cmd.data0; 3474 if (err != 0) { 3475 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3476 goto abort; 3477 } 3478 3479 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3480 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3481 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 3482 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 3483 IFQ_SET_READY(&sc->ifp->if_snd); 3484 3485 for (slice = 0; slice < sc->num_slices; slice++) { 3486 err = mxge_alloc_slice_rings(&sc->ss[slice], 3487 rx_ring_entries, 3488 tx_ring_entries); 3489 if (err != 0) 3490 goto abort; 3491 } 3492 return 0; 3493 3494 abort: 3495 mxge_free_rings(sc); 3496 return err; 3497 3498 } 3499 3500 3501 static void 3502 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3503 { 3504 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3505 3506 if (bufsize < MCLBYTES) { 3507 /* easy, everything fits in a single buffer */ 3508 *big_buf_size = MCLBYTES; 3509 *cl_size = MCLBYTES; 3510 *nbufs = 1; 3511 return; 3512 } 3513 3514 if (bufsize < MJUMPAGESIZE) { 3515 /* still easy, everything still fits in a single buffer */ 3516 *big_buf_size = MJUMPAGESIZE; 3517 *cl_size = MJUMPAGESIZE; 3518 *nbufs = 1; 3519 return; 3520 } 3521 #if MXGE_VIRT_JUMBOS 3522 /* now we need to use virtually contiguous buffers */ 3523 *cl_size = MJUM9BYTES; 3524 *big_buf_size = 4096; 3525 *nbufs = mtu / 4096 + 1; 3526 /* needs to be a power of two, so round up */ 3527 if (*nbufs == 3) 3528 *nbufs = 4; 3529 #else 3530 *cl_size = MJUM9BYTES; 3531 *big_buf_size = MJUM9BYTES; 3532 *nbufs = 1; 3533 #endif 3534 } 3535 3536 static int 3537 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3538 { 3539 mxge_softc_t *sc; 3540 mxge_cmd_t cmd; 3541 bus_dmamap_t map; 3542 int err, i, slice; 3543 3544 3545 sc = ss->sc; 3546 slice = ss - sc->ss; 3547 3548 #if defined(INET) || defined(INET6) 3549 (void)tcp_lro_init(&ss->lc); 3550 #endif 3551 ss->lc.ifp = sc->ifp; 3552 3553 /* get the lanai pointers to the send and receive rings */ 3554 3555 err = 0; 3556 #ifndef IFNET_BUF_RING 3557 /* We currently only send from the first slice */ 3558 if (slice == 0) { 3559 #endif 3560 cmd.data0 = slice; 3561 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3562 ss->tx.lanai = 3563 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3564 ss->tx.send_go = (volatile uint32_t *) 3565 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3566 ss->tx.send_stop = (volatile uint32_t *) 3567 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3568 #ifndef IFNET_BUF_RING 3569 } 3570 #endif 3571 cmd.data0 = slice; 3572 err |= mxge_send_cmd(sc, 3573 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3574 ss->rx_small.lanai = 3575 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3576 cmd.data0 = slice; 3577 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3578 ss->rx_big.lanai = 3579 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3580 3581 if (err != 0) { 3582 device_printf(sc->dev, 3583 "failed to get ring sizes or locations\n"); 3584 return EIO; 3585 } 3586 3587 /* stock receive rings */ 3588 for (i = 0; i <= ss->rx_small.mask; i++) { 3589 map = ss->rx_small.info[i].map; 3590 err = mxge_get_buf_small(ss, map, i); 3591 if (err) { 3592 device_printf(sc->dev, "alloced %d/%d smalls\n", 3593 i, ss->rx_small.mask + 1); 3594 return ENOMEM; 3595 } 3596 } 3597 for (i = 0; i <= ss->rx_big.mask; i++) { 
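/* poison the slot: not yet backed by a real buffer (cf. the addr_low validity trick in mxge_submit_8rx()) */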
3598 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3599 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3600 }
3601 ss->rx_big.nbufs = nbufs;
3602 ss->rx_big.cl_size = cl_size;
3603 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
3604 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3605 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3606 map = ss->rx_big.info[i].map;
3607 err = mxge_get_buf_big(ss, map, i);
3608 if (err) {
3609 device_printf(sc->dev, "alloced %d/%d bigs\n",
3610 i, ss->rx_big.mask + 1);
3611 return ENOMEM;
3612 }
3613 }
3614 return 0;
3615 }
3616
3617 static int
3618 mxge_open(mxge_softc_t *sc)
3619 {
3620 mxge_cmd_t cmd;
3621 int err, big_bytes, nbufs, slice, cl_size, i;
3622 bus_addr_t bus;
3623 volatile uint8_t *itable;
3624 struct mxge_slice_state *ss;
3625
3626 /* Copy the MAC address in case it was overridden */
3627 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3628
3629 err = mxge_reset(sc, 1);
3630 if (err != 0) {
3631 device_printf(sc->dev, "failed to reset\n");
3632 return EIO;
3633 }
3634
3635 if (sc->num_slices > 1) {
3636 /* setup the indirection table */
3637 cmd.data0 = sc->num_slices;
3638 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3639 &cmd);
3640
3641 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3642 &cmd);
3643 if (err != 0) {
3644 device_printf(sc->dev,
3645 "failed to setup rss tables\n");
3646 return err;
3647 }
3648
3649 /* just enable an identity mapping */
3650 itable = sc->sram + cmd.data0;
3651 for (i = 0; i < sc->num_slices; i++)
3652 itable[i] = (uint8_t)i;
3653
3654 cmd.data0 = 1;
3655 cmd.data1 = mxge_rss_hash_type;
3656 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3657 if (err != 0) {
3658 device_printf(sc->dev, "failed to enable slices\n");
3659 return err;
3660 }
3661 }
3662
3663
3664 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3665
3666 cmd.data0 = nbufs;
3667 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3668 &cmd);
3669 /* error is only meaningful if we're trying to set
3670 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3671 if (err && nbufs > 1) {
3672 device_printf(sc->dev,
3673 "Failed to set always-use-n to %d\n",
3674 nbufs);
3675 return EIO;
3676 }
3677 /* Give the firmware the mtu and the big and small buffer
3678 sizes. The firmware wants the big buf size to be a power
3679 of two.
Luckily, FreeBSD's clusters are powers of two */ 3680 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3681 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3682 cmd.data0 = MHLEN - MXGEFW_PAD; 3683 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3684 &cmd); 3685 cmd.data0 = big_bytes; 3686 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3687 3688 if (err != 0) { 3689 device_printf(sc->dev, "failed to setup params\n"); 3690 goto abort; 3691 } 3692 3693 /* Now give him the pointer to the stats block */ 3694 for (slice = 0; 3695 #ifdef IFNET_BUF_RING 3696 slice < sc->num_slices; 3697 #else 3698 slice < 1; 3699 #endif 3700 slice++) { 3701 ss = &sc->ss[slice]; 3702 cmd.data0 = 3703 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3704 cmd.data1 = 3705 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3706 cmd.data2 = sizeof(struct mcp_irq_data); 3707 cmd.data2 |= (slice << 16); 3708 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3709 } 3710 3711 if (err != 0) { 3712 bus = sc->ss->fw_stats_dma.bus_addr; 3713 bus += offsetof(struct mcp_irq_data, send_done_count); 3714 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3715 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3716 err = mxge_send_cmd(sc, 3717 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3718 &cmd); 3719 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3720 sc->fw_multicast_support = 0; 3721 } else { 3722 sc->fw_multicast_support = 1; 3723 } 3724 3725 if (err != 0) { 3726 device_printf(sc->dev, "failed to setup params\n"); 3727 goto abort; 3728 } 3729 3730 for (slice = 0; slice < sc->num_slices; slice++) { 3731 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3732 if (err != 0) { 3733 device_printf(sc->dev, "couldn't open slice %d\n", 3734 slice); 3735 goto abort; 3736 } 3737 } 3738 3739 /* Finally, start the firmware running */ 3740 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3741 if (err) { 3742 device_printf(sc->dev, "Couldn't bring up link\n"); 3743 goto abort; 3744 } 3745 #ifdef IFNET_BUF_RING 3746 for (slice = 0; slice < sc->num_slices; slice++) { 3747 ss = &sc->ss[slice]; 3748 ss->if_drv_flags |= IFF_DRV_RUNNING; 3749 ss->if_drv_flags &= ~IFF_DRV_OACTIVE; 3750 } 3751 #endif 3752 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3753 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3754 3755 return 0; 3756 3757 3758 abort: 3759 mxge_free_mbufs(sc); 3760 3761 return err; 3762 } 3763 3764 static int 3765 mxge_close(mxge_softc_t *sc, int down) 3766 { 3767 mxge_cmd_t cmd; 3768 int err, old_down_cnt; 3769 #ifdef IFNET_BUF_RING 3770 struct mxge_slice_state *ss; 3771 int slice; 3772 #endif 3773 3774 #ifdef IFNET_BUF_RING 3775 for (slice = 0; slice < sc->num_slices; slice++) { 3776 ss = &sc->ss[slice]; 3777 ss->if_drv_flags &= ~IFF_DRV_RUNNING; 3778 } 3779 #endif 3780 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3781 if (!down) { 3782 old_down_cnt = sc->down_cnt; 3783 wmb(); 3784 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3785 if (err) { 3786 device_printf(sc->dev, 3787 "Couldn't bring down link\n"); 3788 } 3789 if (old_down_cnt == sc->down_cnt) { 3790 /* wait for down irq */ 3791 DELAY(10 * sc->intr_coal_delay); 3792 } 3793 wmb(); 3794 if (old_down_cnt == sc->down_cnt) { 3795 device_printf(sc->dev, "never got down irq\n"); 3796 } 3797 } 3798 mxge_free_mbufs(sc); 3799 3800 return 0; 3801 } 3802 3803 static void 3804 mxge_setup_cfg_space(mxge_softc_t *sc) 3805 { 3806 device_t dev = sc->dev; 3807 int reg; 3808 uint16_t lnk, pectl; 3809 3810 /* find the PCIe link width and set max read 
request to 4KB */
3811 if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
3812 lnk = pci_read_config(dev, reg + 0x12, 2);
3813 sc->link_width = (lnk >> 4) & 0x3f;
3814
3815 if (sc->pectl == 0) {
3816 pectl = pci_read_config(dev, reg + 0x8, 2);
3817 pectl = (pectl & ~0x7000) | (5 << 12);
3818 pci_write_config(dev, reg + 0x8, pectl, 2);
3819 sc->pectl = pectl;
3820 } else {
3821 /* restore saved pectl after watchdog reset */
3822 pci_write_config(dev, reg + 0x8, sc->pectl, 2);
3823 }
3824 }
3825
3826 /* Enable DMA and Memory space access */
3827 pci_enable_busmaster(dev);
3828 }
3829
3830 static uint32_t
3831 mxge_read_reboot(mxge_softc_t *sc)
3832 {
3833 device_t dev = sc->dev;
3834 uint32_t vs;
3835
3836 /* find the vendor specific offset */
3837 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) {
3838 device_printf(sc->dev,
3839 "could not find vendor specific offset\n");
3840 return (uint32_t)-1;
3841 }
3842 /* enable read32 mode */
3843 pci_write_config(dev, vs + 0x10, 0x3, 1);
3844 /* tell NIC which register to read */
3845 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3846 return (pci_read_config(dev, vs + 0x14, 4));
3847 }
3848
3849 static void
3850 mxge_watchdog_reset(mxge_softc_t *sc)
3851 {
3852 struct pci_devinfo *dinfo;
3853 struct mxge_slice_state *ss;
3854 int err, running, s, num_tx_slices = 1;
3855 uint32_t reboot;
3856 uint16_t cmd;
3857
3858 err = ENXIO;
3859
3860 device_printf(sc->dev, "Watchdog reset!\n");
3861
3862 /*
3863 * check to see if the NIC rebooted. If it did, then all of
3864 * PCI config space has been reset, and things like the
3865 * busmaster bit will be zero. If this is the case, then we
3866 * must restore PCI config space before the NIC can be used
3867 * again.
3868 */
3869 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3870 if (cmd == 0xffff) {
3871 /*
3872 * maybe the watchdog caught the NIC rebooting; wait
3873 * up to 100ms for it to finish.
If it does not come
3874 * back, then give up.
3875 */
3876 DELAY(1000*100);
3877 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3878 if (cmd == 0xffff) {
3879 device_printf(sc->dev, "NIC disappeared!\n");
3880 }
3881 }
3882 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3883 /* print the reboot status */
3884 reboot = mxge_read_reboot(sc);
3885 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3886 reboot);
3887 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
3888 if (running) {
3889
3890 /*
3891 * quiesce NIC so that TX routines will not try to
3892 * xmit after restoration of BAR
3893 */
3894
3895 /* Mark the link as down */
3896 if (sc->link_state) {
3897 sc->link_state = 0;
3898 if_link_state_change(sc->ifp,
3899 LINK_STATE_DOWN);
3900 }
3901 #ifdef IFNET_BUF_RING
3902 num_tx_slices = sc->num_slices;
3903 #endif
3904 /* grab all TX locks to ensure no tx */
3905 for (s = 0; s < num_tx_slices; s++) {
3906 ss = &sc->ss[s];
3907 mtx_lock(&ss->tx.mtx);
3908 }
3909 mxge_close(sc, 1);
3910 }
3911 /* restore PCI configuration space */
3912 dinfo = device_get_ivars(sc->dev);
3913 pci_cfg_restore(sc->dev, dinfo);
3914
3915 /* and redo any changes we made to our config space */
3916 mxge_setup_cfg_space(sc);
3917
3918 /* reload f/w */
3919 err = mxge_load_firmware(sc, 0);
3920 if (err) {
3921 device_printf(sc->dev,
3922 "Unable to re-load f/w\n");
3923 }
3924 if (running) {
3925 if (!err)
3926 err = mxge_open(sc);
3927 /* release all TX locks */
3928 for (s = 0; s < num_tx_slices; s++) {
3929 ss = &sc->ss[s];
3930 #ifdef IFNET_BUF_RING
3931 mxge_start_locked(ss);
3932 #endif
3933 mtx_unlock(&ss->tx.mtx);
3934 }
3935 }
3936 sc->watchdog_resets++;
3937 } else {
3938 device_printf(sc->dev,
3939 "NIC did not reboot, not resetting\n");
3940 err = 0;
3941 }
3942 if (err) {
3943 device_printf(sc->dev, "watchdog reset failed\n");
3944 } else {
3945 if (sc->dying == 2)
3946 sc->dying = 0;
3947 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3948 }
3949 }
3950
3951 static void
3952 mxge_watchdog_task(void *arg, int pending)
3953 {
3954 mxge_softc_t *sc = arg;
3955
3956
3957 mtx_lock(&sc->driver_mtx);
3958 mxge_watchdog_reset(sc);
3959 mtx_unlock(&sc->driver_mtx);
3960 }
3961
3962 static void
3963 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3964 {
3965 tx = &sc->ss[slice].tx;
3966 device_printf(sc->dev, "slice %d stuck?
ring state:\n", slice); 3967 device_printf(sc->dev, 3968 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3969 tx->req, tx->done, tx->queue_active); 3970 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3971 tx->activate, tx->deactivate); 3972 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3973 tx->pkt_done, 3974 be32toh(sc->ss->fw_stats->send_done_count)); 3975 } 3976 3977 static int 3978 mxge_watchdog(mxge_softc_t *sc) 3979 { 3980 mxge_tx_ring_t *tx; 3981 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3982 int i, err = 0; 3983 3984 /* see if we have outstanding transmits, which 3985 have been pending for more than mxge_ticks */ 3986 for (i = 0; 3987 #ifdef IFNET_BUF_RING 3988 (i < sc->num_slices) && (err == 0); 3989 #else 3990 (i < 1) && (err == 0); 3991 #endif 3992 i++) { 3993 tx = &sc->ss[i].tx; 3994 if (tx->req != tx->done && 3995 tx->watchdog_req != tx->watchdog_done && 3996 tx->done == tx->watchdog_done) { 3997 /* check for pause blocking before resetting */ 3998 if (tx->watchdog_rx_pause == rx_pause) { 3999 mxge_warn_stuck(sc, tx, i); 4000 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 4001 return (ENXIO); 4002 } 4003 else 4004 device_printf(sc->dev, "Flow control blocking " 4005 "xmits, check link partner\n"); 4006 } 4007 4008 tx->watchdog_req = tx->req; 4009 tx->watchdog_done = tx->done; 4010 tx->watchdog_rx_pause = rx_pause; 4011 } 4012 4013 if (sc->need_media_probe) 4014 mxge_media_probe(sc); 4015 return (err); 4016 } 4017 4018 static uint64_t 4019 mxge_get_counter(struct ifnet *ifp, ift_counter cnt) 4020 { 4021 struct mxge_softc *sc; 4022 uint64_t rv; 4023 4024 sc = if_getsoftc(ifp); 4025 rv = 0; 4026 4027 switch (cnt) { 4028 case IFCOUNTER_IPACKETS: 4029 for (int s = 0; s < sc->num_slices; s++) 4030 rv += sc->ss[s].ipackets; 4031 return (rv); 4032 case IFCOUNTER_OPACKETS: 4033 for (int s = 0; s < sc->num_slices; s++) 4034 rv += sc->ss[s].opackets; 4035 return (rv); 4036 case IFCOUNTER_OERRORS: 4037 for (int s = 0; s < sc->num_slices; s++) 4038 rv += sc->ss[s].oerrors; 4039 return (rv); 4040 #ifdef IFNET_BUF_RING 4041 case IFCOUNTER_OBYTES: 4042 for (int s = 0; s < sc->num_slices; s++) 4043 rv += sc->ss[s].obytes; 4044 return (rv); 4045 case IFCOUNTER_OMCASTS: 4046 for (int s = 0; s < sc->num_slices; s++) 4047 rv += sc->ss[s].omcasts; 4048 return (rv); 4049 case IFCOUNTER_OQDROPS: 4050 for (int s = 0; s < sc->num_slices; s++) 4051 rv += sc->ss[s].tx.br->br_drops; 4052 return (rv); 4053 #endif 4054 default: 4055 return (if_get_counter_default(ifp, cnt)); 4056 } 4057 } 4058 4059 static void 4060 mxge_tick(void *arg) 4061 { 4062 mxge_softc_t *sc = arg; 4063 u_long pkts = 0; 4064 int err = 0; 4065 int running, ticks; 4066 uint16_t cmd; 4067 4068 ticks = mxge_ticks; 4069 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 4070 if (running) { 4071 if (!sc->watchdog_countdown) { 4072 err = mxge_watchdog(sc); 4073 sc->watchdog_countdown = 4; 4074 } 4075 sc->watchdog_countdown--; 4076 } 4077 if (pkts == 0) { 4078 /* ensure NIC did not suffer h/w fault while idle */ 4079 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 4080 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 4081 sc->dying = 2; 4082 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 4083 err = ENXIO; 4084 } 4085 /* look less often if NIC is idle */ 4086 ticks *= 4; 4087 } 4088 4089 if (err == 0) 4090 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 4091 4092 } 4093 4094 static int 4095 mxge_media_change(struct ifnet *ifp) 4096 { 4097 return EINVAL; 4098 } 4099 4100 static int 4101 mxge_change_mtu(mxge_softc_t 
4102 {
4103	struct ifnet *ifp = sc->ifp;
4104	int real_mtu, old_mtu;
4105	int err = 0;
4106
4107
4108	real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
4109	if ((real_mtu > sc->max_mtu) || real_mtu < 60)
4110		return EINVAL;
4111	mtx_lock(&sc->driver_mtx);
4112	old_mtu = ifp->if_mtu;
4113	ifp->if_mtu = mtu;
4114	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4115		mxge_close(sc, 0);
4116		err = mxge_open(sc);
4117		if (err != 0) {
4118			ifp->if_mtu = old_mtu;
4119			mxge_close(sc, 0);
4120			(void) mxge_open(sc);
4121		}
4122	}
4123	mtx_unlock(&sc->driver_mtx);
4124	return err;
4125 }
4126
4127 static void
4128 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
4129 {
4130	mxge_softc_t *sc = ifp->if_softc;
4131
4132
4133	if (sc == NULL)
4134		return;
4135	ifmr->ifm_status = IFM_AVALID;
4136	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
4137	ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
4138	ifmr->ifm_active |= sc->current_media;
4139 }
4140
4141 static int
4142 mxge_fetch_i2c(mxge_softc_t *sc, struct ifi2creq *i2c)
4143 {
4144	mxge_cmd_t cmd;
4145	uint32_t i2c_args;
4146	int i, ms, err;
4147
4148
4149	if (i2c->dev_addr != 0xA0 &&
4150	    i2c->dev_addr != 0xA2)
4151		return (EINVAL);
4152	if (i2c->len > sizeof(i2c->data))
4153		return (EINVAL);
4154
4155	for (i = 0; i < i2c->len; i++) {
4156		i2c_args = i2c->dev_addr << 0x8;
4157		i2c_args |= i2c->offset + i;
4158		cmd.data0 = 0;	/* just fetch 1 byte, not all 256 */
4159		cmd.data1 = i2c_args;
4160		err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
4161
4162		if (err != MXGEFW_CMD_OK)
4163			return (EIO);
4164		/* now we wait for the data to be cached */
4165		cmd.data0 = i2c_args & 0xff;
4166		err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
4167		for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
4168			cmd.data0 = i2c_args & 0xff;
4169			err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
4170			if (err == EBUSY)
4171				DELAY(1000);
4172		}
4173		if (err != MXGEFW_CMD_OK)
4174			return (EIO);
4175		i2c->data[i] = cmd.data0;
4176	}
4177	return (0);
4178 }
4179
4180 static int
4181 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
4182 {
4183	mxge_softc_t *sc = ifp->if_softc;
4184	struct ifreq *ifr = (struct ifreq *)data;
4185	struct ifi2creq i2c;
4186	int err, mask;
4187
4188	err = 0;
4189	switch (command) {
4190	case SIOCSIFMTU:
4191		err = mxge_change_mtu(sc, ifr->ifr_mtu);
4192		break;
4193
4194	case SIOCSIFFLAGS:
4195		mtx_lock(&sc->driver_mtx);
4196		if (sc->dying) {
4197			mtx_unlock(&sc->driver_mtx);
4198			return EINVAL;
4199		}
4200		if (ifp->if_flags & IFF_UP) {
4201			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4202				err = mxge_open(sc);
4203			} else {
4204				/* take care of promisc and allmulti
4205				   flag changes */
4206				mxge_change_promisc(sc,
4207						    ifp->if_flags & IFF_PROMISC);
4208				mxge_set_multicast_list(sc);
4209			}
4210		} else {
4211			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4212				mxge_close(sc, 0);
4213			}
4214		}
4215		mtx_unlock(&sc->driver_mtx);
4216		break;
4217
4218	case SIOCADDMULTI:
4219	case SIOCDELMULTI:
4220		mtx_lock(&sc->driver_mtx);
4221		if (sc->dying) {
4222			mtx_unlock(&sc->driver_mtx);
4223			return (EINVAL);
4224		}
4225		mxge_set_multicast_list(sc);
4226		mtx_unlock(&sc->driver_mtx);
4227		break;
4228
4229	case SIOCSIFCAP:
4230		mtx_lock(&sc->driver_mtx);
4231		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
4232		if (mask & IFCAP_TXCSUM) {
4233			if (IFCAP_TXCSUM & ifp->if_capenable) {
4234				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
4235				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
4236			} else {
4237				ifp->if_capenable |= IFCAP_TXCSUM;
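				/*
				 * Editorial note: IFCAP_* bits in
				 * if_capenable advertise an offload to the
				 * stack, while CSUM_* bits in if_hwassist
				 * tell it which checksums the hardware will
				 * compute; the two must be toggled together,
				 * as in the enable path here and the disable
				 * path above.
				 */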
4238				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
4239			}
4240		} else if (mask & IFCAP_RXCSUM) {
4241			if (IFCAP_RXCSUM & ifp->if_capenable) {
4242				ifp->if_capenable &= ~IFCAP_RXCSUM;
4243			} else {
4244				ifp->if_capenable |= IFCAP_RXCSUM;
4245			}
4246		}
4247		if (mask & IFCAP_TSO4) {
4248			if (IFCAP_TSO4 & ifp->if_capenable) {
4249				ifp->if_capenable &= ~IFCAP_TSO4;
4250			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
4251				ifp->if_capenable |= IFCAP_TSO4;
4252				ifp->if_hwassist |= CSUM_TSO;
4253			} else {
4254				printf("mxge requires tx checksum offload"
4255				       " be enabled to use TSO\n");
4256				err = EINVAL;
4257			}
4258		}
4259 #if IFCAP_TSO6
4260		if (mask & IFCAP_TXCSUM_IPV6) {
4261			if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
4262				ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6
4263						       | IFCAP_TSO6);
4264				ifp->if_hwassist &= ~(CSUM_TCP_IPV6
4265						      | CSUM_UDP_IPV6);
4266			} else {
4267				ifp->if_capenable |= IFCAP_TXCSUM_IPV6;
4268				ifp->if_hwassist |= (CSUM_TCP_IPV6
4269						     | CSUM_UDP_IPV6);
4270			}
4271		} else if (mask & IFCAP_RXCSUM_IPV6) {
4272			if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) {
4273				ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6;
4274			} else {
4275				ifp->if_capenable |= IFCAP_RXCSUM_IPV6;
4276			}
4277		}
4278		if (mask & IFCAP_TSO6) {
4279			if (IFCAP_TSO6 & ifp->if_capenable) {
4280				ifp->if_capenable &= ~IFCAP_TSO6;
4281			} else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
4282				ifp->if_capenable |= IFCAP_TSO6;
4283				ifp->if_hwassist |= CSUM_TSO;
4284			} else {
4285				printf("mxge requires tx checksum offload"
4286				       " be enabled to use TSO\n");
4287				err = EINVAL;
4288			}
4289		}
4290 #endif /* IFCAP_TSO6 */
4291
4292		if (mask & IFCAP_LRO)
4293			ifp->if_capenable ^= IFCAP_LRO;
4294		if (mask & IFCAP_VLAN_HWTAGGING)
4295			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
4296		if (mask & IFCAP_VLAN_HWTSO)
4297			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
4298
4299		if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) ||
4300		    !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING))
4301			ifp->if_capenable &= ~IFCAP_VLAN_HWTSO;
4302
4303		mtx_unlock(&sc->driver_mtx);
4304		VLAN_CAPABILITIES(ifp);
4305
4306		break;
4307
4308	case SIOCGIFMEDIA:
4309		mtx_lock(&sc->driver_mtx);
4310		if (sc->dying) {
4311			mtx_unlock(&sc->driver_mtx);
4312			return (EINVAL);
4313		}
4314		mxge_media_probe(sc);
4315		mtx_unlock(&sc->driver_mtx);
4316		err = ifmedia_ioctl(ifp, (struct ifreq *)data,
4317				    &sc->media, command);
4318		break;
4319
4320	case SIOCGI2C:
4321		if (sc->connector != MXGE_XFP &&
4322		    sc->connector != MXGE_SFP) {
4323			err = ENXIO;
4324			break;
4325		}
4326		err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
4327		if (err != 0)
4328			break;
4329		mtx_lock(&sc->driver_mtx);
4330		if (sc->dying) {
4331			mtx_unlock(&sc->driver_mtx);
4332			return (EINVAL);
4333		}
4334		err = mxge_fetch_i2c(sc, &i2c);
4335		mtx_unlock(&sc->driver_mtx);
4336		if (err == 0)
4337			err = copyout(&i2c, ifr->ifr_ifru.ifru_data,
4338				      sizeof(i2c));
4339		break;
4340	default:
4341		err = ether_ioctl(ifp, command, data);
4342		break;
4343	}
4344	return err;
4345 }
4346
4347 static void
4348 mxge_fetch_tunables(mxge_softc_t *sc)
4349 {
4350
4351	TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
4352	TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
4353			  &mxge_flow_control);
4354	TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
4355			  &mxge_intr_coal_delay);
4356	TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
4357			  &mxge_nvidia_ecrc_enable);
4358	TUNABLE_INT_FETCH("hw.mxge.force_firmware",
4359			  &mxge_force_firmware);
4360	TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
4361			  &mxge_deassert_wait);
4362	TUNABLE_INT_FETCH("hw.mxge.verbose",
4363			  &mxge_verbose);
4364	TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
4365	TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
4366	TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
4367	TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
4368	TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
4369	TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);
4370
4371	if (bootverbose)
4372		mxge_verbose = 1;
4373	if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
4374		mxge_intr_coal_delay = 30;
4375	if (mxge_ticks == 0)
4376		mxge_ticks = hz / 2;
4377	sc->pause = mxge_flow_control;
4378	if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
4379	    || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
4380		mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4381	}
4382	if (mxge_initial_mtu > ETHERMTU_JUMBO ||
4383	    mxge_initial_mtu < ETHER_MIN_LEN)
4384		mxge_initial_mtu = ETHERMTU_JUMBO;
4385
4386	if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
4387		mxge_throttle = MXGE_MAX_THROTTLE;
4388	if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
4389		mxge_throttle = MXGE_MIN_THROTTLE;
4390	sc->throttle = mxge_throttle;
4391 }
4392
4393
4394 static void
4395 mxge_free_slices(mxge_softc_t *sc)
4396 {
4397	struct mxge_slice_state *ss;
4398	int i;
4399
4400
4401	if (sc->ss == NULL)
4402		return;
4403
4404	for (i = 0; i < sc->num_slices; i++) {
4405		ss = &sc->ss[i];
4406		if (ss->fw_stats != NULL) {
4407			mxge_dma_free(&ss->fw_stats_dma);
4408			ss->fw_stats = NULL;
4409 #ifdef IFNET_BUF_RING
4410			if (ss->tx.br != NULL) {
4411				drbr_free(ss->tx.br, M_DEVBUF);
4412				ss->tx.br = NULL;
4413			}
4414 #endif
4415			mtx_destroy(&ss->tx.mtx);
4416		}
4417		if (ss->rx_done.entry != NULL) {
4418			mxge_dma_free(&ss->rx_done.dma);
4419			ss->rx_done.entry = NULL;
4420		}
4421	}
4422	free(sc->ss, M_DEVBUF);
4423	sc->ss = NULL;
4424 }
4425
4426 static int
4427 mxge_alloc_slices(mxge_softc_t *sc)
4428 {
4429	mxge_cmd_t cmd;
4430	struct mxge_slice_state *ss;
4431	size_t bytes;
4432	int err, i, max_intr_slots;
4433
4434	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4435	if (err != 0) {
4436		device_printf(sc->dev, "Cannot determine rx ring size\n");
4437		return err;
4438	}
4439	sc->rx_ring_size = cmd.data0;
4440	max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
4441
4442	bytes = sizeof (*sc->ss) * sc->num_slices;
4443	sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
4444	if (sc->ss == NULL)
4445		return (ENOMEM);
4446	for (i = 0; i < sc->num_slices; i++) {
4447		ss = &sc->ss[i];
4448
4449		ss->sc = sc;
4450
4451		/* allocate per-slice rx interrupt queues */
4452
4453		bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
4454		err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
4455		if (err != 0)
4456			goto abort;
4457		ss->rx_done.entry = ss->rx_done.dma.addr;
4458		bzero(ss->rx_done.entry, bytes);
4459
4460		/*
4461		 * allocate the per-slice firmware stats; stats
4462		 * (including tx) are used only on the first
4463		 * slice for now
4464		 */
4465 #ifndef IFNET_BUF_RING
4466		if (i > 0)
4467			continue;
4468 #endif
4469
4470		bytes = sizeof (*ss->fw_stats);
4471		err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
4472				     sizeof (*ss->fw_stats), 64);
4473		if (err != 0)
4474			goto abort;
4475		ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
4476		snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
4477			 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
4478		mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
4479 #ifdef IFNET_BUF_RING
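		/*
		 * Editorial note: with IFNET_BUF_RING each slice gets its
		 * own 2048-entry buf_ring, so if_transmit() can stage
		 * mbufs per TX queue instead of through one shared ifnet
		 * queue; mxge_start_locked() drains the ring while holding
		 * ss->tx.mtx, which is also the lock passed to
		 * buf_ring_alloc() below.
		 */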
4480		ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
4481					   &ss->tx.mtx);
4482 #endif
4483	}
4484
4485	return (0);
4486
4487 abort:
4488	mxge_free_slices(sc);
4489	return (ENOMEM);
4490 }
4491
4492 static void
4493 mxge_slice_probe(mxge_softc_t *sc)
4494 {
4495	mxge_cmd_t cmd;
4496	char *old_fw;
4497	int msix_cnt, status, max_intr_slots;
4498
4499	sc->num_slices = 1;
4500	/*
4501	 * don't enable multiple slices if the tunable disables them,
4502	 * or if this is not an SMP system
4503	 */
4504
4505	if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
4506		return;
4507
4508	/* see how many MSI-X interrupts are available */
4509	msix_cnt = pci_msix_count(sc->dev);
4510	if (msix_cnt < 2)
4511		return;
4512
4513	/* now load the slice-aware firmware and see what it supports */
4514	old_fw = sc->fw_name;
4515	if (old_fw == mxge_fw_aligned)
4516		sc->fw_name = mxge_fw_rss_aligned;
4517	else
4518		sc->fw_name = mxge_fw_rss_unaligned;
4519	status = mxge_load_firmware(sc, 0);
4520	if (status != 0) {
4521		device_printf(sc->dev, "Falling back to a single slice\n");
4522		return;
4523	}
4524
4525	/* try to send a reset command to the card to see if it
4526	   is alive */
4527	memset(&cmd, 0, sizeof (cmd));
4528	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4529	if (status != 0) {
4530		device_printf(sc->dev, "failed reset\n");
4531		goto abort_with_fw;
4532	}
4533
4534	/* get rx ring size */
4535	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4536	if (status != 0) {
4537		device_printf(sc->dev, "Cannot determine rx ring size\n");
4538		goto abort_with_fw;
4539	}
4540	max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
4541
4542	/* tell it the size of the interrupt queues */
4543	cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
4544	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4545	if (status != 0) {
4546		device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4547		goto abort_with_fw;
4548	}
4549
4550	/* ask for the maximum number of slices it supports */
4551	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4552	if (status != 0) {
4553		device_printf(sc->dev,
4554			      "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4555		goto abort_with_fw;
4556	}
4557	sc->num_slices = cmd.data0;
4558	if (sc->num_slices > msix_cnt)
4559		sc->num_slices = msix_cnt;
4560
4561	if (mxge_max_slices == -1) {
4562		/* cap to number of CPUs in system */
4563		if (sc->num_slices > mp_ncpus)
4564			sc->num_slices = mp_ncpus;
4565	} else {
4566		if (sc->num_slices > mxge_max_slices)
4567			sc->num_slices = mxge_max_slices;
4568	}
4569	/* make sure it is a power of two */
4570	while (sc->num_slices & (sc->num_slices - 1))
4571		sc->num_slices--;
4572
4573	if (mxge_verbose)
4574		device_printf(sc->dev, "using %d slices\n",
4575			      sc->num_slices);
4576
4577	return;
4578
4579 abort_with_fw:
4580	sc->fw_name = old_fw;
4581	(void) mxge_load_firmware(sc, 0);
4582 }
4583
4584 static int
4585 mxge_add_msix_irqs(mxge_softc_t *sc)
4586 {
4587	size_t bytes;
4588	int count, err, i, rid;
4589
4590	rid = PCIR_BAR(2);
4591	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4592						    &rid, RF_ACTIVE);
4593
4594	if (sc->msix_table_res == NULL) {
4595		device_printf(sc->dev, "couldn't alloc MSIX table res\n");
4596		return ENXIO;
4597	}
4598
4599	count = sc->num_slices;
4600	err = pci_alloc_msix(sc->dev, &count);
4601	if (err != 0) {
4602		device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d, "
4603			      "err = %d\n", sc->num_slices, err);
4604		goto abort_with_msix_table;
4605	}
4606	if (count < sc->num_slices) {
4607		device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
4608			      sc->num_slices, count);
4609		device_printf(sc->dev,
4610			      "Try setting hw.mxge.max_slices to %d\n",
4611			      count);
4612		err = ENOSPC;
4613		goto abort_with_msix;
4614	}
4615	bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
4616	sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4617	if (sc->msix_irq_res == NULL) {
4618		err = ENOMEM;
4619		goto abort_with_msix;
4620	}
4621
4622	for (i = 0; i < sc->num_slices; i++) {
4623		rid = i + 1;
4624		sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
4625							     SYS_RES_IRQ,
4626							     &rid, RF_ACTIVE);
4627		if (sc->msix_irq_res[i] == NULL) {
4628			device_printf(sc->dev, "couldn't allocate IRQ res"
4629				      " for message %d\n", i);
4630			err = ENXIO;
4631			goto abort_with_res;
4632		}
4633	}
4634
4635	bytes = sizeof (*sc->msix_ih) * sc->num_slices;
4636	sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4637
4638	for (i = 0; i < sc->num_slices; i++) {
4639		err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
4640				     INTR_TYPE_NET | INTR_MPSAFE,
4641 #if __FreeBSD_version > 700030
4642				     NULL,
4643 #endif
4644				     mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
4645		if (err != 0) {
4646			device_printf(sc->dev, "couldn't setup intr for "
4647				      "message %d\n", i);
4648			goto abort_with_intr;
4649		}
4650		bus_describe_intr(sc->dev, sc->msix_irq_res[i],
4651				  sc->msix_ih[i], "s%d", i);
4652	}
4653
4654	if (mxge_verbose) {
4655		device_printf(sc->dev, "using %d msix IRQs:",
4656			      sc->num_slices);
4657		for (i = 0; i < sc->num_slices; i++)
4658			printf(" %jd", rman_get_start(sc->msix_irq_res[i]));
4659		printf("\n");
4660	}
4661	return (0);
4662
4663 abort_with_intr:
4664	for (i = 0; i < sc->num_slices; i++) {
4665		if (sc->msix_ih[i] != NULL) {
4666			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4667					  sc->msix_ih[i]);
4668			sc->msix_ih[i] = NULL;
4669		}
4670	}
4671	free(sc->msix_ih, M_DEVBUF);
4672
4673
4674 abort_with_res:
4675	for (i = 0; i < sc->num_slices; i++) {
4676		rid = i + 1;
4677		if (sc->msix_irq_res[i] != NULL)
4678			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4679					     sc->msix_irq_res[i]);
4680		sc->msix_irq_res[i] = NULL;
4681	}
4682	free(sc->msix_irq_res, M_DEVBUF);
4683
4684
4685 abort_with_msix:
4686	pci_release_msi(sc->dev);
4687
4688 abort_with_msix_table:
4689	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4690			     sc->msix_table_res);
4691
4692	return err;
4693 }
4694
4695 static int
4696 mxge_add_single_irq(mxge_softc_t *sc)
4697 {
4698	int count, err, rid;
4699
4700	count = pci_msi_count(sc->dev);
4701	if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
4702		rid = 1;
4703	} else {
4704		rid = 0;
4705		sc->legacy_irq = 1;
4706	}
4707	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
4708					     RF_SHAREABLE | RF_ACTIVE);
4709	if (sc->irq_res == NULL) {
4710		device_printf(sc->dev, "could not alloc interrupt\n");
4711		return ENXIO;
4712	}
4713	if (mxge_verbose)
4714		device_printf(sc->dev, "using %s irq %jd\n",
4715			      sc->legacy_irq ? "INTx" : "MSI",
4716			      rman_get_start(sc->irq_res));
4717	err = bus_setup_intr(sc->dev, sc->irq_res,
4718			     INTR_TYPE_NET | INTR_MPSAFE,
4719 #if __FreeBSD_version > 700030
4720			     NULL,
4721 #endif
4722			     mxge_intr, &sc->ss[0], &sc->ih);
4723	if (err != 0) {
4724		bus_release_resource(sc->dev, SYS_RES_IRQ,
4725				     sc->legacy_irq ? 0 : 1, sc->irq_res);
4726		if (!sc->legacy_irq)
4727			pci_release_msi(sc->dev);
4728	}
4729	return err;
4730 }
4731
4732 static void
4733 mxge_rem_msix_irqs(mxge_softc_t *sc)
4734 {
4735	int i, rid;
4736
4737	for (i = 0; i < sc->num_slices; i++) {
4738		if (sc->msix_ih[i] != NULL) {
4739			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4740					  sc->msix_ih[i]);
4741			sc->msix_ih[i] = NULL;
4742		}
4743	}
4744	free(sc->msix_ih, M_DEVBUF);
4745
4746	for (i = 0; i < sc->num_slices; i++) {
4747		rid = i + 1;
4748		if (sc->msix_irq_res[i] != NULL)
4749			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4750					     sc->msix_irq_res[i]);
4751		sc->msix_irq_res[i] = NULL;
4752	}
4753	free(sc->msix_irq_res, M_DEVBUF);
4754
4755	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4756			     sc->msix_table_res);
4757
4758	pci_release_msi(sc->dev);
4759	return;
4760 }
4761
4762 static void
4763 mxge_rem_single_irq(mxge_softc_t *sc)
4764 {
4765	bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
4766	bus_release_resource(sc->dev, SYS_RES_IRQ,
4767			     sc->legacy_irq ? 0 : 1, sc->irq_res);
4768	if (!sc->legacy_irq)
4769		pci_release_msi(sc->dev);
4770 }
4771
4772 static void
4773 mxge_rem_irq(mxge_softc_t *sc)
4774 {
4775	if (sc->num_slices > 1)
4776		mxge_rem_msix_irqs(sc);
4777	else
4778		mxge_rem_single_irq(sc);
4779 }
4780
4781 static int
4782 mxge_add_irq(mxge_softc_t *sc)
4783 {
4784	int err;
4785
4786	if (sc->num_slices > 1)
4787		err = mxge_add_msix_irqs(sc);
4788	else
4789		err = mxge_add_single_irq(sc);
4790
4791	if (0 && err == 0 && sc->num_slices > 1) {
4792		mxge_rem_msix_irqs(sc);
4793		err = mxge_add_msix_irqs(sc);
4794	}
4795	return err;
4796 }
4797
4798
4799 static int
4800 mxge_attach(device_t dev)
4801 {
4802	mxge_cmd_t cmd;
4803	mxge_softc_t *sc = device_get_softc(dev);
4804	struct ifnet *ifp;
4805	int err, rid;
4806
4807	sc->dev = dev;
4808	mxge_fetch_tunables(sc);
4809
4810	TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
4811	sc->tq = taskqueue_create("mxge_taskq", M_WAITOK,
4812				  taskqueue_thread_enqueue, &sc->tq);
4813	if (sc->tq == NULL) {
4814		err = ENOMEM;
4815		goto abort_with_nothing;
4816	}
4817
4818	err = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
4819				 1,			/* alignment */
4820				 0,			/* boundary */
4821				 BUS_SPACE_MAXADDR,	/* low */
4822				 BUS_SPACE_MAXADDR,	/* high */
4823				 NULL, NULL,		/* filter */
4824				 65536 + 256,		/* maxsize */
4825				 MXGE_MAX_SEND_DESC, 	/* num segs */
4826				 65536,			/* maxsegsize */
4827				 0,			/* flags */
4828				 NULL, NULL,		/* lock */
4829				 &sc->parent_dmat);	/* tag */
4830
4831	if (err != 0) {
4832		device_printf(sc->dev, "Err %d allocating parent dmat\n",
4833			      err);
4834		goto abort_with_tq;
4835	}
4836
4837	ifp = sc->ifp = if_alloc(IFT_ETHER);
4838	if (ifp == NULL) {
4839		device_printf(dev, "can not if_alloc()\n");
4840		err = ENOSPC;
4841		goto abort_with_parent_dmat;
4842	}
4843	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
4844
4845	snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
4846		 device_get_nameunit(dev));
4847	mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
4848	snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
4849		 "%s:drv", device_get_nameunit(dev));
4850	mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
4851		 MTX_NETWORK_LOCK, MTX_DEF);
4852
4853	callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);
4854
4855	mxge_setup_cfg_space(sc);
4856
4857	/* Map the board into the kernel */
4858	rid = PCIR_BARS;
4859	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
4860					     RF_ACTIVE);
4861	if (sc->mem_res == NULL) {
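		/*
		 * Editorial note: this BAR maps the LANai SRAM that holds
		 * the firmware, command, and EEPROM-strings regions; the
		 * NIC is unusable without it, so fail the attach.
		 */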
4862		device_printf(dev, "could not map memory\n");
4863		err = ENXIO;
4864		goto abort_with_lock;
4865	}
4866	sc->sram = rman_get_virtual(sc->mem_res);
4867	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4868	if (sc->sram_size > rman_get_size(sc->mem_res)) {
4869		device_printf(dev, "impossible memory region size %jd\n",
4870			      rman_get_size(sc->mem_res));
4871		err = ENXIO;
4872		goto abort_with_mem_res;
4873	}
4874
4875	/* make NULL-terminated copy of the EEPROM strings section of
4876	   lanai SRAM */
4877	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
4878	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
4879				rman_get_bushandle(sc->mem_res),
4880				sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
4881				sc->eeprom_strings,
4882				MXGE_EEPROM_STRINGS_SIZE - 2);
4883	err = mxge_parse_strings(sc);
4884	if (err != 0)
4885		goto abort_with_mem_res;
4886
4887	/* Enable write combining for efficient use of PCIe bus */
4888	mxge_enable_wc(sc);
4889
4890	/* Allocate the out-of-band DMA memory */
4891	err = mxge_dma_alloc(sc, &sc->cmd_dma,
4892			     sizeof (mxge_cmd_t), 64);
4893	if (err != 0)
4894		goto abort_with_mem_res;
4895	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
4896	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
4897	if (err != 0)
4898		goto abort_with_cmd_dma;
4899
4900	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
4901	if (err != 0)
4902		goto abort_with_zeropad_dma;
4903
4904	/* select & load the firmware */
4905	err = mxge_select_firmware(sc);
4906	if (err != 0)
4907		goto abort_with_dmabench;
4908	sc->intr_coal_delay = mxge_intr_coal_delay;
4909
4910	mxge_slice_probe(sc);
4911	err = mxge_alloc_slices(sc);
4912	if (err != 0)
4913		goto abort_with_dmabench;
4914
4915	err = mxge_reset(sc, 0);
4916	if (err != 0)
4917		goto abort_with_slices;
4918
4919	err = mxge_alloc_rings(sc);
4920	if (err != 0) {
4921		device_printf(sc->dev, "failed to allocate rings\n");
4922		goto abort_with_slices;
4923	}
4924
4925	err = mxge_add_irq(sc);
4926	if (err != 0) {
4927		device_printf(sc->dev, "failed to add irq\n");
4928		goto abort_with_rings;
4929	}
4930
4931	ifp->if_baudrate = IF_Gbps(10);
4932	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
4933		IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 |
4934		IFCAP_RXCSUM_IPV6;
4935 #if defined(INET) || defined(INET6)
4936	ifp->if_capabilities |= IFCAP_LRO;
4937 #endif
4938
4939 #ifdef MXGE_NEW_VLAN_API
4940	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
4941
4942	/* Only FW 1.4.32 and newer can do TSO over vlans */
4943	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
4944	    sc->fw_ver_tiny >= 32)
4945		ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
4946 #endif
4947	sc->max_mtu = mxge_max_mtu(sc);
4948	if (sc->max_mtu >= 9000)
4949		ifp->if_capabilities |= IFCAP_JUMBO_MTU;
4950	else
4951		device_printf(dev, "MTU limited to %d. Install "
4952			      "latest firmware for 9000 byte jumbo support\n",
4953			      sc->max_mtu - ETHER_HDR_LEN);
4954	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
4955	ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6;
4956	/* check to see if f/w supports TSO for IPv6 */
4957	if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) {
4958		if (CSUM_TCP_IPV6)
4959			ifp->if_capabilities |= IFCAP_TSO6;
4960		sc->max_tso6_hlen = min(cmd.data0,
4961					sizeof (sc->ss[0].scratch));
4962	}
4963	ifp->if_capenable = ifp->if_capabilities;
4964	if (sc->lro_cnt == 0)
4965		ifp->if_capenable &= ~IFCAP_LRO;
4966	ifp->if_init = mxge_init;
4967	ifp->if_softc = sc;
4968	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
4969	ifp->if_ioctl = mxge_ioctl;
4970	ifp->if_start = mxge_start;
4971	ifp->if_get_counter = mxge_get_counter;
4972	ifp->if_hw_tsomax = IP_MAXPACKET - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
4973	ifp->if_hw_tsomaxsegcount = sc->ss[0].tx.max_desc;
4974	ifp->if_hw_tsomaxsegsize = IP_MAXPACKET;
4975	/* Initialise the ifmedia structure */
4976	ifmedia_init(&sc->media, 0, mxge_media_change,
4977		     mxge_media_status);
4978	mxge_media_init(sc);
4979	mxge_media_probe(sc);
4980	sc->dying = 0;
4981	ether_ifattach(ifp, sc->mac_addr);
4982	/* ether_ifattach sets mtu to ETHERMTU */
4983	if (mxge_initial_mtu != ETHERMTU)
4984		mxge_change_mtu(sc, mxge_initial_mtu);
4985
4986	mxge_add_sysctls(sc);
4987 #ifdef IFNET_BUF_RING
4988	ifp->if_transmit = mxge_transmit;
4989	ifp->if_qflush = mxge_qflush;
4990 #endif
4991	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
4992				device_get_nameunit(sc->dev));
4993	callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
4994	return 0;
4995
4996 abort_with_rings:
4997	mxge_free_rings(sc);
4998 abort_with_slices:
4999	mxge_free_slices(sc);
5000 abort_with_dmabench:
5001	mxge_dma_free(&sc->dmabench_dma);
5002 abort_with_zeropad_dma:
5003	mxge_dma_free(&sc->zeropad_dma);
5004 abort_with_cmd_dma:
5005	mxge_dma_free(&sc->cmd_dma);
5006 abort_with_mem_res:
5007	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
5008 abort_with_lock:
5009	pci_disable_busmaster(dev);
5010	mtx_destroy(&sc->cmd_mtx);
5011	mtx_destroy(&sc->driver_mtx);
5012	if_free(ifp);
5013 abort_with_parent_dmat:
5014	bus_dma_tag_destroy(sc->parent_dmat);
5015 abort_with_tq:
5016	if (sc->tq != NULL) {
5017		taskqueue_drain(sc->tq, &sc->watchdog_task);
5018		taskqueue_free(sc->tq);
5019		sc->tq = NULL;
5020	}
5021 abort_with_nothing:
5022	return err;
5023 }
5024
5025 static int
5026 mxge_detach(device_t dev)
5027 {
5028	mxge_softc_t *sc = device_get_softc(dev);
5029
5030	if (mxge_vlans_active(sc)) {
5031		device_printf(sc->dev,
5032			      "Detach vlans before removing module\n");
5033		return EBUSY;
5034	}
5035	mtx_lock(&sc->driver_mtx);
5036	sc->dying = 1;
5037	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
5038		mxge_close(sc, 0);
5039	mtx_unlock(&sc->driver_mtx);
5040	ether_ifdetach(sc->ifp);
5041	if (sc->tq != NULL) {
5042		taskqueue_drain(sc->tq, &sc->watchdog_task);
5043		taskqueue_free(sc->tq);
5044		sc->tq = NULL;
5045	}
5046	callout_drain(&sc->co_hdl);
5047	ifmedia_removeall(&sc->media);
5048	mxge_dummy_rdma(sc, 0);
5049	mxge_rem_sysctls(sc);
5050	mxge_rem_irq(sc);
5051	mxge_free_rings(sc);
5052	mxge_free_slices(sc);
5053	mxge_dma_free(&sc->dmabench_dma);
5054	mxge_dma_free(&sc->zeropad_dma);
5055	mxge_dma_free(&sc->cmd_dma);
5056	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
5057	pci_disable_busmaster(dev);
5058	mtx_destroy(&sc->cmd_mtx);
5059	mtx_destroy(&sc->driver_mtx);
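	/*
	 * Editorial note: if_free() is safe only here, after
	 * ether_ifdetach() has removed the interface from the stack and
	 * the taskqueue and callout above have been drained, so nothing
	 * can still hold a reference to sc->ifp.
	 */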
5060	if_free(sc->ifp);
5061	bus_dma_tag_destroy(sc->parent_dmat);
5062	return 0;
5063 }
5064
5065 static int
5066 mxge_shutdown(device_t dev)
5067 {
5068	return 0;
5069 }
5070
5071 /*
5072   This file uses Myri10GE driver indentation.
5073
5074   Local Variables:
5075   c-file-style:"linux"
5076   tab-width:8
5077   End:
5078 */
5079
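/*
 * Editorial example (illustrative, not from the original source): the
 * hw.mxge.* knobs read by mxge_fetch_tunables() are boot-time tunables
 * and can be set from /boot/loader.conf, e.g.:
 *
 *	hw.mxge.max_slices=4
 *	hw.mxge.intr_coal_delay=30
 *	hw.mxge.flow_control_enabled=1
 *
 * The values shown are placeholders, not recommendations; the defaults
 * are declared near the top of this file.
 */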