1 /****************************************************************************** 2 SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 4 Copyright (c) 2006-2013, Myricom Inc. 5 All rights reserved. 6 7 Redistribution and use in source and binary forms, with or without 8 modification, are permitted provided that the following conditions are met: 9 10 1. Redistributions of source code must retain the above copyright notice, 11 this list of conditions and the following disclaimer. 12 13 2. Neither the name of the Myricom Inc, nor the names of its 14 contributors may be used to endorse or promote products derived from 15 this software without specific prior written permission. 16 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 POSSIBILITY OF SUCH DAMAGE. 28 29 ***************************************************************************/ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/linker.h> 37 #include <sys/firmware.h> 38 #include <sys/endian.h> 39 #include <sys/sockio.h> 40 #include <sys/mbuf.h> 41 #include <sys/malloc.h> 42 #include <sys/kdb.h> 43 #include <sys/kernel.h> 44 #include <sys/lock.h> 45 #include <sys/module.h> 46 #include <sys/socket.h> 47 #include <sys/sysctl.h> 48 #include <sys/sx.h> 49 #include <sys/taskqueue.h> 50 #include <contrib/zlib/zlib.h> 51 #include <dev/zlib/zcalloc.h> 52 53 #include <net/if.h> 54 #include <net/if_var.h> 55 #include <net/if_arp.h> 56 #include <net/ethernet.h> 57 #include <net/if_dl.h> 58 #include <net/if_media.h> 59 60 #include <net/bpf.h> 61 62 #include <net/if_types.h> 63 #include <net/if_vlan_var.h> 64 65 #include <netinet/in_systm.h> 66 #include <netinet/in.h> 67 #include <netinet/ip.h> 68 #include <netinet/ip6.h> 69 #include <netinet/tcp.h> 70 #include <netinet/tcp_lro.h> 71 #include <netinet6/ip6_var.h> 72 73 #include <machine/bus.h> 74 #include <machine/in_cksum.h> 75 #include <machine/resource.h> 76 #include <sys/bus.h> 77 #include <sys/rman.h> 78 #include <sys/smp.h> 79 80 #include <dev/pci/pcireg.h> 81 #include <dev/pci/pcivar.h> 82 #include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */ 83 84 #include <vm/vm.h> /* for pmap_mapdev() */ 85 #include <vm/pmap.h> 86 87 #if defined(__i386) || defined(__amd64) 88 #include <machine/specialreg.h> 89 #endif 90 91 #include <dev/mxge/mxge_mcp.h> 92 #include <dev/mxge/mcp_gen_header.h> 93 /*#define MXGE_FAKE_IFP*/ 94 #include <dev/mxge/if_mxge_var.h> 95 #ifdef IFNET_BUF_RING 96 #include <sys/buf_ring.h> 97 #endif 98 99 #include "opt_inet.h" 100 #include "opt_inet6.h" 101 102 /* tunable params */ 103 static int mxge_nvidia_ecrc_enable = 1; 104 static int mxge_force_firmware = 0; 105 static int mxge_intr_coal_delay = 30; 106 static int mxge_deassert_wait = 1; 107 static int mxge_flow_control = 1; 108 static int mxge_verbose = 0; 109 static int mxge_ticks; 110 static int mxge_max_slices = 1; 111 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 112 static int mxge_always_promisc = 0; 113 static int mxge_initial_mtu = ETHERMTU_JUMBO; 114 static int mxge_throttle = 0; 115 static char *mxge_fw_unaligned = "mxge_ethp_z8e"; 116 static char *mxge_fw_aligned = "mxge_eth_z8e"; 117 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; 118 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; 119 120 static int mxge_probe(device_t dev); 121 static int mxge_attach(device_t dev); 122 static int mxge_detach(device_t dev); 123 static int mxge_shutdown(device_t dev); 124 static void mxge_intr(void *arg); 125 126 static device_method_t mxge_methods[] = 127 { 128 /* Device interface */ 129 DEVMETHOD(device_probe, mxge_probe), 130 DEVMETHOD(device_attach, mxge_attach), 131 DEVMETHOD(device_detach, mxge_detach), 132 DEVMETHOD(device_shutdown, mxge_shutdown), 133 134 DEVMETHOD_END 135 }; 136 137 static driver_t mxge_driver = 138 { 139 "mxge", 140 mxge_methods, 141 sizeof(mxge_softc_t), 142 }; 143 144 static devclass_t mxge_devclass; 145 146 /* Declare ourselves to be a child of the PCI bus.*/ 147 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0); 148 MODULE_DEPEND(mxge, firmware, 1, 1, 1); 149 MODULE_DEPEND(mxge, zlib, 1, 1, 1); 150 151 static int mxge_load_firmware(mxge_softc_t *sc, int adopt); 152 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 153 static int mxge_close(mxge_softc_t *sc, int down); 154 static int mxge_open(mxge_softc_t *sc); 155 static void mxge_tick(void *arg); 156 157 static int 158 mxge_probe(device_t dev) 159 { 160 int rev; 161 162 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && 163 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) || 164 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) { 165 rev = pci_get_revid(dev); 166 switch (rev) { 167 case MXGE_PCI_REV_Z8E: 168 device_set_desc(dev, "Myri10G-PCIE-8A"); 169 break; 170 case MXGE_PCI_REV_Z8ES: 171 device_set_desc(dev, "Myri10G-PCIE-8B"); 172 break; 173 default: 174 device_set_desc(dev, "Myri10G-PCIE-8??"); 175 device_printf(dev, "Unrecognized rev %d NIC\n", 176 rev); 177 break; 178 } 179 return 0; 180 } 181 return ENXIO; 182 } 183 184 static void 185 mxge_enable_wc(mxge_softc_t *sc) 186 { 187 #if defined(__i386) || defined(__amd64) 188 vm_offset_t len; 189 int err; 190 191 sc->wc = 1; 192 len = rman_get_size(sc->mem_res); 193 err = pmap_change_attr((vm_offset_t) sc->sram, 194 len, PAT_WRITE_COMBINING); 195 if (err != 0) { 196 device_printf(sc->dev, "pmap_change_attr failed, %d\n", 197 err); 198 sc->wc = 0; 199 } 200 #endif 201 } 202 203 /* callback to get our DMA address */ 204 static void 205 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, 206 int error) 207 { 208 if (error == 0) { 209 *(bus_addr_t *) arg = segs->ds_addr; 210 } 211 } 212 213 static int 214 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, 215 bus_size_t alignment) 216 { 217 int err; 218 device_t dev = sc->dev; 219 bus_size_t boundary, maxsegsize; 220 221 if (bytes > 4096 && alignment == 4096) { 222 boundary = 0; 223 maxsegsize = bytes; 224 } else { 225 boundary = 4096; 226 maxsegsize = 4096; 227 } 228 229 /* allocate DMAable memory tags */ 230 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 231 alignment, /* alignment */ 232 boundary, /* boundary */ 233 BUS_SPACE_MAXADDR, /* low */ 234 BUS_SPACE_MAXADDR, /* high */ 235 NULL, NULL, /* filter */ 236 bytes, /* maxsize */ 237 1, /* num segs */ 238 maxsegsize, /* maxsegsize */ 239 BUS_DMA_COHERENT, /* flags */ 240 NULL, NULL, /* lock */ 241 &dma->dmat); /* tag */ 242 if (err != 0) { 243 device_printf(dev, "couldn't alloc tag (err = %d)\n", err); 244 return err; 245 } 246 247 /* allocate DMAable memory & map */ 248 err = bus_dmamem_alloc(dma->dmat, &dma->addr, 249 (BUS_DMA_WAITOK | BUS_DMA_COHERENT 250 | BUS_DMA_ZERO), &dma->map); 251 if (err != 0) { 252 device_printf(dev, "couldn't alloc mem (err = %d)\n", err); 253 goto abort_with_dmat; 254 } 255 256 /* load the memory */ 257 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, 258 mxge_dmamap_callback, 259 (void *)&dma->bus_addr, 0); 260 if (err != 0) { 261 device_printf(dev, "couldn't load map (err = %d)\n", err); 262 goto abort_with_mem; 263 } 264 return 0; 265 266 abort_with_mem: 267 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 268 abort_with_dmat: 269 (void)bus_dma_tag_destroy(dma->dmat); 270 return err; 271 } 272 273 static void 274 mxge_dma_free(mxge_dma_t *dma) 275 { 276 bus_dmamap_unload(dma->dmat, dma->map); 277 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 278 (void)bus_dma_tag_destroy(dma->dmat); 279 } 280 281 /* 282 * The eeprom strings on the lanaiX have the format 283 * SN=x\0 284 * MAC=x:x:x:x:x:x\0 285 * PC=text\0 286 */ 287 288 static int 289 mxge_parse_strings(mxge_softc_t *sc) 290 { 291 char *ptr; 292 int i, found_mac, found_sn2; 293 char *endptr; 294 295 ptr = sc->eeprom_strings; 296 found_mac = 0; 297 found_sn2 = 0; 298 while (*ptr != '\0') { 299 if (strncmp(ptr, "MAC=", 4) == 0) { 300 ptr += 4; 301 for (i = 0;;) { 302 sc->mac_addr[i] = strtoul(ptr, &endptr, 16); 303 if (endptr - ptr != 2) 304 goto abort; 305 ptr = endptr; 306 if (++i == 6) 307 break; 308 if (*ptr++ != ':') 309 goto abort; 310 } 311 found_mac = 1; 312 } else if (strncmp(ptr, "PC=", 3) == 0) { 313 ptr += 3; 314 strlcpy(sc->product_code_string, ptr, 315 sizeof(sc->product_code_string)); 316 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) { 317 ptr += 3; 318 strlcpy(sc->serial_number_string, ptr, 319 sizeof(sc->serial_number_string)); 320 } else if (strncmp(ptr, "SN2=", 4) == 0) { 321 /* SN2 takes precedence over SN */ 322 ptr += 4; 323 found_sn2 = 1; 324 strlcpy(sc->serial_number_string, ptr, 325 sizeof(sc->serial_number_string)); 326 } 327 while (*ptr++ != '\0') {} 328 } 329 330 if (found_mac) 331 return 0; 332 333 abort: 334 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 335 336 return ENXIO; 337 } 338 339 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 340 static void 341 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 342 { 343 uint32_t val; 344 unsigned long base, off; 345 char *va, *cfgptr; 346 device_t pdev, mcp55; 347 uint16_t vendor_id, device_id, word; 348 uintptr_t bus, slot, func, ivend, idev; 349 uint32_t *ptr32; 350 351 if (!mxge_nvidia_ecrc_enable) 352 return; 353 354 pdev = device_get_parent(device_get_parent(sc->dev)); 355 if (pdev == NULL) { 356 device_printf(sc->dev, "could not find parent?\n"); 357 return; 358 } 359 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 360 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 361 362 if (vendor_id != 0x10de) 363 return; 364 365 base = 0; 366 367 if (device_id == 0x005d) { 368 /* ck804, base address is magic */ 369 base = 0xe0000000UL; 370 } else if (device_id >= 0x0374 && device_id <= 0x378) { 371 /* mcp55, base address stored in chipset */ 372 mcp55 = pci_find_bsf(0, 0, 0); 373 if (mcp55 && 374 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 375 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 376 word = pci_read_config(mcp55, 0x90, 2); 377 base = ((unsigned long)word & 0x7ffeU) << 25; 378 } 379 } 380 if (!base) 381 return; 382 383 /* XXXX 384 Test below is commented because it is believed that doing 385 config read/write beyond 0xff will access the config space 386 for the next larger function. Uncomment this and remove 387 the hacky pmap_mapdev() way of accessing config space when 388 FreeBSD grows support for extended pcie config space access 389 */ 390 #if 0 391 /* See if we can, by some miracle, access the extended 392 config space */ 393 val = pci_read_config(pdev, 0x178, 4); 394 if (val != 0xffffffff) { 395 val |= 0x40; 396 pci_write_config(pdev, 0x178, val, 4); 397 return; 398 } 399 #endif 400 /* Rather than using normal pci config space writes, we must 401 * map the Nvidia config space ourselves. This is because on 402 * opteron/nvidia class machine the 0xe000000 mapping is 403 * handled by the nvidia chipset, that means the internal PCI 404 * device (the on-chip northbridge), or the amd-8131 bridge 405 * and things behind them are not visible by this method. 406 */ 407 408 BUS_READ_IVAR(device_get_parent(pdev), pdev, 409 PCI_IVAR_BUS, &bus); 410 BUS_READ_IVAR(device_get_parent(pdev), pdev, 411 PCI_IVAR_SLOT, &slot); 412 BUS_READ_IVAR(device_get_parent(pdev), pdev, 413 PCI_IVAR_FUNCTION, &func); 414 BUS_READ_IVAR(device_get_parent(pdev), pdev, 415 PCI_IVAR_VENDOR, &ivend); 416 BUS_READ_IVAR(device_get_parent(pdev), pdev, 417 PCI_IVAR_DEVICE, &idev); 418 419 off = base 420 + 0x00100000UL * (unsigned long)bus 421 + 0x00001000UL * (unsigned long)(func 422 + 8 * slot); 423 424 /* map it into the kernel */ 425 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 426 427 if (va == NULL) { 428 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 429 return; 430 } 431 /* get a pointer to the config space mapped into the kernel */ 432 cfgptr = va + (off & PAGE_MASK); 433 434 /* make sure that we can really access it */ 435 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 436 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 437 if (! (vendor_id == ivend && device_id == idev)) { 438 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 439 vendor_id, device_id); 440 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 441 return; 442 } 443 444 ptr32 = (uint32_t*)(cfgptr + 0x178); 445 val = *ptr32; 446 447 if (val == 0xffffffff) { 448 device_printf(sc->dev, "extended mapping failed\n"); 449 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 450 return; 451 } 452 *ptr32 = val | 0x40; 453 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 454 if (mxge_verbose) 455 device_printf(sc->dev, 456 "Enabled ECRC on upstream Nvidia bridge " 457 "at %d:%d:%d\n", 458 (int)bus, (int)slot, (int)func); 459 return; 460 } 461 #else 462 static void 463 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 464 { 465 device_printf(sc->dev, 466 "Nforce 4 chipset on non-x86/amd64!?!?!\n"); 467 return; 468 } 469 #endif 470 471 static int 472 mxge_dma_test(mxge_softc_t *sc, int test_type) 473 { 474 mxge_cmd_t cmd; 475 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr; 476 int status; 477 uint32_t len; 478 char *test = " "; 479 480 /* Run a small DMA test. 481 * The magic multipliers to the length tell the firmware 482 * to do DMA read, write, or read+write tests. The 483 * results are returned in cmd.data0. The upper 16 484 * bits of the return is the number of transfers completed. 485 * The lower 16 bits is the time in 0.5us ticks that the 486 * transfers took to complete. 487 */ 488 489 len = sc->tx_boundary; 490 491 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 492 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 493 cmd.data2 = len * 0x10000; 494 status = mxge_send_cmd(sc, test_type, &cmd); 495 if (status != 0) { 496 test = "read"; 497 goto abort; 498 } 499 sc->read_dma = ((cmd.data0>>16) * len * 2) / 500 (cmd.data0 & 0xffff); 501 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 502 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 503 cmd.data2 = len * 0x1; 504 status = mxge_send_cmd(sc, test_type, &cmd); 505 if (status != 0) { 506 test = "write"; 507 goto abort; 508 } 509 sc->write_dma = ((cmd.data0>>16) * len * 2) / 510 (cmd.data0 & 0xffff); 511 512 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 513 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 514 cmd.data2 = len * 0x10001; 515 status = mxge_send_cmd(sc, test_type, &cmd); 516 if (status != 0) { 517 test = "read/write"; 518 goto abort; 519 } 520 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 521 (cmd.data0 & 0xffff); 522 523 abort: 524 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 525 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 526 test, status); 527 528 return status; 529 } 530 531 /* 532 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 533 * when the PCI-E Completion packets are aligned on an 8-byte 534 * boundary. Some PCI-E chip sets always align Completion packets; on 535 * the ones that do not, the alignment can be enforced by enabling 536 * ECRC generation (if supported). 537 * 538 * When PCI-E Completion packets are not aligned, it is actually more 539 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 540 * 541 * If the driver can neither enable ECRC nor verify that it has 542 * already been enabled, then it must use a firmware image which works 543 * around unaligned completion packets (ethp_z8e.dat), and it should 544 * also ensure that it never gives the device a Read-DMA which is 545 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 546 * enabled, then the driver should use the aligned (eth_z8e.dat) 547 * firmware image, and set tx_boundary to 4KB. 548 */ 549 550 static int 551 mxge_firmware_probe(mxge_softc_t *sc) 552 { 553 device_t dev = sc->dev; 554 int reg, status; 555 uint16_t pectl; 556 557 sc->tx_boundary = 4096; 558 /* 559 * Verify the max read request size was set to 4KB 560 * before trying the test with 4KB. 561 */ 562 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { 563 pectl = pci_read_config(dev, reg + 0x8, 2); 564 if ((pectl & (5 << 12)) != (5 << 12)) { 565 device_printf(dev, "Max Read Req. size != 4k (0x%x\n", 566 pectl); 567 sc->tx_boundary = 2048; 568 } 569 } 570 571 /* 572 * load the optimized firmware (which assumes aligned PCIe 573 * completions) in order to see if it works on this host. 574 */ 575 sc->fw_name = mxge_fw_aligned; 576 status = mxge_load_firmware(sc, 1); 577 if (status != 0) { 578 return status; 579 } 580 581 /* 582 * Enable ECRC if possible 583 */ 584 mxge_enable_nvidia_ecrc(sc); 585 586 /* 587 * Run a DMA test which watches for unaligned completions and 588 * aborts on the first one seen. Not required on Z8ES or newer. 589 */ 590 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES) 591 return 0; 592 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 593 if (status == 0) 594 return 0; /* keep the aligned firmware */ 595 596 if (status != E2BIG) 597 device_printf(dev, "DMA test failed: %d\n", status); 598 if (status == ENOSYS) 599 device_printf(dev, "Falling back to ethp! " 600 "Please install up to date fw\n"); 601 return status; 602 } 603 604 static int 605 mxge_select_firmware(mxge_softc_t *sc) 606 { 607 int aligned = 0; 608 int force_firmware = mxge_force_firmware; 609 610 if (sc->throttle) 611 force_firmware = sc->throttle; 612 613 if (force_firmware != 0) { 614 if (force_firmware == 1) 615 aligned = 1; 616 else 617 aligned = 0; 618 if (mxge_verbose) 619 device_printf(sc->dev, 620 "Assuming %s completions (forced)\n", 621 aligned ? "aligned" : "unaligned"); 622 goto abort; 623 } 624 625 /* if the PCIe link width is 4 or less, we can use the aligned 626 firmware and skip any checks */ 627 if (sc->link_width != 0 && sc->link_width <= 4) { 628 device_printf(sc->dev, 629 "PCIe x%d Link, expect reduced performance\n", 630 sc->link_width); 631 aligned = 1; 632 goto abort; 633 } 634 635 if (0 == mxge_firmware_probe(sc)) 636 return 0; 637 638 abort: 639 if (aligned) { 640 sc->fw_name = mxge_fw_aligned; 641 sc->tx_boundary = 4096; 642 } else { 643 sc->fw_name = mxge_fw_unaligned; 644 sc->tx_boundary = 2048; 645 } 646 return (mxge_load_firmware(sc, 0)); 647 } 648 649 static int 650 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 651 { 652 653 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 654 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 655 be32toh(hdr->mcp_type)); 656 return EIO; 657 } 658 659 /* save firmware version for sysctl */ 660 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version)); 661 if (mxge_verbose) 662 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 663 664 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 665 &sc->fw_ver_minor, &sc->fw_ver_tiny); 666 667 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 668 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 669 device_printf(sc->dev, "Found firmware version %s\n", 670 sc->fw_version); 671 device_printf(sc->dev, "Driver needs %d.%d\n", 672 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 673 return EINVAL; 674 } 675 return 0; 676 677 } 678 679 static int 680 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 681 { 682 z_stream zs; 683 char *inflate_buffer; 684 const struct firmware *fw; 685 const mcp_gen_header_t *hdr; 686 unsigned hdr_offset; 687 int status; 688 unsigned int i; 689 size_t fw_len; 690 691 fw = firmware_get(sc->fw_name); 692 if (fw == NULL) { 693 device_printf(sc->dev, "Could not find firmware image %s\n", 694 sc->fw_name); 695 return ENOENT; 696 } 697 698 /* setup zlib and decompress f/w */ 699 bzero(&zs, sizeof (zs)); 700 zs.zalloc = zcalloc_nowait; 701 zs.zfree = zcfree; 702 status = inflateInit(&zs); 703 if (status != Z_OK) { 704 status = EIO; 705 goto abort_with_fw; 706 } 707 708 /* the uncompressed size is stored as the firmware version, 709 which would otherwise go unused */ 710 fw_len = (size_t) fw->version; 711 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); 712 if (inflate_buffer == NULL) 713 goto abort_with_zs; 714 zs.avail_in = fw->datasize; 715 zs.next_in = __DECONST(char *, fw->data); 716 zs.avail_out = fw_len; 717 zs.next_out = inflate_buffer; 718 status = inflate(&zs, Z_FINISH); 719 if (status != Z_STREAM_END) { 720 device_printf(sc->dev, "zlib %d\n", status); 721 status = EIO; 722 goto abort_with_buffer; 723 } 724 725 /* check id */ 726 hdr_offset = htobe32(*(const uint32_t *) 727 (inflate_buffer + MCP_HEADER_PTR_OFFSET)); 728 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 729 device_printf(sc->dev, "Bad firmware file"); 730 status = EIO; 731 goto abort_with_buffer; 732 } 733 hdr = (const void*)(inflate_buffer + hdr_offset); 734 735 status = mxge_validate_firmware(sc, hdr); 736 if (status != 0) 737 goto abort_with_buffer; 738 739 /* Copy the inflated firmware to NIC SRAM. */ 740 for (i = 0; i < fw_len; i += 256) { 741 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 742 inflate_buffer + i, 743 min(256U, (unsigned)(fw_len - i))); 744 wmb(); 745 (void)*sc->sram; 746 wmb(); 747 } 748 749 *limit = fw_len; 750 status = 0; 751 abort_with_buffer: 752 free(inflate_buffer, M_TEMP); 753 abort_with_zs: 754 inflateEnd(&zs); 755 abort_with_fw: 756 firmware_put(fw, FIRMWARE_UNLOAD); 757 return status; 758 } 759 760 /* 761 * Enable or disable periodic RDMAs from the host to make certain 762 * chipsets resend dropped PCIe messages 763 */ 764 765 static void 766 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 767 { 768 char buf_bytes[72]; 769 volatile uint32_t *confirm; 770 volatile char *submit; 771 uint32_t *buf, dma_low, dma_high; 772 int i; 773 774 buf = (uint32_t *)((uintptr_t)(buf_bytes + 7) & ~7UL); 775 776 /* clear confirmation addr */ 777 confirm = (volatile uint32_t *)sc->cmd; 778 *confirm = 0; 779 wmb(); 780 781 /* send an rdma command to the PCIe engine, and wait for the 782 response in the confirmation address. The firmware should 783 write a -1 there to indicate it is alive and well 784 */ 785 786 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 787 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 788 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 789 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 790 buf[2] = htobe32(0xffffffff); /* confirm data */ 791 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 792 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 793 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 794 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 795 buf[5] = htobe32(enable); /* enable? */ 796 797 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 798 799 mxge_pio_copy(submit, buf, 64); 800 wmb(); 801 DELAY(1000); 802 wmb(); 803 i = 0; 804 while (*confirm != 0xffffffff && i < 20) { 805 DELAY(1000); 806 i++; 807 } 808 if (*confirm != 0xffffffff) { 809 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 810 (enable ? "enable" : "disable"), confirm, 811 *confirm); 812 } 813 return; 814 } 815 816 static int 817 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 818 { 819 mcp_cmd_t *buf; 820 char buf_bytes[sizeof(*buf) + 8]; 821 volatile mcp_cmd_response_t *response = sc->cmd; 822 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 823 uint32_t dma_low, dma_high; 824 int err, sleep_total = 0; 825 826 /* ensure buf is aligned to 8 bytes */ 827 buf = (mcp_cmd_t *)((uintptr_t)(buf_bytes + 7) & ~7UL); 828 829 buf->data0 = htobe32(data->data0); 830 buf->data1 = htobe32(data->data1); 831 buf->data2 = htobe32(data->data2); 832 buf->cmd = htobe32(cmd); 833 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 834 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 835 836 buf->response_addr.low = htobe32(dma_low); 837 buf->response_addr.high = htobe32(dma_high); 838 mtx_lock(&sc->cmd_mtx); 839 response->result = 0xffffffff; 840 wmb(); 841 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 842 843 /* wait up to 20ms */ 844 err = EAGAIN; 845 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 846 bus_dmamap_sync(sc->cmd_dma.dmat, 847 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 848 wmb(); 849 switch (be32toh(response->result)) { 850 case 0: 851 data->data0 = be32toh(response->data); 852 err = 0; 853 break; 854 case 0xffffffff: 855 DELAY(1000); 856 break; 857 case MXGEFW_CMD_UNKNOWN: 858 err = ENOSYS; 859 break; 860 case MXGEFW_CMD_ERROR_UNALIGNED: 861 err = E2BIG; 862 break; 863 case MXGEFW_CMD_ERROR_BUSY: 864 err = EBUSY; 865 break; 866 case MXGEFW_CMD_ERROR_I2C_ABSENT: 867 err = ENXIO; 868 break; 869 default: 870 device_printf(sc->dev, 871 "mxge: command %d " 872 "failed, result = %d\n", 873 cmd, be32toh(response->result)); 874 err = ENXIO; 875 break; 876 } 877 if (err != EAGAIN) 878 break; 879 } 880 if (err == EAGAIN) 881 device_printf(sc->dev, "mxge: command %d timed out" 882 "result = %d\n", 883 cmd, be32toh(response->result)); 884 mtx_unlock(&sc->cmd_mtx); 885 return err; 886 } 887 888 static int 889 mxge_adopt_running_firmware(mxge_softc_t *sc) 890 { 891 struct mcp_gen_header *hdr; 892 const size_t bytes = sizeof (struct mcp_gen_header); 893 size_t hdr_offset; 894 int status; 895 896 /* find running firmware header */ 897 hdr_offset = htobe32(*(volatile uint32_t *) 898 (sc->sram + MCP_HEADER_PTR_OFFSET)); 899 900 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 901 device_printf(sc->dev, 902 "Running firmware has bad header offset (%d)\n", 903 (int)hdr_offset); 904 return EIO; 905 } 906 907 /* copy header of running firmware from SRAM to host memory to 908 * validate firmware */ 909 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 910 if (hdr == NULL) { 911 device_printf(sc->dev, "could not malloc firmware hdr\n"); 912 return ENOMEM; 913 } 914 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 915 rman_get_bushandle(sc->mem_res), 916 hdr_offset, (char *)hdr, bytes); 917 status = mxge_validate_firmware(sc, hdr); 918 free(hdr, M_DEVBUF); 919 920 /* 921 * check to see if adopted firmware has bug where adopting 922 * it will cause broadcasts to be filtered unless the NIC 923 * is kept in ALLMULTI mode 924 */ 925 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 926 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 927 sc->adopted_rx_filter_bug = 1; 928 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 929 "working around rx filter bug\n", 930 sc->fw_ver_major, sc->fw_ver_minor, 931 sc->fw_ver_tiny); 932 } 933 934 return status; 935 } 936 937 static int 938 mxge_load_firmware(mxge_softc_t *sc, int adopt) 939 { 940 volatile uint32_t *confirm; 941 volatile char *submit; 942 char buf_bytes[72]; 943 uint32_t *buf, size, dma_low, dma_high; 944 int status, i; 945 946 buf = (uint32_t *)((uintptr_t)(buf_bytes + 7) & ~7UL); 947 948 size = sc->sram_size; 949 status = mxge_load_firmware_helper(sc, &size); 950 if (status) { 951 if (!adopt) 952 return status; 953 /* Try to use the currently running firmware, if 954 it is new enough */ 955 status = mxge_adopt_running_firmware(sc); 956 if (status) { 957 device_printf(sc->dev, 958 "failed to adopt running firmware\n"); 959 return status; 960 } 961 device_printf(sc->dev, 962 "Successfully adopted running firmware\n"); 963 if (sc->tx_boundary == 4096) { 964 device_printf(sc->dev, 965 "Using firmware currently running on NIC" 966 ". For optimal\n"); 967 device_printf(sc->dev, 968 "performance consider loading optimized " 969 "firmware\n"); 970 } 971 sc->fw_name = mxge_fw_unaligned; 972 sc->tx_boundary = 2048; 973 return 0; 974 } 975 /* clear confirmation addr */ 976 confirm = (volatile uint32_t *)sc->cmd; 977 *confirm = 0; 978 wmb(); 979 /* send a reload command to the bootstrap MCP, and wait for the 980 response in the confirmation address. The firmware should 981 write a -1 there to indicate it is alive and well 982 */ 983 984 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 985 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 986 987 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 988 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 989 buf[2] = htobe32(0xffffffff); /* confirm data */ 990 991 /* FIX: All newest firmware should un-protect the bottom of 992 the sram before handoff. However, the very first interfaces 993 do not. Therefore the handoff copy must skip the first 8 bytes 994 */ 995 /* where the code starts*/ 996 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 997 buf[4] = htobe32(size - 8); /* length of code */ 998 buf[5] = htobe32(8); /* where to copy to */ 999 buf[6] = htobe32(0); /* where to jump to */ 1000 1001 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 1002 mxge_pio_copy(submit, buf, 64); 1003 wmb(); 1004 DELAY(1000); 1005 wmb(); 1006 i = 0; 1007 while (*confirm != 0xffffffff && i < 20) { 1008 DELAY(1000*10); 1009 i++; 1010 bus_dmamap_sync(sc->cmd_dma.dmat, 1011 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 1012 } 1013 if (*confirm != 0xffffffff) { 1014 device_printf(sc->dev,"handoff failed (%p = 0x%x)", 1015 confirm, *confirm); 1016 1017 return ENXIO; 1018 } 1019 return 0; 1020 } 1021 1022 static int 1023 mxge_update_mac_address(mxge_softc_t *sc) 1024 { 1025 mxge_cmd_t cmd; 1026 uint8_t *addr = sc->mac_addr; 1027 int status; 1028 1029 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 1030 | (addr[2] << 8) | addr[3]); 1031 1032 cmd.data1 = ((addr[4] << 8) | (addr[5])); 1033 1034 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 1035 return status; 1036 } 1037 1038 static int 1039 mxge_change_pause(mxge_softc_t *sc, int pause) 1040 { 1041 mxge_cmd_t cmd; 1042 int status; 1043 1044 if (pause) 1045 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, 1046 &cmd); 1047 else 1048 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, 1049 &cmd); 1050 1051 if (status) { 1052 device_printf(sc->dev, "Failed to set flow control mode\n"); 1053 return ENXIO; 1054 } 1055 sc->pause = pause; 1056 return 0; 1057 } 1058 1059 static void 1060 mxge_change_promisc(mxge_softc_t *sc, int promisc) 1061 { 1062 mxge_cmd_t cmd; 1063 int status; 1064 1065 if (mxge_always_promisc) 1066 promisc = 1; 1067 1068 if (promisc) 1069 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, 1070 &cmd); 1071 else 1072 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, 1073 &cmd); 1074 1075 if (status) { 1076 device_printf(sc->dev, "Failed to set promisc mode\n"); 1077 } 1078 } 1079 1080 struct mxge_add_maddr_ctx { 1081 mxge_softc_t *sc; 1082 int error; 1083 }; 1084 1085 static u_int 1086 mxge_add_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt) 1087 { 1088 struct mxge_add_maddr_ctx *ctx = arg; 1089 mxge_cmd_t cmd; 1090 1091 if (ctx->error != 0) 1092 return (0); 1093 bcopy(LLADDR(sdl), &cmd.data0, 4); 1094 bcopy(LLADDR(sdl) + 4, &cmd.data1, 2); 1095 cmd.data0 = htonl(cmd.data0); 1096 cmd.data1 = htonl(cmd.data1); 1097 1098 ctx->error = mxge_send_cmd(ctx->sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1099 1100 return (1); 1101 } 1102 1103 static void 1104 mxge_set_multicast_list(mxge_softc_t *sc) 1105 { 1106 struct mxge_add_maddr_ctx ctx; 1107 struct ifnet *ifp = sc->ifp; 1108 mxge_cmd_t cmd; 1109 int err; 1110 1111 /* This firmware is known to not support multicast */ 1112 if (!sc->fw_multicast_support) 1113 return; 1114 1115 /* Disable multicast filtering while we play with the lists*/ 1116 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1117 if (err != 0) { 1118 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," 1119 " error status: %d\n", err); 1120 return; 1121 } 1122 1123 if (sc->adopted_rx_filter_bug) 1124 return; 1125 1126 if (ifp->if_flags & IFF_ALLMULTI) 1127 /* request to disable multicast filtering, so quit here */ 1128 return; 1129 1130 /* Flush all the filters */ 1131 1132 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1133 if (err != 0) { 1134 device_printf(sc->dev, 1135 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" 1136 ", error status: %d\n", err); 1137 return; 1138 } 1139 1140 /* Walk the multicast list, and add each address */ 1141 ctx.sc = sc; 1142 ctx.error = 0; 1143 if_foreach_llmaddr(ifp, mxge_add_maddr, &ctx); 1144 if (ctx.error != 0) { 1145 device_printf(sc->dev, "Failed MXGEFW_JOIN_MULTICAST_GROUP, " 1146 "error status:" "%d\t", ctx.error); 1147 /* abort, leaving multicast filtering off */ 1148 return; 1149 } 1150 1151 /* Enable multicast filtering */ 1152 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1153 if (err != 0) { 1154 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI" 1155 ", error status: %d\n", err); 1156 } 1157 } 1158 1159 static int 1160 mxge_max_mtu(mxge_softc_t *sc) 1161 { 1162 mxge_cmd_t cmd; 1163 int status; 1164 1165 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) 1166 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1167 1168 /* try to set nbufs to see if it we can 1169 use virtually contiguous jumbos */ 1170 cmd.data0 = 0; 1171 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 1172 &cmd); 1173 if (status == 0) 1174 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1175 1176 /* otherwise, we're limited to MJUMPAGESIZE */ 1177 return MJUMPAGESIZE - MXGEFW_PAD; 1178 } 1179 1180 static int 1181 mxge_reset(mxge_softc_t *sc, int interrupts_setup) 1182 { 1183 struct mxge_slice_state *ss; 1184 mxge_rx_done_t *rx_done; 1185 volatile uint32_t *irq_claim; 1186 mxge_cmd_t cmd; 1187 int slice, status; 1188 1189 /* try to send a reset command to the card to see if it 1190 is alive */ 1191 memset(&cmd, 0, sizeof (cmd)); 1192 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1193 if (status != 0) { 1194 device_printf(sc->dev, "failed reset\n"); 1195 return ENXIO; 1196 } 1197 1198 mxge_dummy_rdma(sc, 1); 1199 1200 /* set the intrq size */ 1201 cmd.data0 = sc->rx_ring_size; 1202 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1203 1204 /* 1205 * Even though we already know how many slices are supported 1206 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES 1207 * has magic side effects, and must be called after a reset. 1208 * It must be called prior to calling any RSS related cmds, 1209 * including assigning an interrupt queue for anything but 1210 * slice 0. It must also be called *after* 1211 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 1212 * the firmware to compute offsets. 1213 */ 1214 1215 if (sc->num_slices > 1) { 1216 /* ask the maximum number of slices it supports */ 1217 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 1218 &cmd); 1219 if (status != 0) { 1220 device_printf(sc->dev, 1221 "failed to get number of slices\n"); 1222 return status; 1223 } 1224 /* 1225 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 1226 * to setting up the interrupt queue DMA 1227 */ 1228 cmd.data0 = sc->num_slices; 1229 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 1230 #ifdef IFNET_BUF_RING 1231 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 1232 #endif 1233 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, 1234 &cmd); 1235 if (status != 0) { 1236 device_printf(sc->dev, 1237 "failed to set number of slices\n"); 1238 return status; 1239 } 1240 } 1241 1242 if (interrupts_setup) { 1243 /* Now exchange information about interrupts */ 1244 for (slice = 0; slice < sc->num_slices; slice++) { 1245 rx_done = &sc->ss[slice].rx_done; 1246 memset(rx_done->entry, 0, sc->rx_ring_size); 1247 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr); 1248 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr); 1249 cmd.data2 = slice; 1250 status |= mxge_send_cmd(sc, 1251 MXGEFW_CMD_SET_INTRQ_DMA, 1252 &cmd); 1253 } 1254 } 1255 1256 status |= mxge_send_cmd(sc, 1257 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); 1258 1259 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1260 1261 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1262 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1263 1264 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1265 &cmd); 1266 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1267 if (status != 0) { 1268 device_printf(sc->dev, "failed set interrupt parameters\n"); 1269 return status; 1270 } 1271 1272 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1273 1274 /* run a DMA benchmark */ 1275 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST); 1276 1277 for (slice = 0; slice < sc->num_slices; slice++) { 1278 ss = &sc->ss[slice]; 1279 1280 ss->irq_claim = irq_claim + (2 * slice); 1281 /* reset mcp/driver shared state back to 0 */ 1282 ss->rx_done.idx = 0; 1283 ss->rx_done.cnt = 0; 1284 ss->tx.req = 0; 1285 ss->tx.done = 0; 1286 ss->tx.pkt_done = 0; 1287 ss->tx.queue_active = 0; 1288 ss->tx.activate = 0; 1289 ss->tx.deactivate = 0; 1290 ss->tx.wake = 0; 1291 ss->tx.defrag = 0; 1292 ss->tx.stall = 0; 1293 ss->rx_big.cnt = 0; 1294 ss->rx_small.cnt = 0; 1295 ss->lc.lro_bad_csum = 0; 1296 ss->lc.lro_queued = 0; 1297 ss->lc.lro_flushed = 0; 1298 if (ss->fw_stats != NULL) { 1299 bzero(ss->fw_stats, sizeof *ss->fw_stats); 1300 } 1301 } 1302 sc->rdma_tags_available = 15; 1303 status = mxge_update_mac_address(sc); 1304 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC); 1305 mxge_change_pause(sc, sc->pause); 1306 mxge_set_multicast_list(sc); 1307 if (sc->throttle) { 1308 cmd.data0 = sc->throttle; 1309 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, 1310 &cmd)) { 1311 device_printf(sc->dev, 1312 "can't enable throttle\n"); 1313 } 1314 } 1315 return status; 1316 } 1317 1318 static int 1319 mxge_change_throttle(SYSCTL_HANDLER_ARGS) 1320 { 1321 mxge_cmd_t cmd; 1322 mxge_softc_t *sc; 1323 int err; 1324 unsigned int throttle; 1325 1326 sc = arg1; 1327 throttle = sc->throttle; 1328 err = sysctl_handle_int(oidp, &throttle, arg2, req); 1329 if (err != 0) { 1330 return err; 1331 } 1332 1333 if (throttle == sc->throttle) 1334 return 0; 1335 1336 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE) 1337 return EINVAL; 1338 1339 mtx_lock(&sc->driver_mtx); 1340 cmd.data0 = throttle; 1341 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd); 1342 if (err == 0) 1343 sc->throttle = throttle; 1344 mtx_unlock(&sc->driver_mtx); 1345 return err; 1346 } 1347 1348 static int 1349 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1350 { 1351 mxge_softc_t *sc; 1352 unsigned int intr_coal_delay; 1353 int err; 1354 1355 sc = arg1; 1356 intr_coal_delay = sc->intr_coal_delay; 1357 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1358 if (err != 0) { 1359 return err; 1360 } 1361 if (intr_coal_delay == sc->intr_coal_delay) 1362 return 0; 1363 1364 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1365 return EINVAL; 1366 1367 mtx_lock(&sc->driver_mtx); 1368 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1369 sc->intr_coal_delay = intr_coal_delay; 1370 1371 mtx_unlock(&sc->driver_mtx); 1372 return err; 1373 } 1374 1375 static int 1376 mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1377 { 1378 mxge_softc_t *sc; 1379 unsigned int enabled; 1380 int err; 1381 1382 sc = arg1; 1383 enabled = sc->pause; 1384 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1385 if (err != 0) { 1386 return err; 1387 } 1388 if (enabled == sc->pause) 1389 return 0; 1390 1391 mtx_lock(&sc->driver_mtx); 1392 err = mxge_change_pause(sc, enabled); 1393 mtx_unlock(&sc->driver_mtx); 1394 return err; 1395 } 1396 1397 static int 1398 mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1399 { 1400 int err; 1401 1402 if (arg1 == NULL) 1403 return EFAULT; 1404 arg2 = be32toh(*(int *)arg1); 1405 arg1 = NULL; 1406 err = sysctl_handle_int(oidp, arg1, arg2, req); 1407 1408 return err; 1409 } 1410 1411 static void 1412 mxge_rem_sysctls(mxge_softc_t *sc) 1413 { 1414 struct mxge_slice_state *ss; 1415 int slice; 1416 1417 if (sc->slice_sysctl_tree == NULL) 1418 return; 1419 1420 for (slice = 0; slice < sc->num_slices; slice++) { 1421 ss = &sc->ss[slice]; 1422 if (ss == NULL || ss->sysctl_tree == NULL) 1423 continue; 1424 sysctl_ctx_free(&ss->sysctl_ctx); 1425 ss->sysctl_tree = NULL; 1426 } 1427 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1428 sc->slice_sysctl_tree = NULL; 1429 } 1430 1431 static void 1432 mxge_add_sysctls(mxge_softc_t *sc) 1433 { 1434 struct sysctl_ctx_list *ctx; 1435 struct sysctl_oid_list *children; 1436 mcp_irq_data_t *fw; 1437 struct mxge_slice_state *ss; 1438 int slice; 1439 char slice_num[8]; 1440 1441 ctx = device_get_sysctl_ctx(sc->dev); 1442 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1443 fw = sc->ss[0].fw_stats; 1444 1445 /* random information */ 1446 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1447 "firmware_version", 1448 CTLFLAG_RD, sc->fw_version, 1449 0, "firmware version"); 1450 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1451 "serial_number", 1452 CTLFLAG_RD, sc->serial_number_string, 1453 0, "serial number"); 1454 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1455 "product_code", 1456 CTLFLAG_RD, sc->product_code_string, 1457 0, "product_code"); 1458 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1459 "pcie_link_width", 1460 CTLFLAG_RD, &sc->link_width, 1461 0, "tx_boundary"); 1462 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1463 "tx_boundary", 1464 CTLFLAG_RD, &sc->tx_boundary, 1465 0, "tx_boundary"); 1466 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1467 "write_combine", 1468 CTLFLAG_RD, &sc->wc, 1469 0, "write combining PIO?"); 1470 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1471 "read_dma_MBs", 1472 CTLFLAG_RD, &sc->read_dma, 1473 0, "DMA Read speed in MB/s"); 1474 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1475 "write_dma_MBs", 1476 CTLFLAG_RD, &sc->write_dma, 1477 0, "DMA Write speed in MB/s"); 1478 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1479 "read_write_dma_MBs", 1480 CTLFLAG_RD, &sc->read_write_dma, 1481 0, "DMA concurrent Read/Write speed in MB/s"); 1482 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1483 "watchdog_resets", 1484 CTLFLAG_RD, &sc->watchdog_resets, 1485 0, "Number of times NIC was reset"); 1486 1487 /* performance related tunables */ 1488 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1489 "intr_coal_delay", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 1490 sc, 0, mxge_change_intr_coal, "I", 1491 "interrupt coalescing delay in usecs"); 1492 1493 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1494 "throttle", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 1495 mxge_change_throttle, "I", "transmit throttling"); 1496 1497 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1498 "flow_control_enabled", 1499 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 1500 mxge_change_flow_control, "I", 1501 "interrupt coalescing delay in usecs"); 1502 1503 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1504 "deassert_wait", 1505 CTLFLAG_RW, &mxge_deassert_wait, 1506 0, "Wait for IRQ line to go low in ihandler"); 1507 1508 /* stats block from firmware is in network byte order. 1509 Need to swap it */ 1510 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1511 "link_up", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1512 &fw->link_up, 0, mxge_handle_be32, "I", "link up"); 1513 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1514 "rdma_tags_available", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1515 &fw->rdma_tags_available, 0, mxge_handle_be32, "I", 1516 "rdma_tags_available"); 1517 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1518 "dropped_bad_crc32", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1519 &fw->dropped_bad_crc32, 0, mxge_handle_be32, "I", 1520 "dropped_bad_crc32"); 1521 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1522 "dropped_bad_phy", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1523 &fw->dropped_bad_phy, 0, mxge_handle_be32, "I", "dropped_bad_phy"); 1524 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1525 "dropped_link_error_or_filtered", 1526 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1527 &fw->dropped_link_error_or_filtered, 0, mxge_handle_be32, "I", 1528 "dropped_link_error_or_filtered"); 1529 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1530 "dropped_link_overflow", 1531 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1532 &fw->dropped_link_overflow, 0, mxge_handle_be32, "I", 1533 "dropped_link_overflow"); 1534 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1535 "dropped_multicast_filtered", 1536 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1537 &fw->dropped_multicast_filtered, 0, mxge_handle_be32, "I", 1538 "dropped_multicast_filtered"); 1539 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1540 "dropped_no_big_buffer", 1541 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1542 &fw->dropped_no_big_buffer, 0, mxge_handle_be32, "I", 1543 "dropped_no_big_buffer"); 1544 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1545 "dropped_no_small_buffer", 1546 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1547 &fw->dropped_no_small_buffer, 0, mxge_handle_be32, "I", 1548 "dropped_no_small_buffer"); 1549 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1550 "dropped_overrun", 1551 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1552 &fw->dropped_overrun, 0, mxge_handle_be32, "I", 1553 "dropped_overrun"); 1554 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1555 "dropped_pause", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1556 &fw->dropped_pause, 0, mxge_handle_be32, "I", "dropped_pause"); 1557 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1558 "dropped_runt", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1559 &fw->dropped_runt, 0, mxge_handle_be32, "I", "dropped_runt"); 1560 1561 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1562 "dropped_unicast_filtered", 1563 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1564 &fw->dropped_unicast_filtered, 0, mxge_handle_be32, "I", 1565 "dropped_unicast_filtered"); 1566 1567 /* verbose printing? */ 1568 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1569 "verbose", 1570 CTLFLAG_RW, &mxge_verbose, 1571 0, "verbose printing"); 1572 1573 /* add counters exported for debugging from all slices */ 1574 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1575 sc->slice_sysctl_tree = 1576 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO, 1577 "slice", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 1578 1579 for (slice = 0; slice < sc->num_slices; slice++) { 1580 ss = &sc->ss[slice]; 1581 sysctl_ctx_init(&ss->sysctl_ctx); 1582 ctx = &ss->sysctl_ctx; 1583 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1584 sprintf(slice_num, "%d", slice); 1585 ss->sysctl_tree = 1586 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num, 1587 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 1588 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1589 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1590 "rx_small_cnt", 1591 CTLFLAG_RD, &ss->rx_small.cnt, 1592 0, "rx_small_cnt"); 1593 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1594 "rx_big_cnt", 1595 CTLFLAG_RD, &ss->rx_big.cnt, 1596 0, "rx_small_cnt"); 1597 SYSCTL_ADD_U64(ctx, children, OID_AUTO, 1598 "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed, 1599 0, "number of lro merge queues flushed"); 1600 1601 SYSCTL_ADD_U64(ctx, children, OID_AUTO, 1602 "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum, 1603 0, "number of bad csums preventing LRO"); 1604 1605 SYSCTL_ADD_U64(ctx, children, OID_AUTO, 1606 "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued, 1607 0, "number of frames appended to lro merge" 1608 "queues"); 1609 1610 #ifndef IFNET_BUF_RING 1611 /* only transmit from slice 0 for now */ 1612 if (slice > 0) 1613 continue; 1614 #endif 1615 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1616 "tx_req", 1617 CTLFLAG_RD, &ss->tx.req, 1618 0, "tx_req"); 1619 1620 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1621 "tx_done", 1622 CTLFLAG_RD, &ss->tx.done, 1623 0, "tx_done"); 1624 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1625 "tx_pkt_done", 1626 CTLFLAG_RD, &ss->tx.pkt_done, 1627 0, "tx_done"); 1628 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1629 "tx_stall", 1630 CTLFLAG_RD, &ss->tx.stall, 1631 0, "tx_stall"); 1632 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1633 "tx_wake", 1634 CTLFLAG_RD, &ss->tx.wake, 1635 0, "tx_wake"); 1636 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1637 "tx_defrag", 1638 CTLFLAG_RD, &ss->tx.defrag, 1639 0, "tx_defrag"); 1640 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1641 "tx_queue_active", 1642 CTLFLAG_RD, &ss->tx.queue_active, 1643 0, "tx_queue_active"); 1644 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1645 "tx_activate", 1646 CTLFLAG_RD, &ss->tx.activate, 1647 0, "tx_activate"); 1648 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1649 "tx_deactivate", 1650 CTLFLAG_RD, &ss->tx.deactivate, 1651 0, "tx_deactivate"); 1652 } 1653 } 1654 1655 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1656 backwards one at a time and handle ring wraps */ 1657 1658 static inline void 1659 mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1660 mcp_kreq_ether_send_t *src, int cnt) 1661 { 1662 int idx, starting_slot; 1663 starting_slot = tx->req; 1664 while (cnt > 1) { 1665 cnt--; 1666 idx = (starting_slot + cnt) & tx->mask; 1667 mxge_pio_copy(&tx->lanai[idx], 1668 &src[cnt], sizeof(*src)); 1669 wmb(); 1670 } 1671 } 1672 1673 /* 1674 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1675 * at most 32 bytes at a time, so as to avoid involving the software 1676 * pio handler in the nic. We re-write the first segment's flags 1677 * to mark them valid only after writing the entire chain 1678 */ 1679 1680 static inline void 1681 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 1682 int cnt) 1683 { 1684 int idx, i; 1685 uint32_t *src_ints; 1686 volatile uint32_t *dst_ints; 1687 mcp_kreq_ether_send_t *srcp; 1688 volatile mcp_kreq_ether_send_t *dstp, *dst; 1689 uint8_t last_flags; 1690 1691 idx = tx->req & tx->mask; 1692 1693 last_flags = src->flags; 1694 src->flags = 0; 1695 wmb(); 1696 dst = dstp = &tx->lanai[idx]; 1697 srcp = src; 1698 1699 if ((idx + cnt) < tx->mask) { 1700 for (i = 0; i < (cnt - 1); i += 2) { 1701 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1702 wmb(); /* force write every 32 bytes */ 1703 srcp += 2; 1704 dstp += 2; 1705 } 1706 } else { 1707 /* submit all but the first request, and ensure 1708 that it is submitted below */ 1709 mxge_submit_req_backwards(tx, src, cnt); 1710 i = 0; 1711 } 1712 if (i < cnt) { 1713 /* submit the first request */ 1714 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1715 wmb(); /* barrier before setting valid flag */ 1716 } 1717 1718 /* re-write the last 32-bits with the valid flags */ 1719 src->flags = last_flags; 1720 src_ints = (uint32_t *)src; 1721 src_ints+=3; 1722 dst_ints = (volatile uint32_t *)dst; 1723 dst_ints+=3; 1724 *dst_ints = *src_ints; 1725 tx->req += cnt; 1726 wmb(); 1727 } 1728 1729 static int 1730 mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m, 1731 struct mxge_pkt_info *pi) 1732 { 1733 struct ether_vlan_header *eh; 1734 uint16_t etype; 1735 int tso = m->m_pkthdr.csum_flags & (CSUM_TSO); 1736 #if IFCAP_TSO6 && defined(INET6) 1737 int nxt; 1738 #endif 1739 1740 eh = mtod(m, struct ether_vlan_header *); 1741 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 1742 etype = ntohs(eh->evl_proto); 1743 pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 1744 } else { 1745 etype = ntohs(eh->evl_encap_proto); 1746 pi->ip_off = ETHER_HDR_LEN; 1747 } 1748 1749 switch (etype) { 1750 case ETHERTYPE_IP: 1751 /* 1752 * ensure ip header is in first mbuf, copy it to a 1753 * scratch buffer if not 1754 */ 1755 pi->ip = (struct ip *)(m->m_data + pi->ip_off); 1756 pi->ip6 = NULL; 1757 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) { 1758 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip), 1759 ss->scratch); 1760 pi->ip = (struct ip *)(ss->scratch + pi->ip_off); 1761 } 1762 pi->ip_hlen = pi->ip->ip_hl << 2; 1763 if (!tso) 1764 return 0; 1765 1766 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen + 1767 sizeof(struct tcphdr))) { 1768 m_copydata(m, 0, pi->ip_off + pi->ip_hlen + 1769 sizeof(struct tcphdr), ss->scratch); 1770 pi->ip = (struct ip *)(ss->scratch + pi->ip_off); 1771 } 1772 pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen); 1773 break; 1774 #if IFCAP_TSO6 && defined(INET6) 1775 case ETHERTYPE_IPV6: 1776 pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off); 1777 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) { 1778 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6), 1779 ss->scratch); 1780 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off); 1781 } 1782 nxt = 0; 1783 pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt); 1784 pi->ip_hlen -= pi->ip_off; 1785 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) 1786 return EINVAL; 1787 1788 if (!tso) 1789 return 0; 1790 1791 if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen) 1792 return EINVAL; 1793 1794 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen + 1795 sizeof(struct tcphdr))) { 1796 m_copydata(m, 0, pi->ip_off + pi->ip_hlen + 1797 sizeof(struct tcphdr), ss->scratch); 1798 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off); 1799 } 1800 pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen); 1801 break; 1802 #endif 1803 default: 1804 return EINVAL; 1805 } 1806 return 0; 1807 } 1808 1809 #if IFCAP_TSO4 1810 1811 static void 1812 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m, 1813 int busdma_seg_cnt, struct mxge_pkt_info *pi) 1814 { 1815 mxge_tx_ring_t *tx; 1816 mcp_kreq_ether_send_t *req; 1817 bus_dma_segment_t *seg; 1818 uint32_t low, high_swapped; 1819 int len, seglen, cum_len, cum_len_next; 1820 int next_is_first, chop, cnt, rdma_count, small; 1821 uint16_t pseudo_hdr_offset, cksum_offset, mss, sum; 1822 uint8_t flags, flags_next; 1823 static int once; 1824 1825 mss = m->m_pkthdr.tso_segsz; 1826 1827 /* negative cum_len signifies to the 1828 * send loop that we are still in the 1829 * header portion of the TSO packet. 1830 */ 1831 1832 cksum_offset = pi->ip_off + pi->ip_hlen; 1833 cum_len = -(cksum_offset + (pi->tcp->th_off << 2)); 1834 1835 /* TSO implies checksum offload on this hardware */ 1836 if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) { 1837 /* 1838 * If packet has full TCP csum, replace it with pseudo hdr 1839 * sum that the NIC expects, otherwise the NIC will emit 1840 * packets with bad TCP checksums. 1841 */ 1842 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); 1843 if (pi->ip6) { 1844 #if (CSUM_TCP_IPV6 != 0) && defined(INET6) 1845 m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6; 1846 sum = in6_cksum_pseudo(pi->ip6, 1847 m->m_pkthdr.len - cksum_offset, 1848 IPPROTO_TCP, 0); 1849 #endif 1850 } else { 1851 #ifdef INET 1852 m->m_pkthdr.csum_flags |= CSUM_TCP; 1853 sum = in_pseudo(pi->ip->ip_src.s_addr, 1854 pi->ip->ip_dst.s_addr, 1855 htons(IPPROTO_TCP + (m->m_pkthdr.len - 1856 cksum_offset))); 1857 #endif 1858 } 1859 m_copyback(m, offsetof(struct tcphdr, th_sum) + 1860 cksum_offset, sizeof(sum), (caddr_t)&sum); 1861 } 1862 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1863 1864 /* for TSO, pseudo_hdr_offset holds mss. 1865 * The firmware figures out where to put 1866 * the checksum by parsing the header. */ 1867 pseudo_hdr_offset = htobe16(mss); 1868 1869 if (pi->ip6) { 1870 /* 1871 * for IPv6 TSO, the "checksum offset" is re-purposed 1872 * to store the TCP header len 1873 */ 1874 cksum_offset = (pi->tcp->th_off << 2); 1875 } 1876 1877 tx = &ss->tx; 1878 req = tx->req_list; 1879 seg = tx->seg_list; 1880 cnt = 0; 1881 rdma_count = 0; 1882 /* "rdma_count" is the number of RDMAs belonging to the 1883 * current packet BEFORE the current send request. For 1884 * non-TSO packets, this is equal to "count". 1885 * For TSO packets, rdma_count needs to be reset 1886 * to 0 after a segment cut. 1887 * 1888 * The rdma_count field of the send request is 1889 * the number of RDMAs of the packet starting at 1890 * that request. For TSO send requests with one ore more cuts 1891 * in the middle, this is the number of RDMAs starting 1892 * after the last cut in the request. All previous 1893 * segments before the last cut implicitly have 1 RDMA. 1894 * 1895 * Since the number of RDMAs is not known beforehand, 1896 * it must be filled-in retroactively - after each 1897 * segmentation cut or at the end of the entire packet. 1898 */ 1899 1900 while (busdma_seg_cnt) { 1901 /* Break the busdma segment up into pieces*/ 1902 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1903 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1904 len = seg->ds_len; 1905 1906 while (len) { 1907 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1908 seglen = len; 1909 cum_len_next = cum_len + seglen; 1910 (req-rdma_count)->rdma_count = rdma_count + 1; 1911 if (__predict_true(cum_len >= 0)) { 1912 /* payload */ 1913 chop = (cum_len_next > mss); 1914 cum_len_next = cum_len_next % mss; 1915 next_is_first = (cum_len_next == 0); 1916 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1917 flags_next |= next_is_first * 1918 MXGEFW_FLAGS_FIRST; 1919 rdma_count |= -(chop | next_is_first); 1920 rdma_count += chop & !next_is_first; 1921 } else if (cum_len_next >= 0) { 1922 /* header ends */ 1923 rdma_count = -1; 1924 cum_len_next = 0; 1925 seglen = -cum_len; 1926 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1927 flags_next = MXGEFW_FLAGS_TSO_PLD | 1928 MXGEFW_FLAGS_FIRST | 1929 (small * MXGEFW_FLAGS_SMALL); 1930 } 1931 1932 req->addr_high = high_swapped; 1933 req->addr_low = htobe32(low); 1934 req->pseudo_hdr_offset = pseudo_hdr_offset; 1935 req->pad = 0; 1936 req->rdma_count = 1; 1937 req->length = htobe16(seglen); 1938 req->cksum_offset = cksum_offset; 1939 req->flags = flags | ((cum_len & 1) * 1940 MXGEFW_FLAGS_ALIGN_ODD); 1941 low += seglen; 1942 len -= seglen; 1943 cum_len = cum_len_next; 1944 flags = flags_next; 1945 req++; 1946 cnt++; 1947 rdma_count++; 1948 if (cksum_offset != 0 && !pi->ip6) { 1949 if (__predict_false(cksum_offset > seglen)) 1950 cksum_offset -= seglen; 1951 else 1952 cksum_offset = 0; 1953 } 1954 if (__predict_false(cnt > tx->max_desc)) 1955 goto drop; 1956 } 1957 busdma_seg_cnt--; 1958 seg++; 1959 } 1960 (req-rdma_count)->rdma_count = rdma_count; 1961 1962 do { 1963 req--; 1964 req->flags |= MXGEFW_FLAGS_TSO_LAST; 1965 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 1966 1967 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1968 mxge_submit_req(tx, tx->req_list, cnt); 1969 #ifdef IFNET_BUF_RING 1970 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 1971 /* tell the NIC to start polling this slice */ 1972 *tx->send_go = 1; 1973 tx->queue_active = 1; 1974 tx->activate++; 1975 wmb(); 1976 } 1977 #endif 1978 return; 1979 1980 drop: 1981 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 1982 m_freem(m); 1983 ss->oerrors++; 1984 if (!once) { 1985 printf("tx->max_desc exceeded via TSO!\n"); 1986 printf("mss = %d, %ld, %d!\n", mss, 1987 (long)seg - (long)tx->seg_list, tx->max_desc); 1988 once = 1; 1989 } 1990 return; 1991 1992 } 1993 1994 #endif /* IFCAP_TSO4 */ 1995 1996 #ifdef MXGE_NEW_VLAN_API 1997 /* 1998 * We reproduce the software vlan tag insertion from 1999 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" 2000 * vlan tag insertion. We need to advertise this in order to have the 2001 * vlan interface respect our csum offload flags. 2002 */ 2003 static struct mbuf * 2004 mxge_vlan_tag_insert(struct mbuf *m) 2005 { 2006 struct ether_vlan_header *evl; 2007 2008 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT); 2009 if (__predict_false(m == NULL)) 2010 return NULL; 2011 if (m->m_len < sizeof(*evl)) { 2012 m = m_pullup(m, sizeof(*evl)); 2013 if (__predict_false(m == NULL)) 2014 return NULL; 2015 } 2016 /* 2017 * Transform the Ethernet header into an Ethernet header 2018 * with 802.1Q encapsulation. 2019 */ 2020 evl = mtod(m, struct ether_vlan_header *); 2021 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 2022 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 2023 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 2024 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 2025 m->m_flags &= ~M_VLANTAG; 2026 return m; 2027 } 2028 #endif /* MXGE_NEW_VLAN_API */ 2029 2030 static void 2031 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 2032 { 2033 struct mxge_pkt_info pi = {0,0,0,0}; 2034 mxge_softc_t *sc; 2035 mcp_kreq_ether_send_t *req; 2036 bus_dma_segment_t *seg; 2037 struct mbuf *m_tmp; 2038 mxge_tx_ring_t *tx; 2039 int cnt, cum_len, err, i, idx, odd_flag; 2040 uint16_t pseudo_hdr_offset; 2041 uint8_t flags, cksum_offset; 2042 2043 sc = ss->sc; 2044 tx = &ss->tx; 2045 2046 #ifdef MXGE_NEW_VLAN_API 2047 if (m->m_flags & M_VLANTAG) { 2048 m = mxge_vlan_tag_insert(m); 2049 if (__predict_false(m == NULL)) 2050 goto drop_without_m; 2051 } 2052 #endif 2053 if (m->m_pkthdr.csum_flags & 2054 (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { 2055 if (mxge_parse_tx(ss, m, &pi)) 2056 goto drop; 2057 } 2058 2059 /* (try to) map the frame for DMA */ 2060 idx = tx->req & tx->mask; 2061 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 2062 m, tx->seg_list, &cnt, 2063 BUS_DMA_NOWAIT); 2064 if (__predict_false(err == EFBIG)) { 2065 /* Too many segments in the chain. Try 2066 to defrag */ 2067 m_tmp = m_defrag(m, M_NOWAIT); 2068 if (m_tmp == NULL) { 2069 goto drop; 2070 } 2071 ss->tx.defrag++; 2072 m = m_tmp; 2073 err = bus_dmamap_load_mbuf_sg(tx->dmat, 2074 tx->info[idx].map, 2075 m, tx->seg_list, &cnt, 2076 BUS_DMA_NOWAIT); 2077 } 2078 if (__predict_false(err != 0)) { 2079 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 2080 " packet len = %d\n", err, m->m_pkthdr.len); 2081 goto drop; 2082 } 2083 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 2084 BUS_DMASYNC_PREWRITE); 2085 tx->info[idx].m = m; 2086 2087 #if IFCAP_TSO4 2088 /* TSO is different enough, we handle it in another routine */ 2089 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 2090 mxge_encap_tso(ss, m, cnt, &pi); 2091 return; 2092 } 2093 #endif 2094 2095 req = tx->req_list; 2096 cksum_offset = 0; 2097 pseudo_hdr_offset = 0; 2098 flags = MXGEFW_FLAGS_NO_TSO; 2099 2100 /* checksum offloading? */ 2101 if (m->m_pkthdr.csum_flags & 2102 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { 2103 /* ensure ip header is in first mbuf, copy 2104 it to a scratch buffer if not */ 2105 cksum_offset = pi.ip_off + pi.ip_hlen; 2106 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2107 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2108 req->cksum_offset = cksum_offset; 2109 flags |= MXGEFW_FLAGS_CKSUM; 2110 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2111 } else { 2112 odd_flag = 0; 2113 } 2114 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2115 flags |= MXGEFW_FLAGS_SMALL; 2116 2117 /* convert segments into a request list */ 2118 cum_len = 0; 2119 seg = tx->seg_list; 2120 req->flags = MXGEFW_FLAGS_FIRST; 2121 for (i = 0; i < cnt; i++) { 2122 req->addr_low = 2123 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2124 req->addr_high = 2125 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2126 req->length = htobe16(seg->ds_len); 2127 req->cksum_offset = cksum_offset; 2128 if (cksum_offset > seg->ds_len) 2129 cksum_offset -= seg->ds_len; 2130 else 2131 cksum_offset = 0; 2132 req->pseudo_hdr_offset = pseudo_hdr_offset; 2133 req->pad = 0; /* complete solid 16-byte block */ 2134 req->rdma_count = 1; 2135 req->flags |= flags | ((cum_len & 1) * odd_flag); 2136 cum_len += seg->ds_len; 2137 seg++; 2138 req++; 2139 req->flags = 0; 2140 } 2141 req--; 2142 /* pad runts to 60 bytes */ 2143 if (cum_len < 60) { 2144 req++; 2145 req->addr_low = 2146 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2147 req->addr_high = 2148 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2149 req->length = htobe16(60 - cum_len); 2150 req->cksum_offset = 0; 2151 req->pseudo_hdr_offset = pseudo_hdr_offset; 2152 req->pad = 0; /* complete solid 16-byte block */ 2153 req->rdma_count = 1; 2154 req->flags |= flags | ((cum_len & 1) * odd_flag); 2155 cnt++; 2156 } 2157 2158 tx->req_list[0].rdma_count = cnt; 2159 #if 0 2160 /* print what the firmware will see */ 2161 for (i = 0; i < cnt; i++) { 2162 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2163 "cso:%d, flags:0x%x, rdma:%d\n", 2164 i, (int)ntohl(tx->req_list[i].addr_high), 2165 (int)ntohl(tx->req_list[i].addr_low), 2166 (int)ntohs(tx->req_list[i].length), 2167 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2168 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2169 tx->req_list[i].rdma_count); 2170 } 2171 printf("--------------\n"); 2172 #endif 2173 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2174 mxge_submit_req(tx, tx->req_list, cnt); 2175 #ifdef IFNET_BUF_RING 2176 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2177 /* tell the NIC to start polling this slice */ 2178 *tx->send_go = 1; 2179 tx->queue_active = 1; 2180 tx->activate++; 2181 wmb(); 2182 } 2183 #endif 2184 return; 2185 2186 drop: 2187 m_freem(m); 2188 drop_without_m: 2189 ss->oerrors++; 2190 return; 2191 } 2192 2193 #ifdef IFNET_BUF_RING 2194 static void 2195 mxge_qflush(struct ifnet *ifp) 2196 { 2197 mxge_softc_t *sc = ifp->if_softc; 2198 mxge_tx_ring_t *tx; 2199 struct mbuf *m; 2200 int slice; 2201 2202 for (slice = 0; slice < sc->num_slices; slice++) { 2203 tx = &sc->ss[slice].tx; 2204 mtx_lock(&tx->mtx); 2205 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2206 m_freem(m); 2207 mtx_unlock(&tx->mtx); 2208 } 2209 if_qflush(ifp); 2210 } 2211 2212 static inline void 2213 mxge_start_locked(struct mxge_slice_state *ss) 2214 { 2215 mxge_softc_t *sc; 2216 struct mbuf *m; 2217 struct ifnet *ifp; 2218 mxge_tx_ring_t *tx; 2219 2220 sc = ss->sc; 2221 ifp = sc->ifp; 2222 tx = &ss->tx; 2223 2224 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2225 m = drbr_dequeue(ifp, tx->br); 2226 if (m == NULL) { 2227 return; 2228 } 2229 /* let BPF see it */ 2230 BPF_MTAP(ifp, m); 2231 2232 /* give it to the nic */ 2233 mxge_encap(ss, m); 2234 } 2235 /* ran out of transmit slots */ 2236 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) 2237 && (!drbr_empty(ifp, tx->br))) { 2238 ss->if_drv_flags |= IFF_DRV_OACTIVE; 2239 tx->stall++; 2240 } 2241 } 2242 2243 static int 2244 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2245 { 2246 mxge_softc_t *sc; 2247 struct ifnet *ifp; 2248 mxge_tx_ring_t *tx; 2249 int err; 2250 2251 sc = ss->sc; 2252 ifp = sc->ifp; 2253 tx = &ss->tx; 2254 2255 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 2256 IFF_DRV_RUNNING) { 2257 err = drbr_enqueue(ifp, tx->br, m); 2258 return (err); 2259 } 2260 2261 if (!drbr_needs_enqueue(ifp, tx->br) && 2262 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2263 /* let BPF see it */ 2264 BPF_MTAP(ifp, m); 2265 /* give it to the nic */ 2266 mxge_encap(ss, m); 2267 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2268 return (err); 2269 } 2270 if (!drbr_empty(ifp, tx->br)) 2271 mxge_start_locked(ss); 2272 return (0); 2273 } 2274 2275 static int 2276 mxge_transmit(struct ifnet *ifp, struct mbuf *m) 2277 { 2278 mxge_softc_t *sc = ifp->if_softc; 2279 struct mxge_slice_state *ss; 2280 mxge_tx_ring_t *tx; 2281 int err = 0; 2282 int slice; 2283 2284 slice = m->m_pkthdr.flowid; 2285 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ 2286 2287 ss = &sc->ss[slice]; 2288 tx = &ss->tx; 2289 2290 if (mtx_trylock(&tx->mtx)) { 2291 err = mxge_transmit_locked(ss, m); 2292 mtx_unlock(&tx->mtx); 2293 } else { 2294 err = drbr_enqueue(ifp, tx->br, m); 2295 } 2296 2297 return (err); 2298 } 2299 2300 #else 2301 2302 static inline void 2303 mxge_start_locked(struct mxge_slice_state *ss) 2304 { 2305 mxge_softc_t *sc; 2306 struct mbuf *m; 2307 struct ifnet *ifp; 2308 mxge_tx_ring_t *tx; 2309 2310 sc = ss->sc; 2311 ifp = sc->ifp; 2312 tx = &ss->tx; 2313 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2314 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 2315 if (m == NULL) { 2316 return; 2317 } 2318 /* let BPF see it */ 2319 BPF_MTAP(ifp, m); 2320 2321 /* give it to the nic */ 2322 mxge_encap(ss, m); 2323 } 2324 /* ran out of transmit slots */ 2325 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 2326 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2327 tx->stall++; 2328 } 2329 } 2330 #endif 2331 static void 2332 mxge_start(struct ifnet *ifp) 2333 { 2334 mxge_softc_t *sc = ifp->if_softc; 2335 struct mxge_slice_state *ss; 2336 2337 /* only use the first slice for now */ 2338 ss = &sc->ss[0]; 2339 mtx_lock(&ss->tx.mtx); 2340 mxge_start_locked(ss); 2341 mtx_unlock(&ss->tx.mtx); 2342 } 2343 2344 /* 2345 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2346 * at most 32 bytes at a time, so as to avoid involving the software 2347 * pio handler in the nic. We re-write the first segment's low 2348 * DMA address to mark it valid only after we write the entire chunk 2349 * in a burst 2350 */ 2351 static inline void 2352 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2353 mcp_kreq_ether_recv_t *src) 2354 { 2355 uint32_t low; 2356 2357 low = src->addr_low; 2358 src->addr_low = 0xffffffff; 2359 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2360 wmb(); 2361 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2362 wmb(); 2363 src->addr_low = low; 2364 dst->addr_low = low; 2365 wmb(); 2366 } 2367 2368 static int 2369 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2370 { 2371 bus_dma_segment_t seg; 2372 struct mbuf *m; 2373 mxge_rx_ring_t *rx = &ss->rx_small; 2374 int cnt, err; 2375 2376 m = m_gethdr(M_NOWAIT, MT_DATA); 2377 if (m == NULL) { 2378 rx->alloc_fail++; 2379 err = ENOBUFS; 2380 goto done; 2381 } 2382 m->m_len = MHLEN; 2383 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2384 &seg, &cnt, BUS_DMA_NOWAIT); 2385 if (err != 0) { 2386 m_free(m); 2387 goto done; 2388 } 2389 rx->info[idx].m = m; 2390 rx->shadow[idx].addr_low = 2391 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2392 rx->shadow[idx].addr_high = 2393 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2394 2395 done: 2396 if ((idx & 7) == 7) 2397 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2398 return err; 2399 } 2400 2401 static int 2402 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2403 { 2404 bus_dma_segment_t seg[3]; 2405 struct mbuf *m; 2406 mxge_rx_ring_t *rx = &ss->rx_big; 2407 int cnt, err, i; 2408 2409 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2410 if (m == NULL) { 2411 rx->alloc_fail++; 2412 err = ENOBUFS; 2413 goto done; 2414 } 2415 m->m_len = rx->mlen; 2416 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2417 seg, &cnt, BUS_DMA_NOWAIT); 2418 if (err != 0) { 2419 m_free(m); 2420 goto done; 2421 } 2422 rx->info[idx].m = m; 2423 rx->shadow[idx].addr_low = 2424 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2425 rx->shadow[idx].addr_high = 2426 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2427 2428 #if MXGE_VIRT_JUMBOS 2429 for (i = 1; i < cnt; i++) { 2430 rx->shadow[idx + i].addr_low = 2431 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2432 rx->shadow[idx + i].addr_high = 2433 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2434 } 2435 #endif 2436 2437 done: 2438 for (i = 0; i < rx->nbufs; i++) { 2439 if ((idx & 7) == 7) { 2440 mxge_submit_8rx(&rx->lanai[idx - 7], 2441 &rx->shadow[idx - 7]); 2442 } 2443 idx++; 2444 } 2445 return err; 2446 } 2447 2448 #ifdef INET6 2449 2450 static uint16_t 2451 mxge_csum_generic(uint16_t *raw, int len) 2452 { 2453 uint32_t csum; 2454 2455 csum = 0; 2456 while (len > 0) { 2457 csum += *raw; 2458 raw++; 2459 len -= 2; 2460 } 2461 csum = (csum >> 16) + (csum & 0xffff); 2462 csum = (csum >> 16) + (csum & 0xffff); 2463 return (uint16_t)csum; 2464 } 2465 2466 static inline uint16_t 2467 mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum) 2468 { 2469 uint32_t partial; 2470 int nxt, cksum_offset; 2471 struct ip6_hdr *ip6 = p; 2472 uint16_t c; 2473 2474 nxt = ip6->ip6_nxt; 2475 cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN; 2476 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) { 2477 cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN, 2478 IPPROTO_IPV6, &nxt); 2479 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) 2480 return (1); 2481 } 2482 2483 /* 2484 * IPv6 headers do not contain a checksum, and hence 2485 * do not checksum to zero, so they don't "fall out" 2486 * of the partial checksum calculation like IPv4 2487 * headers do. We need to fix the partial checksum by 2488 * subtracting the checksum of the IPv6 header. 2489 */ 2490 2491 partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset - 2492 ETHER_HDR_LEN); 2493 csum += ~partial; 2494 csum += (csum < ~partial); 2495 csum = (csum >> 16) + (csum & 0xFFFF); 2496 csum = (csum >> 16) + (csum & 0xFFFF); 2497 c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt, 2498 csum); 2499 c ^= 0xffff; 2500 return (c); 2501 } 2502 #endif /* INET6 */ 2503 /* 2504 * Myri10GE hardware checksums are not valid if the sender 2505 * padded the frame with non-zero padding. This is because 2506 * the firmware just does a simple 16-bit 1s complement 2507 * checksum across the entire frame, excluding the first 14 2508 * bytes. It is best to simply to check the checksum and 2509 * tell the stack about it only if the checksum is good 2510 */ 2511 2512 static inline uint16_t 2513 mxge_rx_csum(struct mbuf *m, int csum) 2514 { 2515 struct ether_header *eh; 2516 #ifdef INET 2517 struct ip *ip; 2518 #endif 2519 #if defined(INET) || defined(INET6) 2520 int cap = m->m_pkthdr.rcvif->if_capenable; 2521 #endif 2522 uint16_t c, etype; 2523 2524 eh = mtod(m, struct ether_header *); 2525 etype = ntohs(eh->ether_type); 2526 switch (etype) { 2527 #ifdef INET 2528 case ETHERTYPE_IP: 2529 if ((cap & IFCAP_RXCSUM) == 0) 2530 return (1); 2531 ip = (struct ip *)(eh + 1); 2532 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP) 2533 return (1); 2534 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2535 htonl(ntohs(csum) + ntohs(ip->ip_len) - 2536 (ip->ip_hl << 2) + ip->ip_p)); 2537 c ^= 0xffff; 2538 break; 2539 #endif 2540 #ifdef INET6 2541 case ETHERTYPE_IPV6: 2542 if ((cap & IFCAP_RXCSUM_IPV6) == 0) 2543 return (1); 2544 c = mxge_rx_csum6((eh + 1), m, csum); 2545 break; 2546 #endif 2547 default: 2548 c = 1; 2549 } 2550 return (c); 2551 } 2552 2553 static void 2554 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2555 { 2556 struct ether_vlan_header *evl; 2557 uint32_t partial; 2558 2559 evl = mtod(m, struct ether_vlan_header *); 2560 2561 /* 2562 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes 2563 * after what the firmware thought was the end of the ethernet 2564 * header. 2565 */ 2566 2567 /* put checksum into host byte order */ 2568 *csum = ntohs(*csum); 2569 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2570 (*csum) += ~partial; 2571 (*csum) += ((*csum) < ~partial); 2572 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2573 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2574 2575 /* restore checksum to network byte order; 2576 later consumers expect this */ 2577 *csum = htons(*csum); 2578 2579 /* save the tag */ 2580 #ifdef MXGE_NEW_VLAN_API 2581 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); 2582 #else 2583 { 2584 struct m_tag *mtag; 2585 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), 2586 M_NOWAIT); 2587 if (mtag == NULL) 2588 return; 2589 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); 2590 m_tag_prepend(m, mtag); 2591 } 2592 2593 #endif 2594 m->m_flags |= M_VLANTAG; 2595 2596 /* 2597 * Remove the 802.1q header by copying the Ethernet 2598 * addresses over it and adjusting the beginning of 2599 * the data in the mbuf. The encapsulated Ethernet 2600 * type field is already in place. 2601 */ 2602 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2603 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2604 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2605 } 2606 2607 static inline void 2608 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, 2609 uint32_t csum, int lro) 2610 { 2611 mxge_softc_t *sc; 2612 struct ifnet *ifp; 2613 struct mbuf *m; 2614 struct ether_header *eh; 2615 mxge_rx_ring_t *rx; 2616 bus_dmamap_t old_map; 2617 int idx; 2618 2619 sc = ss->sc; 2620 ifp = sc->ifp; 2621 rx = &ss->rx_big; 2622 idx = rx->cnt & rx->mask; 2623 rx->cnt += rx->nbufs; 2624 /* save a pointer to the received mbuf */ 2625 m = rx->info[idx].m; 2626 /* try to replace the received mbuf */ 2627 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2628 /* drop the frame -- the old mbuf is re-cycled */ 2629 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 2630 return; 2631 } 2632 2633 /* unmap the received buffer */ 2634 old_map = rx->info[idx].map; 2635 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2636 bus_dmamap_unload(rx->dmat, old_map); 2637 2638 /* swap the bus_dmamap_t's */ 2639 rx->info[idx].map = rx->extra_map; 2640 rx->extra_map = old_map; 2641 2642 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2643 * aligned */ 2644 m->m_data += MXGEFW_PAD; 2645 2646 m->m_pkthdr.rcvif = ifp; 2647 m->m_len = m->m_pkthdr.len = len; 2648 ss->ipackets++; 2649 eh = mtod(m, struct ether_header *); 2650 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2651 mxge_vlan_tag_remove(m, &csum); 2652 } 2653 /* flowid only valid if RSS hashing is enabled */ 2654 if (sc->num_slices > 1) { 2655 m->m_pkthdr.flowid = (ss - sc->ss); 2656 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 2657 } 2658 /* if the checksum is valid, mark it in the mbuf header */ 2659 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && 2660 (0 == mxge_rx_csum(m, csum))) { 2661 /* Tell the stack that the checksum is good */ 2662 m->m_pkthdr.csum_data = 0xffff; 2663 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2664 CSUM_DATA_VALID; 2665 2666 #if defined(INET) || defined (INET6) 2667 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0))) 2668 return; 2669 #endif 2670 } 2671 /* pass the frame up the stack */ 2672 (*ifp->if_input)(ifp, m); 2673 } 2674 2675 static inline void 2676 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, 2677 uint32_t csum, int lro) 2678 { 2679 mxge_softc_t *sc; 2680 struct ifnet *ifp; 2681 struct ether_header *eh; 2682 struct mbuf *m; 2683 mxge_rx_ring_t *rx; 2684 bus_dmamap_t old_map; 2685 int idx; 2686 2687 sc = ss->sc; 2688 ifp = sc->ifp; 2689 rx = &ss->rx_small; 2690 idx = rx->cnt & rx->mask; 2691 rx->cnt++; 2692 /* save a pointer to the received mbuf */ 2693 m = rx->info[idx].m; 2694 /* try to replace the received mbuf */ 2695 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2696 /* drop the frame -- the old mbuf is re-cycled */ 2697 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 2698 return; 2699 } 2700 2701 /* unmap the received buffer */ 2702 old_map = rx->info[idx].map; 2703 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2704 bus_dmamap_unload(rx->dmat, old_map); 2705 2706 /* swap the bus_dmamap_t's */ 2707 rx->info[idx].map = rx->extra_map; 2708 rx->extra_map = old_map; 2709 2710 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2711 * aligned */ 2712 m->m_data += MXGEFW_PAD; 2713 2714 m->m_pkthdr.rcvif = ifp; 2715 m->m_len = m->m_pkthdr.len = len; 2716 ss->ipackets++; 2717 eh = mtod(m, struct ether_header *); 2718 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2719 mxge_vlan_tag_remove(m, &csum); 2720 } 2721 /* flowid only valid if RSS hashing is enabled */ 2722 if (sc->num_slices > 1) { 2723 m->m_pkthdr.flowid = (ss - sc->ss); 2724 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 2725 } 2726 /* if the checksum is valid, mark it in the mbuf header */ 2727 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && 2728 (0 == mxge_rx_csum(m, csum))) { 2729 /* Tell the stack that the checksum is good */ 2730 m->m_pkthdr.csum_data = 0xffff; 2731 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2732 CSUM_DATA_VALID; 2733 2734 #if defined(INET) || defined (INET6) 2735 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum))) 2736 return; 2737 #endif 2738 } 2739 /* pass the frame up the stack */ 2740 (*ifp->if_input)(ifp, m); 2741 } 2742 2743 static inline void 2744 mxge_clean_rx_done(struct mxge_slice_state *ss) 2745 { 2746 mxge_rx_done_t *rx_done = &ss->rx_done; 2747 int limit = 0; 2748 uint16_t length; 2749 uint16_t checksum; 2750 int lro; 2751 2752 lro = ss->sc->ifp->if_capenable & IFCAP_LRO; 2753 while (rx_done->entry[rx_done->idx].length != 0) { 2754 length = ntohs(rx_done->entry[rx_done->idx].length); 2755 rx_done->entry[rx_done->idx].length = 0; 2756 checksum = rx_done->entry[rx_done->idx].checksum; 2757 if (length <= (MHLEN - MXGEFW_PAD)) 2758 mxge_rx_done_small(ss, length, checksum, lro); 2759 else 2760 mxge_rx_done_big(ss, length, checksum, lro); 2761 rx_done->cnt++; 2762 rx_done->idx = rx_done->cnt & rx_done->mask; 2763 2764 /* limit potential for livelock */ 2765 if (__predict_false(++limit > rx_done->mask / 2)) 2766 break; 2767 } 2768 #if defined(INET) || defined (INET6) 2769 tcp_lro_flush_all(&ss->lc); 2770 #endif 2771 } 2772 2773 static inline void 2774 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2775 { 2776 struct ifnet *ifp __unused; 2777 mxge_tx_ring_t *tx; 2778 struct mbuf *m; 2779 bus_dmamap_t map; 2780 int idx; 2781 int *flags; 2782 2783 tx = &ss->tx; 2784 ifp = ss->sc->ifp; 2785 while (tx->pkt_done != mcp_idx) { 2786 idx = tx->done & tx->mask; 2787 tx->done++; 2788 m = tx->info[idx].m; 2789 /* mbuf and DMA map only attached to the first 2790 segment per-mbuf */ 2791 if (m != NULL) { 2792 ss->obytes += m->m_pkthdr.len; 2793 if (m->m_flags & M_MCAST) 2794 ss->omcasts++; 2795 ss->opackets++; 2796 tx->info[idx].m = NULL; 2797 map = tx->info[idx].map; 2798 bus_dmamap_unload(tx->dmat, map); 2799 m_freem(m); 2800 } 2801 if (tx->info[idx].flag) { 2802 tx->info[idx].flag = 0; 2803 tx->pkt_done++; 2804 } 2805 } 2806 2807 /* If we have space, clear IFF_OACTIVE to tell the stack that 2808 its OK to send packets */ 2809 #ifdef IFNET_BUF_RING 2810 flags = &ss->if_drv_flags; 2811 #else 2812 flags = &ifp->if_drv_flags; 2813 #endif 2814 mtx_lock(&ss->tx.mtx); 2815 if ((*flags) & IFF_DRV_OACTIVE && 2816 tx->req - tx->done < (tx->mask + 1)/4) { 2817 *(flags) &= ~IFF_DRV_OACTIVE; 2818 ss->tx.wake++; 2819 mxge_start_locked(ss); 2820 } 2821 #ifdef IFNET_BUF_RING 2822 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { 2823 /* let the NIC stop polling this queue, since there 2824 * are no more transmits pending */ 2825 if (tx->req == tx->done) { 2826 *tx->send_stop = 1; 2827 tx->queue_active = 0; 2828 tx->deactivate++; 2829 wmb(); 2830 } 2831 } 2832 #endif 2833 mtx_unlock(&ss->tx.mtx); 2834 2835 } 2836 2837 static struct mxge_media_type mxge_xfp_media_types[] = 2838 { 2839 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2840 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2841 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2842 {0, (1 << 5), "10GBASE-ER"}, 2843 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2844 {0, (1 << 3), "10GBASE-SW"}, 2845 {0, (1 << 2), "10GBASE-LW"}, 2846 {0, (1 << 1), "10GBASE-EW"}, 2847 {0, (1 << 0), "Reserved"} 2848 }; 2849 static struct mxge_media_type mxge_sfp_media_types[] = 2850 { 2851 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"}, 2852 {0, (1 << 7), "Reserved"}, 2853 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2854 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2855 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}, 2856 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"} 2857 }; 2858 2859 static void 2860 mxge_media_set(mxge_softc_t *sc, int media_type) 2861 { 2862 2863 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type, 2864 0, NULL); 2865 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type); 2866 sc->current_media = media_type; 2867 sc->media.ifm_media = sc->media.ifm_cur->ifm_media; 2868 } 2869 2870 static void 2871 mxge_media_init(mxge_softc_t *sc) 2872 { 2873 char *ptr; 2874 int i; 2875 2876 ifmedia_removeall(&sc->media); 2877 mxge_media_set(sc, IFM_AUTO); 2878 2879 /* 2880 * parse the product code to deterimine the interface type 2881 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2882 * after the 3rd dash in the driver's cached copy of the 2883 * EEPROM's product code string. 2884 */ 2885 ptr = sc->product_code_string; 2886 if (ptr == NULL) { 2887 device_printf(sc->dev, "Missing product code\n"); 2888 return; 2889 } 2890 2891 for (i = 0; i < 3; i++, ptr++) { 2892 ptr = strchr(ptr, '-'); 2893 if (ptr == NULL) { 2894 device_printf(sc->dev, 2895 "only %d dashes in PC?!?\n", i); 2896 return; 2897 } 2898 } 2899 if (*ptr == 'C' || *(ptr +1) == 'C') { 2900 /* -C is CX4 */ 2901 sc->connector = MXGE_CX4; 2902 mxge_media_set(sc, IFM_10G_CX4); 2903 } else if (*ptr == 'Q') { 2904 /* -Q is Quad Ribbon Fiber */ 2905 sc->connector = MXGE_QRF; 2906 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2907 /* FreeBSD has no media type for Quad ribbon fiber */ 2908 } else if (*ptr == 'R') { 2909 /* -R is XFP */ 2910 sc->connector = MXGE_XFP; 2911 } else if (*ptr == 'S' || *(ptr +1) == 'S') { 2912 /* -S or -2S is SFP+ */ 2913 sc->connector = MXGE_SFP; 2914 } else { 2915 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2916 } 2917 } 2918 2919 /* 2920 * Determine the media type for a NIC. Some XFPs will identify 2921 * themselves only when their link is up, so this is initiated via a 2922 * link up interrupt. However, this can potentially take up to 2923 * several milliseconds, so it is run via the watchdog routine, rather 2924 * than in the interrupt handler itself. 2925 */ 2926 static void 2927 mxge_media_probe(mxge_softc_t *sc) 2928 { 2929 mxge_cmd_t cmd; 2930 char *cage_type; 2931 2932 struct mxge_media_type *mxge_media_types = NULL; 2933 int i, err, ms, mxge_media_type_entries; 2934 uint32_t byte; 2935 2936 sc->need_media_probe = 0; 2937 2938 if (sc->connector == MXGE_XFP) { 2939 /* -R is XFP */ 2940 mxge_media_types = mxge_xfp_media_types; 2941 mxge_media_type_entries = 2942 nitems(mxge_xfp_media_types); 2943 byte = MXGE_XFP_COMPLIANCE_BYTE; 2944 cage_type = "XFP"; 2945 } else if (sc->connector == MXGE_SFP) { 2946 /* -S or -2S is SFP+ */ 2947 mxge_media_types = mxge_sfp_media_types; 2948 mxge_media_type_entries = 2949 nitems(mxge_sfp_media_types); 2950 cage_type = "SFP+"; 2951 byte = 3; 2952 } else { 2953 /* nothing to do; media type cannot change */ 2954 return; 2955 } 2956 2957 /* 2958 * At this point we know the NIC has an XFP cage, so now we 2959 * try to determine what is in the cage by using the 2960 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2961 * register. We read just one byte, which may take over 2962 * a millisecond 2963 */ 2964 2965 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2966 cmd.data1 = byte; 2967 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2968 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 2969 device_printf(sc->dev, "failed to read XFP\n"); 2970 } 2971 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 2972 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 2973 } 2974 if (err != MXGEFW_CMD_OK) { 2975 return; 2976 } 2977 2978 /* now we wait for the data to be cached */ 2979 cmd.data0 = byte; 2980 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2981 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 2982 DELAY(1000); 2983 cmd.data0 = byte; 2984 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2985 } 2986 if (err != MXGEFW_CMD_OK) { 2987 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 2988 cage_type, err, ms); 2989 return; 2990 } 2991 2992 if (cmd.data0 == mxge_media_types[0].bitmask) { 2993 if (mxge_verbose) 2994 device_printf(sc->dev, "%s:%s\n", cage_type, 2995 mxge_media_types[0].name); 2996 if (sc->current_media != mxge_media_types[0].flag) { 2997 mxge_media_init(sc); 2998 mxge_media_set(sc, mxge_media_types[0].flag); 2999 } 3000 return; 3001 } 3002 for (i = 1; i < mxge_media_type_entries; i++) { 3003 if (cmd.data0 & mxge_media_types[i].bitmask) { 3004 if (mxge_verbose) 3005 device_printf(sc->dev, "%s:%s\n", 3006 cage_type, 3007 mxge_media_types[i].name); 3008 3009 if (sc->current_media != mxge_media_types[i].flag) { 3010 mxge_media_init(sc); 3011 mxge_media_set(sc, mxge_media_types[i].flag); 3012 } 3013 return; 3014 } 3015 } 3016 if (mxge_verbose) 3017 device_printf(sc->dev, "%s media 0x%x unknown\n", 3018 cage_type, cmd.data0); 3019 3020 return; 3021 } 3022 3023 static void 3024 mxge_intr(void *arg) 3025 { 3026 struct mxge_slice_state *ss = arg; 3027 mxge_softc_t *sc = ss->sc; 3028 mcp_irq_data_t *stats = ss->fw_stats; 3029 mxge_tx_ring_t *tx = &ss->tx; 3030 mxge_rx_done_t *rx_done = &ss->rx_done; 3031 uint32_t send_done_count; 3032 uint8_t valid; 3033 3034 #ifndef IFNET_BUF_RING 3035 /* an interrupt on a non-zero slice is implicitly valid 3036 since MSI-X irqs are not shared */ 3037 if (ss != sc->ss) { 3038 mxge_clean_rx_done(ss); 3039 *ss->irq_claim = be32toh(3); 3040 return; 3041 } 3042 #endif 3043 3044 /* make sure the DMA has finished */ 3045 if (!stats->valid) { 3046 return; 3047 } 3048 valid = stats->valid; 3049 3050 if (sc->legacy_irq) { 3051 /* lower legacy IRQ */ 3052 *sc->irq_deassert = 0; 3053 if (!mxge_deassert_wait) 3054 /* don't wait for conf. that irq is low */ 3055 stats->valid = 0; 3056 } else { 3057 stats->valid = 0; 3058 } 3059 3060 /* loop while waiting for legacy irq deassertion */ 3061 do { 3062 /* check for transmit completes and receives */ 3063 send_done_count = be32toh(stats->send_done_count); 3064 while ((send_done_count != tx->pkt_done) || 3065 (rx_done->entry[rx_done->idx].length != 0)) { 3066 if (send_done_count != tx->pkt_done) 3067 mxge_tx_done(ss, (int)send_done_count); 3068 mxge_clean_rx_done(ss); 3069 send_done_count = be32toh(stats->send_done_count); 3070 } 3071 if (sc->legacy_irq && mxge_deassert_wait) 3072 wmb(); 3073 } while (*((volatile uint8_t *) &stats->valid)); 3074 3075 /* fw link & error stats meaningful only on the first slice */ 3076 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 3077 if (sc->link_state != stats->link_up) { 3078 sc->link_state = stats->link_up; 3079 if (sc->link_state) { 3080 if_link_state_change(sc->ifp, LINK_STATE_UP); 3081 if (mxge_verbose) 3082 device_printf(sc->dev, "link up\n"); 3083 } else { 3084 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3085 if (mxge_verbose) 3086 device_printf(sc->dev, "link down\n"); 3087 } 3088 sc->need_media_probe = 1; 3089 } 3090 if (sc->rdma_tags_available != 3091 be32toh(stats->rdma_tags_available)) { 3092 sc->rdma_tags_available = 3093 be32toh(stats->rdma_tags_available); 3094 device_printf(sc->dev, "RDMA timed out! %d tags " 3095 "left\n", sc->rdma_tags_available); 3096 } 3097 3098 if (stats->link_down) { 3099 sc->down_cnt += stats->link_down; 3100 sc->link_state = 0; 3101 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3102 } 3103 } 3104 3105 /* check to see if we have rx token to pass back */ 3106 if (valid & 0x1) 3107 *ss->irq_claim = be32toh(3); 3108 *(ss->irq_claim + 1) = be32toh(3); 3109 } 3110 3111 static void 3112 mxge_init(void *arg) 3113 { 3114 mxge_softc_t *sc = arg; 3115 struct ifnet *ifp = sc->ifp; 3116 3117 mtx_lock(&sc->driver_mtx); 3118 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 3119 (void) mxge_open(sc); 3120 mtx_unlock(&sc->driver_mtx); 3121 } 3122 3123 static void 3124 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 3125 { 3126 int i; 3127 3128 #if defined(INET) || defined(INET6) 3129 tcp_lro_free(&ss->lc); 3130 #endif 3131 for (i = 0; i <= ss->rx_big.mask; i++) { 3132 if (ss->rx_big.info[i].m == NULL) 3133 continue; 3134 bus_dmamap_unload(ss->rx_big.dmat, 3135 ss->rx_big.info[i].map); 3136 m_freem(ss->rx_big.info[i].m); 3137 ss->rx_big.info[i].m = NULL; 3138 } 3139 3140 for (i = 0; i <= ss->rx_small.mask; i++) { 3141 if (ss->rx_small.info[i].m == NULL) 3142 continue; 3143 bus_dmamap_unload(ss->rx_small.dmat, 3144 ss->rx_small.info[i].map); 3145 m_freem(ss->rx_small.info[i].m); 3146 ss->rx_small.info[i].m = NULL; 3147 } 3148 3149 /* transmit ring used only on the first slice */ 3150 if (ss->tx.info == NULL) 3151 return; 3152 3153 for (i = 0; i <= ss->tx.mask; i++) { 3154 ss->tx.info[i].flag = 0; 3155 if (ss->tx.info[i].m == NULL) 3156 continue; 3157 bus_dmamap_unload(ss->tx.dmat, 3158 ss->tx.info[i].map); 3159 m_freem(ss->tx.info[i].m); 3160 ss->tx.info[i].m = NULL; 3161 } 3162 } 3163 3164 static void 3165 mxge_free_mbufs(mxge_softc_t *sc) 3166 { 3167 int slice; 3168 3169 for (slice = 0; slice < sc->num_slices; slice++) 3170 mxge_free_slice_mbufs(&sc->ss[slice]); 3171 } 3172 3173 static void 3174 mxge_free_slice_rings(struct mxge_slice_state *ss) 3175 { 3176 int i; 3177 3178 if (ss->rx_done.entry != NULL) 3179 mxge_dma_free(&ss->rx_done.dma); 3180 ss->rx_done.entry = NULL; 3181 3182 if (ss->tx.req_bytes != NULL) 3183 free(ss->tx.req_bytes, M_DEVBUF); 3184 ss->tx.req_bytes = NULL; 3185 3186 if (ss->tx.seg_list != NULL) 3187 free(ss->tx.seg_list, M_DEVBUF); 3188 ss->tx.seg_list = NULL; 3189 3190 if (ss->rx_small.shadow != NULL) 3191 free(ss->rx_small.shadow, M_DEVBUF); 3192 ss->rx_small.shadow = NULL; 3193 3194 if (ss->rx_big.shadow != NULL) 3195 free(ss->rx_big.shadow, M_DEVBUF); 3196 ss->rx_big.shadow = NULL; 3197 3198 if (ss->tx.info != NULL) { 3199 if (ss->tx.dmat != NULL) { 3200 for (i = 0; i <= ss->tx.mask; i++) { 3201 bus_dmamap_destroy(ss->tx.dmat, 3202 ss->tx.info[i].map); 3203 } 3204 bus_dma_tag_destroy(ss->tx.dmat); 3205 } 3206 free(ss->tx.info, M_DEVBUF); 3207 } 3208 ss->tx.info = NULL; 3209 3210 if (ss->rx_small.info != NULL) { 3211 if (ss->rx_small.dmat != NULL) { 3212 for (i = 0; i <= ss->rx_small.mask; i++) { 3213 bus_dmamap_destroy(ss->rx_small.dmat, 3214 ss->rx_small.info[i].map); 3215 } 3216 bus_dmamap_destroy(ss->rx_small.dmat, 3217 ss->rx_small.extra_map); 3218 bus_dma_tag_destroy(ss->rx_small.dmat); 3219 } 3220 free(ss->rx_small.info, M_DEVBUF); 3221 } 3222 ss->rx_small.info = NULL; 3223 3224 if (ss->rx_big.info != NULL) { 3225 if (ss->rx_big.dmat != NULL) { 3226 for (i = 0; i <= ss->rx_big.mask; i++) { 3227 bus_dmamap_destroy(ss->rx_big.dmat, 3228 ss->rx_big.info[i].map); 3229 } 3230 bus_dmamap_destroy(ss->rx_big.dmat, 3231 ss->rx_big.extra_map); 3232 bus_dma_tag_destroy(ss->rx_big.dmat); 3233 } 3234 free(ss->rx_big.info, M_DEVBUF); 3235 } 3236 ss->rx_big.info = NULL; 3237 } 3238 3239 static void 3240 mxge_free_rings(mxge_softc_t *sc) 3241 { 3242 int slice; 3243 3244 for (slice = 0; slice < sc->num_slices; slice++) 3245 mxge_free_slice_rings(&sc->ss[slice]); 3246 } 3247 3248 static int 3249 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3250 int tx_ring_entries) 3251 { 3252 mxge_softc_t *sc = ss->sc; 3253 size_t bytes; 3254 int err, i; 3255 3256 /* allocate per-slice receive resources */ 3257 3258 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 3259 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 3260 3261 /* allocate the rx shadow rings */ 3262 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 3263 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3264 3265 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 3266 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3267 3268 /* allocate the rx host info rings */ 3269 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 3270 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3271 3272 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 3273 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3274 3275 /* allocate the rx busdma resources */ 3276 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3277 1, /* alignment */ 3278 4096, /* boundary */ 3279 BUS_SPACE_MAXADDR, /* low */ 3280 BUS_SPACE_MAXADDR, /* high */ 3281 NULL, NULL, /* filter */ 3282 MHLEN, /* maxsize */ 3283 1, /* num segs */ 3284 MHLEN, /* maxsegsize */ 3285 BUS_DMA_ALLOCNOW, /* flags */ 3286 NULL, NULL, /* lock */ 3287 &ss->rx_small.dmat); /* tag */ 3288 if (err != 0) { 3289 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3290 err); 3291 return err; 3292 } 3293 3294 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3295 1, /* alignment */ 3296 #if MXGE_VIRT_JUMBOS 3297 4096, /* boundary */ 3298 #else 3299 0, /* boundary */ 3300 #endif 3301 BUS_SPACE_MAXADDR, /* low */ 3302 BUS_SPACE_MAXADDR, /* high */ 3303 NULL, NULL, /* filter */ 3304 3*4096, /* maxsize */ 3305 #if MXGE_VIRT_JUMBOS 3306 3, /* num segs */ 3307 4096, /* maxsegsize*/ 3308 #else 3309 1, /* num segs */ 3310 MJUM9BYTES, /* maxsegsize*/ 3311 #endif 3312 BUS_DMA_ALLOCNOW, /* flags */ 3313 NULL, NULL, /* lock */ 3314 &ss->rx_big.dmat); /* tag */ 3315 if (err != 0) { 3316 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3317 err); 3318 return err; 3319 } 3320 for (i = 0; i <= ss->rx_small.mask; i++) { 3321 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3322 &ss->rx_small.info[i].map); 3323 if (err != 0) { 3324 device_printf(sc->dev, "Err %d rx_small dmamap\n", 3325 err); 3326 return err; 3327 } 3328 } 3329 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3330 &ss->rx_small.extra_map); 3331 if (err != 0) { 3332 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 3333 err); 3334 return err; 3335 } 3336 3337 for (i = 0; i <= ss->rx_big.mask; i++) { 3338 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3339 &ss->rx_big.info[i].map); 3340 if (err != 0) { 3341 device_printf(sc->dev, "Err %d rx_big dmamap\n", 3342 err); 3343 return err; 3344 } 3345 } 3346 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3347 &ss->rx_big.extra_map); 3348 if (err != 0) { 3349 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 3350 err); 3351 return err; 3352 } 3353 3354 /* now allocate TX resources */ 3355 3356 #ifndef IFNET_BUF_RING 3357 /* only use a single TX ring for now */ 3358 if (ss != ss->sc->ss) 3359 return 0; 3360 #endif 3361 3362 ss->tx.mask = tx_ring_entries - 1; 3363 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3364 3365 /* allocate the tx request copy block */ 3366 bytes = 8 + 3367 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3368 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 3369 /* ensure req_list entries are aligned to 8 bytes */ 3370 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3371 ((uintptr_t)(ss->tx.req_bytes + 7) & ~7UL); 3372 3373 /* allocate the tx busdma segment list */ 3374 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3375 ss->tx.seg_list = (bus_dma_segment_t *) 3376 malloc(bytes, M_DEVBUF, M_WAITOK); 3377 3378 /* allocate the tx host info ring */ 3379 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3380 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3381 3382 /* allocate the tx busdma resources */ 3383 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3384 1, /* alignment */ 3385 sc->tx_boundary, /* boundary */ 3386 BUS_SPACE_MAXADDR, /* low */ 3387 BUS_SPACE_MAXADDR, /* high */ 3388 NULL, NULL, /* filter */ 3389 65536 + 256, /* maxsize */ 3390 ss->tx.max_desc - 2, /* num segs */ 3391 sc->tx_boundary, /* maxsegsz */ 3392 BUS_DMA_ALLOCNOW, /* flags */ 3393 NULL, NULL, /* lock */ 3394 &ss->tx.dmat); /* tag */ 3395 3396 if (err != 0) { 3397 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3398 err); 3399 return err; 3400 } 3401 3402 /* now use these tags to setup dmamaps for each slot 3403 in the ring */ 3404 for (i = 0; i <= ss->tx.mask; i++) { 3405 err = bus_dmamap_create(ss->tx.dmat, 0, 3406 &ss->tx.info[i].map); 3407 if (err != 0) { 3408 device_printf(sc->dev, "Err %d tx dmamap\n", 3409 err); 3410 return err; 3411 } 3412 } 3413 return 0; 3414 3415 } 3416 3417 static int 3418 mxge_alloc_rings(mxge_softc_t *sc) 3419 { 3420 mxge_cmd_t cmd; 3421 int tx_ring_size; 3422 int tx_ring_entries, rx_ring_entries; 3423 int err, slice; 3424 3425 /* get ring sizes */ 3426 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3427 tx_ring_size = cmd.data0; 3428 if (err != 0) { 3429 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3430 goto abort; 3431 } 3432 3433 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3434 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3435 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 3436 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 3437 IFQ_SET_READY(&sc->ifp->if_snd); 3438 3439 for (slice = 0; slice < sc->num_slices; slice++) { 3440 err = mxge_alloc_slice_rings(&sc->ss[slice], 3441 rx_ring_entries, 3442 tx_ring_entries); 3443 if (err != 0) 3444 goto abort; 3445 } 3446 return 0; 3447 3448 abort: 3449 mxge_free_rings(sc); 3450 return err; 3451 3452 } 3453 3454 static void 3455 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3456 { 3457 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3458 3459 if (bufsize < MCLBYTES) { 3460 /* easy, everything fits in a single buffer */ 3461 *big_buf_size = MCLBYTES; 3462 *cl_size = MCLBYTES; 3463 *nbufs = 1; 3464 return; 3465 } 3466 3467 if (bufsize < MJUMPAGESIZE) { 3468 /* still easy, everything still fits in a single buffer */ 3469 *big_buf_size = MJUMPAGESIZE; 3470 *cl_size = MJUMPAGESIZE; 3471 *nbufs = 1; 3472 return; 3473 } 3474 #if MXGE_VIRT_JUMBOS 3475 /* now we need to use virtually contiguous buffers */ 3476 *cl_size = MJUM9BYTES; 3477 *big_buf_size = 4096; 3478 *nbufs = mtu / 4096 + 1; 3479 /* needs to be a power of two, so round up */ 3480 if (*nbufs == 3) 3481 *nbufs = 4; 3482 #else 3483 *cl_size = MJUM9BYTES; 3484 *big_buf_size = MJUM9BYTES; 3485 *nbufs = 1; 3486 #endif 3487 } 3488 3489 static int 3490 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3491 { 3492 mxge_softc_t *sc; 3493 mxge_cmd_t cmd; 3494 bus_dmamap_t map; 3495 int err, i, slice; 3496 3497 sc = ss->sc; 3498 slice = ss - sc->ss; 3499 3500 #if defined(INET) || defined(INET6) 3501 (void)tcp_lro_init(&ss->lc); 3502 #endif 3503 ss->lc.ifp = sc->ifp; 3504 3505 /* get the lanai pointers to the send and receive rings */ 3506 3507 err = 0; 3508 #ifndef IFNET_BUF_RING 3509 /* We currently only send from the first slice */ 3510 if (slice == 0) { 3511 #endif 3512 cmd.data0 = slice; 3513 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3514 ss->tx.lanai = 3515 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3516 ss->tx.send_go = (volatile uint32_t *) 3517 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3518 ss->tx.send_stop = (volatile uint32_t *) 3519 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3520 #ifndef IFNET_BUF_RING 3521 } 3522 #endif 3523 cmd.data0 = slice; 3524 err |= mxge_send_cmd(sc, 3525 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3526 ss->rx_small.lanai = 3527 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3528 cmd.data0 = slice; 3529 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3530 ss->rx_big.lanai = 3531 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3532 3533 if (err != 0) { 3534 device_printf(sc->dev, 3535 "failed to get ring sizes or locations\n"); 3536 return EIO; 3537 } 3538 3539 /* stock receive rings */ 3540 for (i = 0; i <= ss->rx_small.mask; i++) { 3541 map = ss->rx_small.info[i].map; 3542 err = mxge_get_buf_small(ss, map, i); 3543 if (err) { 3544 device_printf(sc->dev, "alloced %d/%d smalls\n", 3545 i, ss->rx_small.mask + 1); 3546 return ENOMEM; 3547 } 3548 } 3549 for (i = 0; i <= ss->rx_big.mask; i++) { 3550 ss->rx_big.shadow[i].addr_low = 0xffffffff; 3551 ss->rx_big.shadow[i].addr_high = 0xffffffff; 3552 } 3553 ss->rx_big.nbufs = nbufs; 3554 ss->rx_big.cl_size = cl_size; 3555 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN + 3556 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3557 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { 3558 map = ss->rx_big.info[i].map; 3559 err = mxge_get_buf_big(ss, map, i); 3560 if (err) { 3561 device_printf(sc->dev, "alloced %d/%d bigs\n", 3562 i, ss->rx_big.mask + 1); 3563 return ENOMEM; 3564 } 3565 } 3566 return 0; 3567 } 3568 3569 static int 3570 mxge_open(mxge_softc_t *sc) 3571 { 3572 mxge_cmd_t cmd; 3573 int err, big_bytes, nbufs, slice, cl_size, i; 3574 bus_addr_t bus; 3575 volatile uint8_t *itable; 3576 struct mxge_slice_state *ss; 3577 3578 /* Copy the MAC address in case it was overridden */ 3579 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 3580 3581 err = mxge_reset(sc, 1); 3582 if (err != 0) { 3583 device_printf(sc->dev, "failed to reset\n"); 3584 return EIO; 3585 } 3586 3587 if (sc->num_slices > 1) { 3588 /* setup the indirection table */ 3589 cmd.data0 = sc->num_slices; 3590 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 3591 &cmd); 3592 3593 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 3594 &cmd); 3595 if (err != 0) { 3596 device_printf(sc->dev, 3597 "failed to setup rss tables\n"); 3598 return err; 3599 } 3600 3601 /* just enable an identity mapping */ 3602 itable = sc->sram + cmd.data0; 3603 for (i = 0; i < sc->num_slices; i++) 3604 itable[i] = (uint8_t)i; 3605 3606 cmd.data0 = 1; 3607 cmd.data1 = mxge_rss_hash_type; 3608 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3609 if (err != 0) { 3610 device_printf(sc->dev, "failed to enable slices\n"); 3611 return err; 3612 } 3613 } 3614 3615 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); 3616 3617 cmd.data0 = nbufs; 3618 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 3619 &cmd); 3620 /* error is only meaningful if we're trying to set 3621 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 3622 if (err && nbufs > 1) { 3623 device_printf(sc->dev, 3624 "Failed to set alway-use-n to %d\n", 3625 nbufs); 3626 return EIO; 3627 } 3628 /* Give the firmware the mtu and the big and small buffer 3629 sizes. The firmware wants the big buf size to be a power 3630 of two. Luckily, FreeBSD's clusters are powers of two */ 3631 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3632 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3633 cmd.data0 = MHLEN - MXGEFW_PAD; 3634 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3635 &cmd); 3636 cmd.data0 = big_bytes; 3637 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3638 3639 if (err != 0) { 3640 device_printf(sc->dev, "failed to setup params\n"); 3641 goto abort; 3642 } 3643 3644 /* Now give him the pointer to the stats block */ 3645 for (slice = 0; 3646 #ifdef IFNET_BUF_RING 3647 slice < sc->num_slices; 3648 #else 3649 slice < 1; 3650 #endif 3651 slice++) { 3652 ss = &sc->ss[slice]; 3653 cmd.data0 = 3654 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3655 cmd.data1 = 3656 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3657 cmd.data2 = sizeof(struct mcp_irq_data); 3658 cmd.data2 |= (slice << 16); 3659 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3660 } 3661 3662 if (err != 0) { 3663 bus = sc->ss->fw_stats_dma.bus_addr; 3664 bus += offsetof(struct mcp_irq_data, send_done_count); 3665 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3666 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3667 err = mxge_send_cmd(sc, 3668 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3669 &cmd); 3670 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3671 sc->fw_multicast_support = 0; 3672 } else { 3673 sc->fw_multicast_support = 1; 3674 } 3675 3676 if (err != 0) { 3677 device_printf(sc->dev, "failed to setup params\n"); 3678 goto abort; 3679 } 3680 3681 for (slice = 0; slice < sc->num_slices; slice++) { 3682 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3683 if (err != 0) { 3684 device_printf(sc->dev, "couldn't open slice %d\n", 3685 slice); 3686 goto abort; 3687 } 3688 } 3689 3690 /* Finally, start the firmware running */ 3691 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3692 if (err) { 3693 device_printf(sc->dev, "Couldn't bring up link\n"); 3694 goto abort; 3695 } 3696 #ifdef IFNET_BUF_RING 3697 for (slice = 0; slice < sc->num_slices; slice++) { 3698 ss = &sc->ss[slice]; 3699 ss->if_drv_flags |= IFF_DRV_RUNNING; 3700 ss->if_drv_flags &= ~IFF_DRV_OACTIVE; 3701 } 3702 #endif 3703 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3704 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3705 3706 return 0; 3707 3708 abort: 3709 mxge_free_mbufs(sc); 3710 3711 return err; 3712 } 3713 3714 static int 3715 mxge_close(mxge_softc_t *sc, int down) 3716 { 3717 mxge_cmd_t cmd; 3718 int err, old_down_cnt; 3719 #ifdef IFNET_BUF_RING 3720 struct mxge_slice_state *ss; 3721 int slice; 3722 #endif 3723 3724 #ifdef IFNET_BUF_RING 3725 for (slice = 0; slice < sc->num_slices; slice++) { 3726 ss = &sc->ss[slice]; 3727 ss->if_drv_flags &= ~IFF_DRV_RUNNING; 3728 } 3729 #endif 3730 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3731 if (!down) { 3732 old_down_cnt = sc->down_cnt; 3733 wmb(); 3734 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3735 if (err) { 3736 device_printf(sc->dev, 3737 "Couldn't bring down link\n"); 3738 } 3739 if (old_down_cnt == sc->down_cnt) { 3740 /* wait for down irq */ 3741 DELAY(10 * sc->intr_coal_delay); 3742 } 3743 wmb(); 3744 if (old_down_cnt == sc->down_cnt) { 3745 device_printf(sc->dev, "never got down irq\n"); 3746 } 3747 } 3748 mxge_free_mbufs(sc); 3749 3750 return 0; 3751 } 3752 3753 static void 3754 mxge_setup_cfg_space(mxge_softc_t *sc) 3755 { 3756 device_t dev = sc->dev; 3757 int reg; 3758 uint16_t lnk, pectl; 3759 3760 /* find the PCIe link width and set max read request to 4KB*/ 3761 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { 3762 lnk = pci_read_config(dev, reg + 0x12, 2); 3763 sc->link_width = (lnk >> 4) & 0x3f; 3764 3765 if (sc->pectl == 0) { 3766 pectl = pci_read_config(dev, reg + 0x8, 2); 3767 pectl = (pectl & ~0x7000) | (5 << 12); 3768 pci_write_config(dev, reg + 0x8, pectl, 2); 3769 sc->pectl = pectl; 3770 } else { 3771 /* restore saved pectl after watchdog reset */ 3772 pci_write_config(dev, reg + 0x8, sc->pectl, 2); 3773 } 3774 } 3775 3776 /* Enable DMA and Memory space access */ 3777 pci_enable_busmaster(dev); 3778 } 3779 3780 static uint32_t 3781 mxge_read_reboot(mxge_softc_t *sc) 3782 { 3783 device_t dev = sc->dev; 3784 uint32_t vs; 3785 3786 /* find the vendor specific offset */ 3787 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) { 3788 device_printf(sc->dev, 3789 "could not find vendor specific offset\n"); 3790 return (uint32_t)-1; 3791 } 3792 /* enable read32 mode */ 3793 pci_write_config(dev, vs + 0x10, 0x3, 1); 3794 /* tell NIC which register to read */ 3795 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3796 return (pci_read_config(dev, vs + 0x14, 4)); 3797 } 3798 3799 static void 3800 mxge_watchdog_reset(mxge_softc_t *sc) 3801 { 3802 struct pci_devinfo *dinfo; 3803 struct mxge_slice_state *ss; 3804 int err, running, s, num_tx_slices = 1; 3805 uint32_t reboot; 3806 uint16_t cmd; 3807 3808 err = ENXIO; 3809 3810 device_printf(sc->dev, "Watchdog reset!\n"); 3811 3812 /* 3813 * check to see if the NIC rebooted. If it did, then all of 3814 * PCI config space has been reset, and things like the 3815 * busmaster bit will be zero. If this is the case, then we 3816 * must restore PCI config space before the NIC can be used 3817 * again 3818 */ 3819 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3820 if (cmd == 0xffff) { 3821 /* 3822 * maybe the watchdog caught the NIC rebooting; wait 3823 * up to 100ms for it to finish. If it does not come 3824 * back, then give up 3825 */ 3826 DELAY(1000*100); 3827 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3828 if (cmd == 0xffff) { 3829 device_printf(sc->dev, "NIC disappeared!\n"); 3830 } 3831 } 3832 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3833 /* print the reboot status */ 3834 reboot = mxge_read_reboot(sc); 3835 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3836 reboot); 3837 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 3838 if (running) { 3839 /* 3840 * quiesce NIC so that TX routines will not try to 3841 * xmit after restoration of BAR 3842 */ 3843 3844 /* Mark the link as down */ 3845 if (sc->link_state) { 3846 sc->link_state = 0; 3847 if_link_state_change(sc->ifp, 3848 LINK_STATE_DOWN); 3849 } 3850 #ifdef IFNET_BUF_RING 3851 num_tx_slices = sc->num_slices; 3852 #endif 3853 /* grab all TX locks to ensure no tx */ 3854 for (s = 0; s < num_tx_slices; s++) { 3855 ss = &sc->ss[s]; 3856 mtx_lock(&ss->tx.mtx); 3857 } 3858 mxge_close(sc, 1); 3859 } 3860 /* restore PCI configuration space */ 3861 dinfo = device_get_ivars(sc->dev); 3862 pci_cfg_restore(sc->dev, dinfo); 3863 3864 /* and redo any changes we made to our config space */ 3865 mxge_setup_cfg_space(sc); 3866 3867 /* reload f/w */ 3868 err = mxge_load_firmware(sc, 0); 3869 if (err) { 3870 device_printf(sc->dev, 3871 "Unable to re-load f/w\n"); 3872 } 3873 if (running) { 3874 if (!err) 3875 err = mxge_open(sc); 3876 /* release all TX locks */ 3877 for (s = 0; s < num_tx_slices; s++) { 3878 ss = &sc->ss[s]; 3879 #ifdef IFNET_BUF_RING 3880 mxge_start_locked(ss); 3881 #endif 3882 mtx_unlock(&ss->tx.mtx); 3883 } 3884 } 3885 sc->watchdog_resets++; 3886 } else { 3887 device_printf(sc->dev, 3888 "NIC did not reboot, not resetting\n"); 3889 err = 0; 3890 } 3891 if (err) { 3892 device_printf(sc->dev, "watchdog reset failed\n"); 3893 } else { 3894 if (sc->dying == 2) 3895 sc->dying = 0; 3896 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3897 } 3898 } 3899 3900 static void 3901 mxge_watchdog_task(void *arg, int pending) 3902 { 3903 mxge_softc_t *sc = arg; 3904 3905 mtx_lock(&sc->driver_mtx); 3906 mxge_watchdog_reset(sc); 3907 mtx_unlock(&sc->driver_mtx); 3908 } 3909 3910 static void 3911 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice) 3912 { 3913 tx = &sc->ss[slice].tx; 3914 device_printf(sc->dev, "slice %d struck? ring state:\n", slice); 3915 device_printf(sc->dev, 3916 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3917 tx->req, tx->done, tx->queue_active); 3918 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3919 tx->activate, tx->deactivate); 3920 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3921 tx->pkt_done, 3922 be32toh(sc->ss->fw_stats->send_done_count)); 3923 } 3924 3925 static int 3926 mxge_watchdog(mxge_softc_t *sc) 3927 { 3928 mxge_tx_ring_t *tx; 3929 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3930 int i, err = 0; 3931 3932 /* see if we have outstanding transmits, which 3933 have been pending for more than mxge_ticks */ 3934 for (i = 0; 3935 #ifdef IFNET_BUF_RING 3936 (i < sc->num_slices) && (err == 0); 3937 #else 3938 (i < 1) && (err == 0); 3939 #endif 3940 i++) { 3941 tx = &sc->ss[i].tx; 3942 if (tx->req != tx->done && 3943 tx->watchdog_req != tx->watchdog_done && 3944 tx->done == tx->watchdog_done) { 3945 /* check for pause blocking before resetting */ 3946 if (tx->watchdog_rx_pause == rx_pause) { 3947 mxge_warn_stuck(sc, tx, i); 3948 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 3949 return (ENXIO); 3950 } 3951 else 3952 device_printf(sc->dev, "Flow control blocking " 3953 "xmits, check link partner\n"); 3954 } 3955 3956 tx->watchdog_req = tx->req; 3957 tx->watchdog_done = tx->done; 3958 tx->watchdog_rx_pause = rx_pause; 3959 } 3960 3961 if (sc->need_media_probe) 3962 mxge_media_probe(sc); 3963 return (err); 3964 } 3965 3966 static uint64_t 3967 mxge_get_counter(struct ifnet *ifp, ift_counter cnt) 3968 { 3969 struct mxge_softc *sc; 3970 uint64_t rv; 3971 3972 sc = if_getsoftc(ifp); 3973 rv = 0; 3974 3975 switch (cnt) { 3976 case IFCOUNTER_IPACKETS: 3977 for (int s = 0; s < sc->num_slices; s++) 3978 rv += sc->ss[s].ipackets; 3979 return (rv); 3980 case IFCOUNTER_OPACKETS: 3981 for (int s = 0; s < sc->num_slices; s++) 3982 rv += sc->ss[s].opackets; 3983 return (rv); 3984 case IFCOUNTER_OERRORS: 3985 for (int s = 0; s < sc->num_slices; s++) 3986 rv += sc->ss[s].oerrors; 3987 return (rv); 3988 #ifdef IFNET_BUF_RING 3989 case IFCOUNTER_OBYTES: 3990 for (int s = 0; s < sc->num_slices; s++) 3991 rv += sc->ss[s].obytes; 3992 return (rv); 3993 case IFCOUNTER_OMCASTS: 3994 for (int s = 0; s < sc->num_slices; s++) 3995 rv += sc->ss[s].omcasts; 3996 return (rv); 3997 case IFCOUNTER_OQDROPS: 3998 for (int s = 0; s < sc->num_slices; s++) 3999 rv += sc->ss[s].tx.br->br_drops; 4000 return (rv); 4001 #endif 4002 default: 4003 return (if_get_counter_default(ifp, cnt)); 4004 } 4005 } 4006 4007 static void 4008 mxge_tick(void *arg) 4009 { 4010 mxge_softc_t *sc = arg; 4011 u_long pkts = 0; 4012 int err = 0; 4013 int running, ticks; 4014 uint16_t cmd; 4015 4016 ticks = mxge_ticks; 4017 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 4018 if (running) { 4019 if (!sc->watchdog_countdown) { 4020 err = mxge_watchdog(sc); 4021 sc->watchdog_countdown = 4; 4022 } 4023 sc->watchdog_countdown--; 4024 } 4025 if (pkts == 0) { 4026 /* ensure NIC did not suffer h/w fault while idle */ 4027 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 4028 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 4029 sc->dying = 2; 4030 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 4031 err = ENXIO; 4032 } 4033 /* look less often if NIC is idle */ 4034 ticks *= 4; 4035 } 4036 4037 if (err == 0) 4038 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 4039 4040 } 4041 4042 static int 4043 mxge_media_change(struct ifnet *ifp) 4044 { 4045 return EINVAL; 4046 } 4047 4048 static int 4049 mxge_change_mtu(mxge_softc_t *sc, int mtu) 4050 { 4051 struct ifnet *ifp = sc->ifp; 4052 int real_mtu, old_mtu; 4053 int err = 0; 4054 4055 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 4056 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 4057 return EINVAL; 4058 mtx_lock(&sc->driver_mtx); 4059 old_mtu = ifp->if_mtu; 4060 ifp->if_mtu = mtu; 4061 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4062 mxge_close(sc, 0); 4063 err = mxge_open(sc); 4064 if (err != 0) { 4065 ifp->if_mtu = old_mtu; 4066 mxge_close(sc, 0); 4067 (void) mxge_open(sc); 4068 } 4069 } 4070 mtx_unlock(&sc->driver_mtx); 4071 return err; 4072 } 4073 4074 static void 4075 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 4076 { 4077 mxge_softc_t *sc = ifp->if_softc; 4078 4079 if (sc == NULL) 4080 return; 4081 ifmr->ifm_status = IFM_AVALID; 4082 ifmr->ifm_active = IFM_ETHER | IFM_FDX; 4083 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 4084 ifmr->ifm_active |= sc->current_media; 4085 } 4086 4087 static int 4088 mxge_fetch_i2c(mxge_softc_t *sc, struct ifi2creq *i2c) 4089 { 4090 mxge_cmd_t cmd; 4091 uint32_t i2c_args; 4092 int i, ms, err; 4093 4094 if (i2c->dev_addr != 0xA0 && 4095 i2c->dev_addr != 0xA2) 4096 return (EINVAL); 4097 if (i2c->len > sizeof(i2c->data)) 4098 return (EINVAL); 4099 4100 for (i = 0; i < i2c->len; i++) { 4101 i2c_args = i2c->dev_addr << 0x8; 4102 i2c_args |= i2c->offset + i; 4103 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 4104 cmd.data1 = i2c_args; 4105 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 4106 4107 if (err != MXGEFW_CMD_OK) 4108 return (EIO); 4109 /* now we wait for the data to be cached */ 4110 cmd.data0 = i2c_args & 0xff; 4111 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 4112 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 4113 cmd.data0 = i2c_args & 0xff; 4114 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 4115 if (err == EBUSY) 4116 DELAY(1000); 4117 } 4118 if (err != MXGEFW_CMD_OK) 4119 return (EIO); 4120 i2c->data[i] = cmd.data0; 4121 } 4122 return (0); 4123 } 4124 4125 static int 4126 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 4127 { 4128 mxge_softc_t *sc = ifp->if_softc; 4129 struct ifreq *ifr = (struct ifreq *)data; 4130 struct ifi2creq i2c; 4131 int err, mask; 4132 4133 err = 0; 4134 switch (command) { 4135 case SIOCSIFMTU: 4136 err = mxge_change_mtu(sc, ifr->ifr_mtu); 4137 break; 4138 4139 case SIOCSIFFLAGS: 4140 mtx_lock(&sc->driver_mtx); 4141 if (sc->dying) { 4142 mtx_unlock(&sc->driver_mtx); 4143 return EINVAL; 4144 } 4145 if (ifp->if_flags & IFF_UP) { 4146 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 4147 err = mxge_open(sc); 4148 } else { 4149 /* take care of promis can allmulti 4150 flag chages */ 4151 mxge_change_promisc(sc, 4152 ifp->if_flags & IFF_PROMISC); 4153 mxge_set_multicast_list(sc); 4154 } 4155 } else { 4156 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4157 mxge_close(sc, 0); 4158 } 4159 } 4160 mtx_unlock(&sc->driver_mtx); 4161 break; 4162 4163 case SIOCADDMULTI: 4164 case SIOCDELMULTI: 4165 mtx_lock(&sc->driver_mtx); 4166 if (sc->dying) { 4167 mtx_unlock(&sc->driver_mtx); 4168 return (EINVAL); 4169 } 4170 mxge_set_multicast_list(sc); 4171 mtx_unlock(&sc->driver_mtx); 4172 break; 4173 4174 case SIOCSIFCAP: 4175 mtx_lock(&sc->driver_mtx); 4176 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 4177 if (mask & IFCAP_TXCSUM) { 4178 if (IFCAP_TXCSUM & ifp->if_capenable) { 4179 mask &= ~IFCAP_TSO4; 4180 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 4181 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP); 4182 } else { 4183 ifp->if_capenable |= IFCAP_TXCSUM; 4184 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 4185 } 4186 } 4187 if (mask & IFCAP_RXCSUM) { 4188 if (IFCAP_RXCSUM & ifp->if_capenable) { 4189 ifp->if_capenable &= ~IFCAP_RXCSUM; 4190 } else { 4191 ifp->if_capenable |= IFCAP_RXCSUM; 4192 } 4193 } 4194 if (mask & IFCAP_TSO4) { 4195 if (IFCAP_TSO4 & ifp->if_capenable) { 4196 ifp->if_capenable &= ~IFCAP_TSO4; 4197 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 4198 ifp->if_capenable |= IFCAP_TSO4; 4199 ifp->if_hwassist |= CSUM_TSO; 4200 } else { 4201 printf("mxge requires tx checksum offload" 4202 " be enabled to use TSO\n"); 4203 err = EINVAL; 4204 } 4205 } 4206 #if IFCAP_TSO6 4207 if (mask & IFCAP_TXCSUM_IPV6) { 4208 if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) { 4209 mask &= ~IFCAP_TSO6; 4210 ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 4211 | IFCAP_TSO6); 4212 ifp->if_hwassist &= ~(CSUM_TCP_IPV6 4213 | CSUM_UDP); 4214 } else { 4215 ifp->if_capenable |= IFCAP_TXCSUM_IPV6; 4216 ifp->if_hwassist |= (CSUM_TCP_IPV6 4217 | CSUM_UDP_IPV6); 4218 } 4219 } 4220 if (mask & IFCAP_RXCSUM_IPV6) { 4221 if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) { 4222 ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6; 4223 } else { 4224 ifp->if_capenable |= IFCAP_RXCSUM_IPV6; 4225 } 4226 } 4227 if (mask & IFCAP_TSO6) { 4228 if (IFCAP_TSO6 & ifp->if_capenable) { 4229 ifp->if_capenable &= ~IFCAP_TSO6; 4230 } else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) { 4231 ifp->if_capenable |= IFCAP_TSO6; 4232 ifp->if_hwassist |= CSUM_TSO; 4233 } else { 4234 printf("mxge requires tx checksum offload" 4235 " be enabled to use TSO\n"); 4236 err = EINVAL; 4237 } 4238 } 4239 #endif /*IFCAP_TSO6 */ 4240 4241 if (mask & IFCAP_LRO) 4242 ifp->if_capenable ^= IFCAP_LRO; 4243 if (mask & IFCAP_VLAN_HWTAGGING) 4244 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 4245 if (mask & IFCAP_VLAN_HWTSO) 4246 ifp->if_capenable ^= IFCAP_VLAN_HWTSO; 4247 4248 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) || 4249 !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING)) 4250 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO; 4251 4252 mtx_unlock(&sc->driver_mtx); 4253 VLAN_CAPABILITIES(ifp); 4254 4255 break; 4256 4257 case SIOCGIFMEDIA: 4258 mtx_lock(&sc->driver_mtx); 4259 if (sc->dying) { 4260 mtx_unlock(&sc->driver_mtx); 4261 return (EINVAL); 4262 } 4263 mxge_media_probe(sc); 4264 mtx_unlock(&sc->driver_mtx); 4265 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 4266 &sc->media, command); 4267 break; 4268 4269 case SIOCGI2C: 4270 if (sc->connector != MXGE_XFP && 4271 sc->connector != MXGE_SFP) { 4272 err = ENXIO; 4273 break; 4274 } 4275 err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); 4276 if (err != 0) 4277 break; 4278 mtx_lock(&sc->driver_mtx); 4279 if (sc->dying) { 4280 mtx_unlock(&sc->driver_mtx); 4281 return (EINVAL); 4282 } 4283 err = mxge_fetch_i2c(sc, &i2c); 4284 mtx_unlock(&sc->driver_mtx); 4285 if (err == 0) 4286 err = copyout(&i2c, ifr_data_get_ptr(ifr), 4287 sizeof(i2c)); 4288 break; 4289 default: 4290 err = ether_ioctl(ifp, command, data); 4291 break; 4292 } 4293 return err; 4294 } 4295 4296 static void 4297 mxge_fetch_tunables(mxge_softc_t *sc) 4298 { 4299 4300 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); 4301 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 4302 &mxge_flow_control); 4303 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 4304 &mxge_intr_coal_delay); 4305 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 4306 &mxge_nvidia_ecrc_enable); 4307 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 4308 &mxge_force_firmware); 4309 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 4310 &mxge_deassert_wait); 4311 TUNABLE_INT_FETCH("hw.mxge.verbose", 4312 &mxge_verbose); 4313 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 4314 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); 4315 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); 4316 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type); 4317 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu); 4318 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle); 4319 4320 if (bootverbose) 4321 mxge_verbose = 1; 4322 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 4323 mxge_intr_coal_delay = 30; 4324 if (mxge_ticks == 0) 4325 mxge_ticks = hz / 2; 4326 sc->pause = mxge_flow_control; 4327 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 4328 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) { 4329 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 4330 } 4331 if (mxge_initial_mtu > ETHERMTU_JUMBO || 4332 mxge_initial_mtu < ETHER_MIN_LEN) 4333 mxge_initial_mtu = ETHERMTU_JUMBO; 4334 4335 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE) 4336 mxge_throttle = MXGE_MAX_THROTTLE; 4337 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE) 4338 mxge_throttle = MXGE_MIN_THROTTLE; 4339 sc->throttle = mxge_throttle; 4340 } 4341 4342 static void 4343 mxge_free_slices(mxge_softc_t *sc) 4344 { 4345 struct mxge_slice_state *ss; 4346 int i; 4347 4348 if (sc->ss == NULL) 4349 return; 4350 4351 for (i = 0; i < sc->num_slices; i++) { 4352 ss = &sc->ss[i]; 4353 if (ss->fw_stats != NULL) { 4354 mxge_dma_free(&ss->fw_stats_dma); 4355 ss->fw_stats = NULL; 4356 #ifdef IFNET_BUF_RING 4357 if (ss->tx.br != NULL) { 4358 drbr_free(ss->tx.br, M_DEVBUF); 4359 ss->tx.br = NULL; 4360 } 4361 #endif 4362 mtx_destroy(&ss->tx.mtx); 4363 } 4364 if (ss->rx_done.entry != NULL) { 4365 mxge_dma_free(&ss->rx_done.dma); 4366 ss->rx_done.entry = NULL; 4367 } 4368 } 4369 free(sc->ss, M_DEVBUF); 4370 sc->ss = NULL; 4371 } 4372 4373 static int 4374 mxge_alloc_slices(mxge_softc_t *sc) 4375 { 4376 mxge_cmd_t cmd; 4377 struct mxge_slice_state *ss; 4378 size_t bytes; 4379 int err, i, max_intr_slots; 4380 4381 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4382 if (err != 0) { 4383 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4384 return err; 4385 } 4386 sc->rx_ring_size = cmd.data0; 4387 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); 4388 4389 bytes = sizeof (*sc->ss) * sc->num_slices; 4390 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO); 4391 if (sc->ss == NULL) 4392 return (ENOMEM); 4393 for (i = 0; i < sc->num_slices; i++) { 4394 ss = &sc->ss[i]; 4395 4396 ss->sc = sc; 4397 4398 /* allocate per-slice rx interrupt queues */ 4399 4400 bytes = max_intr_slots * sizeof (*ss->rx_done.entry); 4401 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096); 4402 if (err != 0) 4403 goto abort; 4404 ss->rx_done.entry = ss->rx_done.dma.addr; 4405 bzero(ss->rx_done.entry, bytes); 4406 4407 /* 4408 * allocate the per-slice firmware stats; stats 4409 * (including tx) are used used only on the first 4410 * slice for now 4411 */ 4412 #ifndef IFNET_BUF_RING 4413 if (i > 0) 4414 continue; 4415 #endif 4416 4417 bytes = sizeof (*ss->fw_stats); 4418 err = mxge_dma_alloc(sc, &ss->fw_stats_dma, 4419 sizeof (*ss->fw_stats), 64); 4420 if (err != 0) 4421 goto abort; 4422 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr; 4423 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name), 4424 "%s:tx(%d)", device_get_nameunit(sc->dev), i); 4425 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF); 4426 #ifdef IFNET_BUF_RING 4427 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK, 4428 &ss->tx.mtx); 4429 #endif 4430 } 4431 4432 return (0); 4433 4434 abort: 4435 mxge_free_slices(sc); 4436 return (ENOMEM); 4437 } 4438 4439 static void 4440 mxge_slice_probe(mxge_softc_t *sc) 4441 { 4442 mxge_cmd_t cmd; 4443 char *old_fw; 4444 int msix_cnt, status, max_intr_slots; 4445 4446 sc->num_slices = 1; 4447 /* 4448 * don't enable multiple slices if they are not enabled, 4449 * or if this is not an SMP system 4450 */ 4451 4452 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2) 4453 return; 4454 4455 /* see how many MSI-X interrupts are available */ 4456 msix_cnt = pci_msix_count(sc->dev); 4457 if (msix_cnt < 2) 4458 return; 4459 4460 /* now load the slice aware firmware see what it supports */ 4461 old_fw = sc->fw_name; 4462 if (old_fw == mxge_fw_aligned) 4463 sc->fw_name = mxge_fw_rss_aligned; 4464 else 4465 sc->fw_name = mxge_fw_rss_unaligned; 4466 status = mxge_load_firmware(sc, 0); 4467 if (status != 0) { 4468 device_printf(sc->dev, "Falling back to a single slice\n"); 4469 return; 4470 } 4471 4472 /* try to send a reset command to the card to see if it 4473 is alive */ 4474 memset(&cmd, 0, sizeof (cmd)); 4475 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 4476 if (status != 0) { 4477 device_printf(sc->dev, "failed reset\n"); 4478 goto abort_with_fw; 4479 } 4480 4481 /* get rx ring size */ 4482 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4483 if (status != 0) { 4484 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4485 goto abort_with_fw; 4486 } 4487 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t)); 4488 4489 /* tell it the size of the interrupt queues */ 4490 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot); 4491 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 4492 if (status != 0) { 4493 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 4494 goto abort_with_fw; 4495 } 4496 4497 /* ask the maximum number of slices it supports */ 4498 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 4499 if (status != 0) { 4500 device_printf(sc->dev, 4501 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); 4502 goto abort_with_fw; 4503 } 4504 sc->num_slices = cmd.data0; 4505 if (sc->num_slices > msix_cnt) 4506 sc->num_slices = msix_cnt; 4507 4508 if (mxge_max_slices == -1) { 4509 /* cap to number of CPUs in system */ 4510 if (sc->num_slices > mp_ncpus) 4511 sc->num_slices = mp_ncpus; 4512 } else { 4513 if (sc->num_slices > mxge_max_slices) 4514 sc->num_slices = mxge_max_slices; 4515 } 4516 /* make sure it is a power of two */ 4517 while (sc->num_slices & (sc->num_slices - 1)) 4518 sc->num_slices--; 4519 4520 if (mxge_verbose) 4521 device_printf(sc->dev, "using %d slices\n", 4522 sc->num_slices); 4523 4524 return; 4525 4526 abort_with_fw: 4527 sc->fw_name = old_fw; 4528 (void) mxge_load_firmware(sc, 0); 4529 } 4530 4531 static int 4532 mxge_add_msix_irqs(mxge_softc_t *sc) 4533 { 4534 size_t bytes; 4535 int count, err, i, rid; 4536 4537 rid = PCIR_BAR(2); 4538 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 4539 &rid, RF_ACTIVE); 4540 4541 if (sc->msix_table_res == NULL) { 4542 device_printf(sc->dev, "couldn't alloc MSIX table res\n"); 4543 return ENXIO; 4544 } 4545 4546 count = sc->num_slices; 4547 err = pci_alloc_msix(sc->dev, &count); 4548 if (err != 0) { 4549 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d" 4550 "err = %d \n", sc->num_slices, err); 4551 goto abort_with_msix_table; 4552 } 4553 if (count < sc->num_slices) { 4554 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n", 4555 count, sc->num_slices); 4556 device_printf(sc->dev, 4557 "Try setting hw.mxge.max_slices to %d\n", 4558 count); 4559 err = ENOSPC; 4560 goto abort_with_msix; 4561 } 4562 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices; 4563 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4564 if (sc->msix_irq_res == NULL) { 4565 err = ENOMEM; 4566 goto abort_with_msix; 4567 } 4568 4569 for (i = 0; i < sc->num_slices; i++) { 4570 rid = i + 1; 4571 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev, 4572 SYS_RES_IRQ, 4573 &rid, RF_ACTIVE); 4574 if (sc->msix_irq_res[i] == NULL) { 4575 device_printf(sc->dev, "couldn't allocate IRQ res" 4576 " for message %d\n", i); 4577 err = ENXIO; 4578 goto abort_with_res; 4579 } 4580 } 4581 4582 bytes = sizeof (*sc->msix_ih) * sc->num_slices; 4583 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4584 4585 for (i = 0; i < sc->num_slices; i++) { 4586 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i], 4587 INTR_TYPE_NET | INTR_MPSAFE, 4588 #if __FreeBSD_version > 700030 4589 NULL, 4590 #endif 4591 mxge_intr, &sc->ss[i], &sc->msix_ih[i]); 4592 if (err != 0) { 4593 device_printf(sc->dev, "couldn't setup intr for " 4594 "message %d\n", i); 4595 goto abort_with_intr; 4596 } 4597 bus_describe_intr(sc->dev, sc->msix_irq_res[i], 4598 sc->msix_ih[i], "s%d", i); 4599 } 4600 4601 if (mxge_verbose) { 4602 device_printf(sc->dev, "using %d msix IRQs:", 4603 sc->num_slices); 4604 for (i = 0; i < sc->num_slices; i++) 4605 printf(" %jd", rman_get_start(sc->msix_irq_res[i])); 4606 printf("\n"); 4607 } 4608 return (0); 4609 4610 abort_with_intr: 4611 for (i = 0; i < sc->num_slices; i++) { 4612 if (sc->msix_ih[i] != NULL) { 4613 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4614 sc->msix_ih[i]); 4615 sc->msix_ih[i] = NULL; 4616 } 4617 } 4618 free(sc->msix_ih, M_DEVBUF); 4619 4620 abort_with_res: 4621 for (i = 0; i < sc->num_slices; i++) { 4622 rid = i + 1; 4623 if (sc->msix_irq_res[i] != NULL) 4624 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4625 sc->msix_irq_res[i]); 4626 sc->msix_irq_res[i] = NULL; 4627 } 4628 free(sc->msix_irq_res, M_DEVBUF); 4629 4630 abort_with_msix: 4631 pci_release_msi(sc->dev); 4632 4633 abort_with_msix_table: 4634 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4635 sc->msix_table_res); 4636 4637 return err; 4638 } 4639 4640 static int 4641 mxge_add_single_irq(mxge_softc_t *sc) 4642 { 4643 int count, err, rid; 4644 4645 count = pci_msi_count(sc->dev); 4646 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) { 4647 rid = 1; 4648 } else { 4649 rid = 0; 4650 sc->legacy_irq = 1; 4651 } 4652 sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid, 4653 RF_SHAREABLE | RF_ACTIVE); 4654 if (sc->irq_res == NULL) { 4655 device_printf(sc->dev, "could not alloc interrupt\n"); 4656 return ENXIO; 4657 } 4658 if (mxge_verbose) 4659 device_printf(sc->dev, "using %s irq %jd\n", 4660 sc->legacy_irq ? "INTx" : "MSI", 4661 rman_get_start(sc->irq_res)); 4662 err = bus_setup_intr(sc->dev, sc->irq_res, 4663 INTR_TYPE_NET | INTR_MPSAFE, 4664 #if __FreeBSD_version > 700030 4665 NULL, 4666 #endif 4667 mxge_intr, &sc->ss[0], &sc->ih); 4668 if (err != 0) { 4669 bus_release_resource(sc->dev, SYS_RES_IRQ, 4670 sc->legacy_irq ? 0 : 1, sc->irq_res); 4671 if (!sc->legacy_irq) 4672 pci_release_msi(sc->dev); 4673 } 4674 return err; 4675 } 4676 4677 static void 4678 mxge_rem_msix_irqs(mxge_softc_t *sc) 4679 { 4680 int i, rid; 4681 4682 for (i = 0; i < sc->num_slices; i++) { 4683 if (sc->msix_ih[i] != NULL) { 4684 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4685 sc->msix_ih[i]); 4686 sc->msix_ih[i] = NULL; 4687 } 4688 } 4689 free(sc->msix_ih, M_DEVBUF); 4690 4691 for (i = 0; i < sc->num_slices; i++) { 4692 rid = i + 1; 4693 if (sc->msix_irq_res[i] != NULL) 4694 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4695 sc->msix_irq_res[i]); 4696 sc->msix_irq_res[i] = NULL; 4697 } 4698 free(sc->msix_irq_res, M_DEVBUF); 4699 4700 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4701 sc->msix_table_res); 4702 4703 pci_release_msi(sc->dev); 4704 return; 4705 } 4706 4707 static void 4708 mxge_rem_single_irq(mxge_softc_t *sc) 4709 { 4710 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 4711 bus_release_resource(sc->dev, SYS_RES_IRQ, 4712 sc->legacy_irq ? 0 : 1, sc->irq_res); 4713 if (!sc->legacy_irq) 4714 pci_release_msi(sc->dev); 4715 } 4716 4717 static void 4718 mxge_rem_irq(mxge_softc_t *sc) 4719 { 4720 if (sc->num_slices > 1) 4721 mxge_rem_msix_irqs(sc); 4722 else 4723 mxge_rem_single_irq(sc); 4724 } 4725 4726 static int 4727 mxge_add_irq(mxge_softc_t *sc) 4728 { 4729 int err; 4730 4731 if (sc->num_slices > 1) 4732 err = mxge_add_msix_irqs(sc); 4733 else 4734 err = mxge_add_single_irq(sc); 4735 4736 if (0 && err == 0 && sc->num_slices > 1) { 4737 mxge_rem_msix_irqs(sc); 4738 err = mxge_add_msix_irqs(sc); 4739 } 4740 return err; 4741 } 4742 4743 static int 4744 mxge_attach(device_t dev) 4745 { 4746 mxge_cmd_t cmd; 4747 mxge_softc_t *sc = device_get_softc(dev); 4748 struct ifnet *ifp; 4749 int err, rid; 4750 4751 sc->dev = dev; 4752 mxge_fetch_tunables(sc); 4753 4754 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc); 4755 sc->tq = taskqueue_create("mxge_taskq", M_WAITOK, 4756 taskqueue_thread_enqueue, &sc->tq); 4757 if (sc->tq == NULL) { 4758 err = ENOMEM; 4759 goto abort_with_nothing; 4760 } 4761 4762 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 4763 1, /* alignment */ 4764 0, /* boundary */ 4765 BUS_SPACE_MAXADDR, /* low */ 4766 BUS_SPACE_MAXADDR, /* high */ 4767 NULL, NULL, /* filter */ 4768 65536 + 256, /* maxsize */ 4769 MXGE_MAX_SEND_DESC, /* num segs */ 4770 65536, /* maxsegsize */ 4771 0, /* flags */ 4772 NULL, NULL, /* lock */ 4773 &sc->parent_dmat); /* tag */ 4774 4775 if (err != 0) { 4776 device_printf(sc->dev, "Err %d allocating parent dmat\n", 4777 err); 4778 goto abort_with_tq; 4779 } 4780 4781 ifp = sc->ifp = if_alloc(IFT_ETHER); 4782 if (ifp == NULL) { 4783 device_printf(dev, "can not if_alloc()\n"); 4784 err = ENOSPC; 4785 goto abort_with_parent_dmat; 4786 } 4787 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4788 4789 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", 4790 device_get_nameunit(dev)); 4791 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF); 4792 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), 4793 "%s:drv", device_get_nameunit(dev)); 4794 mtx_init(&sc->driver_mtx, sc->driver_mtx_name, 4795 MTX_NETWORK_LOCK, MTX_DEF); 4796 4797 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); 4798 4799 mxge_setup_cfg_space(sc); 4800 4801 /* Map the board into the kernel */ 4802 rid = PCIR_BARS; 4803 sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, 4804 RF_ACTIVE); 4805 if (sc->mem_res == NULL) { 4806 device_printf(dev, "could not map memory\n"); 4807 err = ENXIO; 4808 goto abort_with_lock; 4809 } 4810 sc->sram = rman_get_virtual(sc->mem_res); 4811 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4812 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4813 device_printf(dev, "impossible memory region size %jd\n", 4814 rman_get_size(sc->mem_res)); 4815 err = ENXIO; 4816 goto abort_with_mem_res; 4817 } 4818 4819 /* make NULL terminated copy of the EEPROM strings section of 4820 lanai SRAM */ 4821 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4822 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4823 rman_get_bushandle(sc->mem_res), 4824 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4825 sc->eeprom_strings, 4826 MXGE_EEPROM_STRINGS_SIZE - 2); 4827 err = mxge_parse_strings(sc); 4828 if (err != 0) 4829 goto abort_with_mem_res; 4830 4831 /* Enable write combining for efficient use of PCIe bus */ 4832 mxge_enable_wc(sc); 4833 4834 /* Allocate the out of band dma memory */ 4835 err = mxge_dma_alloc(sc, &sc->cmd_dma, 4836 sizeof (mxge_cmd_t), 64); 4837 if (err != 0) 4838 goto abort_with_mem_res; 4839 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 4840 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4841 if (err != 0) 4842 goto abort_with_cmd_dma; 4843 4844 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4845 if (err != 0) 4846 goto abort_with_zeropad_dma; 4847 4848 /* select & load the firmware */ 4849 err = mxge_select_firmware(sc); 4850 if (err != 0) 4851 goto abort_with_dmabench; 4852 sc->intr_coal_delay = mxge_intr_coal_delay; 4853 4854 mxge_slice_probe(sc); 4855 err = mxge_alloc_slices(sc); 4856 if (err != 0) 4857 goto abort_with_dmabench; 4858 4859 err = mxge_reset(sc, 0); 4860 if (err != 0) 4861 goto abort_with_slices; 4862 4863 err = mxge_alloc_rings(sc); 4864 if (err != 0) { 4865 device_printf(sc->dev, "failed to allocate rings\n"); 4866 goto abort_with_slices; 4867 } 4868 4869 err = mxge_add_irq(sc); 4870 if (err != 0) { 4871 device_printf(sc->dev, "failed to add irq\n"); 4872 goto abort_with_rings; 4873 } 4874 4875 ifp->if_baudrate = IF_Gbps(10); 4876 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 4877 IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 | 4878 IFCAP_RXCSUM_IPV6; 4879 #if defined(INET) || defined(INET6) 4880 ifp->if_capabilities |= IFCAP_LRO; 4881 #endif 4882 4883 #ifdef MXGE_NEW_VLAN_API 4884 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; 4885 4886 /* Only FW 1.4.32 and newer can do TSO over vlans */ 4887 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 4888 sc->fw_ver_tiny >= 32) 4889 ifp->if_capabilities |= IFCAP_VLAN_HWTSO; 4890 #endif 4891 sc->max_mtu = mxge_max_mtu(sc); 4892 if (sc->max_mtu >= 9000) 4893 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 4894 else 4895 device_printf(dev, "MTU limited to %d. Install " 4896 "latest firmware for 9000 byte jumbo support\n", 4897 sc->max_mtu - ETHER_HDR_LEN); 4898 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4899 ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6; 4900 /* check to see if f/w supports TSO for IPv6 */ 4901 if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) { 4902 if (CSUM_TCP_IPV6) 4903 ifp->if_capabilities |= IFCAP_TSO6; 4904 sc->max_tso6_hlen = min(cmd.data0, 4905 sizeof (sc->ss[0].scratch)); 4906 } 4907 ifp->if_capenable = ifp->if_capabilities; 4908 if (sc->lro_cnt == 0) 4909 ifp->if_capenable &= ~IFCAP_LRO; 4910 ifp->if_init = mxge_init; 4911 ifp->if_softc = sc; 4912 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4913 ifp->if_ioctl = mxge_ioctl; 4914 ifp->if_start = mxge_start; 4915 ifp->if_get_counter = mxge_get_counter; 4916 ifp->if_hw_tsomax = IP_MAXPACKET - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); 4917 ifp->if_hw_tsomaxsegcount = sc->ss[0].tx.max_desc; 4918 ifp->if_hw_tsomaxsegsize = IP_MAXPACKET; 4919 /* Initialise the ifmedia structure */ 4920 ifmedia_init(&sc->media, 0, mxge_media_change, 4921 mxge_media_status); 4922 mxge_media_init(sc); 4923 mxge_media_probe(sc); 4924 sc->dying = 0; 4925 ether_ifattach(ifp, sc->mac_addr); 4926 /* ether_ifattach sets mtu to ETHERMTU */ 4927 if (mxge_initial_mtu != ETHERMTU) 4928 mxge_change_mtu(sc, mxge_initial_mtu); 4929 4930 mxge_add_sysctls(sc); 4931 #ifdef IFNET_BUF_RING 4932 ifp->if_transmit = mxge_transmit; 4933 ifp->if_qflush = mxge_qflush; 4934 #endif 4935 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq", 4936 device_get_nameunit(sc->dev)); 4937 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 4938 return 0; 4939 4940 abort_with_rings: 4941 mxge_free_rings(sc); 4942 abort_with_slices: 4943 mxge_free_slices(sc); 4944 abort_with_dmabench: 4945 mxge_dma_free(&sc->dmabench_dma); 4946 abort_with_zeropad_dma: 4947 mxge_dma_free(&sc->zeropad_dma); 4948 abort_with_cmd_dma: 4949 mxge_dma_free(&sc->cmd_dma); 4950 abort_with_mem_res: 4951 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4952 abort_with_lock: 4953 pci_disable_busmaster(dev); 4954 mtx_destroy(&sc->cmd_mtx); 4955 mtx_destroy(&sc->driver_mtx); 4956 if_free(ifp); 4957 abort_with_parent_dmat: 4958 bus_dma_tag_destroy(sc->parent_dmat); 4959 abort_with_tq: 4960 if (sc->tq != NULL) { 4961 taskqueue_drain(sc->tq, &sc->watchdog_task); 4962 taskqueue_free(sc->tq); 4963 sc->tq = NULL; 4964 } 4965 abort_with_nothing: 4966 return err; 4967 } 4968 4969 static int 4970 mxge_detach(device_t dev) 4971 { 4972 mxge_softc_t *sc = device_get_softc(dev); 4973 4974 if (mxge_vlans_active(sc)) { 4975 device_printf(sc->dev, 4976 "Detach vlans before removing module\n"); 4977 return EBUSY; 4978 } 4979 mtx_lock(&sc->driver_mtx); 4980 sc->dying = 1; 4981 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 4982 mxge_close(sc, 0); 4983 mtx_unlock(&sc->driver_mtx); 4984 ether_ifdetach(sc->ifp); 4985 if (sc->tq != NULL) { 4986 taskqueue_drain(sc->tq, &sc->watchdog_task); 4987 taskqueue_free(sc->tq); 4988 sc->tq = NULL; 4989 } 4990 callout_drain(&sc->co_hdl); 4991 ifmedia_removeall(&sc->media); 4992 mxge_dummy_rdma(sc, 0); 4993 mxge_rem_sysctls(sc); 4994 mxge_rem_irq(sc); 4995 mxge_free_rings(sc); 4996 mxge_free_slices(sc); 4997 mxge_dma_free(&sc->dmabench_dma); 4998 mxge_dma_free(&sc->zeropad_dma); 4999 mxge_dma_free(&sc->cmd_dma); 5000 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 5001 pci_disable_busmaster(dev); 5002 mtx_destroy(&sc->cmd_mtx); 5003 mtx_destroy(&sc->driver_mtx); 5004 if_free(sc->ifp); 5005 bus_dma_tag_destroy(sc->parent_dmat); 5006 return 0; 5007 } 5008 5009 static int 5010 mxge_shutdown(device_t dev) 5011 { 5012 return 0; 5013 } 5014 5015 /* 5016 This file uses Myri10GE driver indentation. 5017 5018 Local Variables: 5019 c-file-style:"linux" 5020 tab-width:8 5021 End: 5022 */ 5023