/******************************************************************************
SPDX-License-Identifier: BSD-2-Clause-FreeBSD

Copyright (c) 2006-2013, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>
#include <contrib/zlib/zlib.h>
#include <dev/zlib/zcalloc.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet6/ip6_var.h>

#include <machine/bus.h>
#include <machine/in_cksum.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>
#include <sys/smp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */

#include <vm/vm.h>		/* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif

#include <dev/mxge/mxge_mcp.h>
#include <dev/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/mxge/if_mxge_var.h>
#ifdef IFNET_BUF_RING
#include <sys/buf_ring.h>
#endif

#include "opt_inet.h"
#include "opt_inet6.h"

/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
static int mxge_always_promisc = 0;
static int mxge_initial_mtu = ETHERMTU_JUMBO;
static int mxge_throttle = 0;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";

static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),

	DEVMETHOD_END
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc, int down);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);

static int
mxge_probe(device_t dev)
{
	int rev;

	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		rev = pci_get_revid(dev);
		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n", rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}

static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386) || defined(__amd64)
	vm_offset_t len;
	int err;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram, len,
			       PAT_WRITE_COMBINING);
	if (err != 0) {
		device_printf(sc->dev, "pmap_change_attr failed, %d\n", err);
		sc->wc = 0;
	}
#endif
}

/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	if (error == 0) {
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}

static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}

static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}
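/*
 * Illustrative only (hypothetical caller, not part of the driver):
 * allocating and freeing a 4KB, page-aligned, DMA-coherent region
 * with the helpers above.  dma.addr is the kernel virtual address;
 * dma.bus_addr is the address to hand to the NIC.
 */
#if 0
	mxge_dma_t dma;
	int err;

	err = mxge_dma_alloc(sc, &dma, 4096, 4096);
	if (err == 0) {
		/* ... give dma.bus_addr to the firmware ... */
		mxge_dma_free(&dma);
	}
#endif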
/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */

static int
mxge_parse_strings(mxge_softc_t *sc)
{
	char *ptr;
	int i, found_mac, found_sn2;
	char *endptr;

	ptr = sc->eeprom_strings;
	found_mac = 0;
	found_sn2 = 0;
	while (*ptr != '\0') {
		if (strncmp(ptr, "MAC=", 4) == 0) {
			ptr += 4;
			for (i = 0;;) {
				sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
				if (endptr - ptr != 2)
					goto abort;
				ptr = endptr;
				if (++i == 6)
					break;
				if (*ptr++ != ':')
					goto abort;
			}
			found_mac = 1;
		} else if (strncmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strlcpy(sc->product_code_string, ptr,
				sizeof(sc->product_code_string));
		} else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
			ptr += 3;
			strlcpy(sc->serial_number_string, ptr,
				sizeof(sc->serial_number_string));
		} else if (strncmp(ptr, "SN2=", 4) == 0) {
			/* SN2 takes precedence over SN */
			ptr += 4;
			found_sn2 = 1;
			strlcpy(sc->serial_number_string, ptr,
				sizeof(sc->serial_number_string));
		}
		while (*ptr++ != '\0') {}
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}
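/*
 * For illustration, a well-formed string block (all values made up)
 * might look like:
 *
 *	"SN=123456\0MAC=00:60:dd:43:ab:cd\0PC=M3F2-PCIXE-2\0\0"
 *
 * Note that SN2, when present, always overrides SN, no matter which
 * of the two appears first in the block.
 */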
#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function. Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves.  This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	off = base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);

	if (va == NULL) {
		device_printf(sc->dev, "pmap_mapdev() failed\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif

static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";

	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */

	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}
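/*
 * Worked example of the result encoding above (values made up): with
 * len == 4096 and cmd.data0 == 0x00640200, 0x64 (100) transfers
 * completed in 0x200 (512) half-microsecond ticks, so the computed
 * rate is (100 * 4096 * 2) / 512 = 1600 MB/s.  The "* 2" converts
 * 0.5us ticks into microseconds so the quotient comes out in MB/s;
 * the read/write test moves each byte twice, hence its extra "* 2".
 */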
/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary.  Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB.  If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */

static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev,
				      "Max Read Req. size != 4k (0x%x)\n",
				      pectl);
			sc->tx_boundary = 2048;
		}
	}

	/*
	 * load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0) {
		return status;
	}

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen.  Not required on Z8ES or newer.
	 */
	if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
		return 0;
	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS)
		device_printf(dev, "Falling back to ethp! "
			      "Please install up to date fw\n");
	return status;
}
" 600 "Please install up to date fw\n"); 601 return status; 602 } 603 604 static int 605 mxge_select_firmware(mxge_softc_t *sc) 606 { 607 int aligned = 0; 608 int force_firmware = mxge_force_firmware; 609 610 if (sc->throttle) 611 force_firmware = sc->throttle; 612 613 if (force_firmware != 0) { 614 if (force_firmware == 1) 615 aligned = 1; 616 else 617 aligned = 0; 618 if (mxge_verbose) 619 device_printf(sc->dev, 620 "Assuming %s completions (forced)\n", 621 aligned ? "aligned" : "unaligned"); 622 goto abort; 623 } 624 625 /* if the PCIe link width is 4 or less, we can use the aligned 626 firmware and skip any checks */ 627 if (sc->link_width != 0 && sc->link_width <= 4) { 628 device_printf(sc->dev, 629 "PCIe x%d Link, expect reduced performance\n", 630 sc->link_width); 631 aligned = 1; 632 goto abort; 633 } 634 635 if (0 == mxge_firmware_probe(sc)) 636 return 0; 637 638 abort: 639 if (aligned) { 640 sc->fw_name = mxge_fw_aligned; 641 sc->tx_boundary = 4096; 642 } else { 643 sc->fw_name = mxge_fw_unaligned; 644 sc->tx_boundary = 2048; 645 } 646 return (mxge_load_firmware(sc, 0)); 647 } 648 649 static int 650 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 651 { 652 653 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 654 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 655 be32toh(hdr->mcp_type)); 656 return EIO; 657 } 658 659 /* save firmware version for sysctl */ 660 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version)); 661 if (mxge_verbose) 662 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 663 664 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 665 &sc->fw_ver_minor, &sc->fw_ver_tiny); 666 667 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 668 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 669 device_printf(sc->dev, "Found firmware version %s\n", 670 sc->fw_version); 671 device_printf(sc->dev, "Driver needs %d.%d\n", 672 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 673 return EINVAL; 674 } 675 return 0; 676 677 } 678 679 static int 680 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 681 { 682 z_stream zs; 683 char *inflate_buffer; 684 const struct firmware *fw; 685 const mcp_gen_header_t *hdr; 686 unsigned hdr_offset; 687 int status; 688 unsigned int i; 689 char dummy; 690 size_t fw_len; 691 692 fw = firmware_get(sc->fw_name); 693 if (fw == NULL) { 694 device_printf(sc->dev, "Could not find firmware image %s\n", 695 sc->fw_name); 696 return ENOENT; 697 } 698 699 /* setup zlib and decompress f/w */ 700 bzero(&zs, sizeof (zs)); 701 zs.zalloc = zcalloc_nowait; 702 zs.zfree = zcfree; 703 status = inflateInit(&zs); 704 if (status != Z_OK) { 705 status = EIO; 706 goto abort_with_fw; 707 } 708 709 /* the uncompressed size is stored as the firmware version, 710 which would otherwise go unused */ 711 fw_len = (size_t) fw->version; 712 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); 713 if (inflate_buffer == NULL) 714 goto abort_with_zs; 715 zs.avail_in = fw->datasize; 716 zs.next_in = __DECONST(char *, fw->data); 717 zs.avail_out = fw_len; 718 zs.next_out = inflate_buffer; 719 status = inflate(&zs, Z_FINISH); 720 if (status != Z_STREAM_END) { 721 device_printf(sc->dev, "zlib %d\n", status); 722 status = EIO; 723 goto abort_with_buffer; 724 } 725 726 /* check id */ 727 hdr_offset = htobe32(*(const uint32_t *) 728 (inflate_buffer + MCP_HEADER_PTR_OFFSET)); 729 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 730 device_printf(sc->dev, "Bad firmware file"); 731 status = EIO; 732 goto abort_with_buffer; 733 } 734 hdr = 
/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	buf = (uint32_t *)((uintptr_t)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high);	/* dummy addr MSW */
	buf[4] = htobe32(dma_low);	/* dummy addr LSW */
	buf[5] = htobe32(enable);	/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)\n",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}
"enable" : "disable"), confirm, 812 *confirm); 813 } 814 return; 815 } 816 817 static int 818 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 819 { 820 mcp_cmd_t *buf; 821 char buf_bytes[sizeof(*buf) + 8]; 822 volatile mcp_cmd_response_t *response = sc->cmd; 823 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 824 uint32_t dma_low, dma_high; 825 int err, sleep_total = 0; 826 827 /* ensure buf is aligned to 8 bytes */ 828 buf = (mcp_cmd_t *)((uintptr_t)(buf_bytes + 7) & ~7UL); 829 830 buf->data0 = htobe32(data->data0); 831 buf->data1 = htobe32(data->data1); 832 buf->data2 = htobe32(data->data2); 833 buf->cmd = htobe32(cmd); 834 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 835 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 836 837 buf->response_addr.low = htobe32(dma_low); 838 buf->response_addr.high = htobe32(dma_high); 839 mtx_lock(&sc->cmd_mtx); 840 response->result = 0xffffffff; 841 wmb(); 842 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 843 844 /* wait up to 20ms */ 845 err = EAGAIN; 846 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 847 bus_dmamap_sync(sc->cmd_dma.dmat, 848 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 849 wmb(); 850 switch (be32toh(response->result)) { 851 case 0: 852 data->data0 = be32toh(response->data); 853 err = 0; 854 break; 855 case 0xffffffff: 856 DELAY(1000); 857 break; 858 case MXGEFW_CMD_UNKNOWN: 859 err = ENOSYS; 860 break; 861 case MXGEFW_CMD_ERROR_UNALIGNED: 862 err = E2BIG; 863 break; 864 case MXGEFW_CMD_ERROR_BUSY: 865 err = EBUSY; 866 break; 867 case MXGEFW_CMD_ERROR_I2C_ABSENT: 868 err = ENXIO; 869 break; 870 default: 871 device_printf(sc->dev, 872 "mxge: command %d " 873 "failed, result = %d\n", 874 cmd, be32toh(response->result)); 875 err = ENXIO; 876 break; 877 } 878 if (err != EAGAIN) 879 break; 880 } 881 if (err == EAGAIN) 882 device_printf(sc->dev, "mxge: command %d timed out" 883 "result = %d\n", 884 cmd, be32toh(response->result)); 885 mtx_unlock(&sc->cmd_mtx); 886 return err; 887 } 888 889 static int 890 mxge_adopt_running_firmware(mxge_softc_t *sc) 891 { 892 struct mcp_gen_header *hdr; 893 const size_t bytes = sizeof (struct mcp_gen_header); 894 size_t hdr_offset; 895 int status; 896 897 /* find running firmware header */ 898 hdr_offset = htobe32(*(volatile uint32_t *) 899 (sc->sram + MCP_HEADER_PTR_OFFSET)); 900 901 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 902 device_printf(sc->dev, 903 "Running firmware has bad header offset (%d)\n", 904 (int)hdr_offset); 905 return EIO; 906 } 907 908 /* copy header of running firmware from SRAM to host memory to 909 * validate firmware */ 910 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 911 if (hdr == NULL) { 912 device_printf(sc->dev, "could not malloc firmware hdr\n"); 913 return ENOMEM; 914 } 915 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 916 rman_get_bushandle(sc->mem_res), 917 hdr_offset, (char *)hdr, bytes); 918 status = mxge_validate_firmware(sc, hdr); 919 free(hdr, M_DEVBUF); 920 921 /* 922 * check to see if adopted firmware has bug where adopting 923 * it will cause broadcasts to be filtered unless the NIC 924 * is kept in ALLMULTI mode 925 */ 926 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 927 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 928 sc->adopted_rx_filter_bug = 1; 929 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 930 "working around rx filter bug\n", 931 sc->fw_ver_major, sc->fw_ver_minor, 932 sc->fw_ver_tiny); 933 } 934 935 return status; 936 } 937 938 static int 939 
static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not malloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	free(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}

	return status;
}

static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((uintptr_t)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
				      "Using firmware currently running on NIC"
				      ".  For optimal\n");
			device_printf(sc->dev,
				      "performance consider loading optimized "
				      "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8); /* where the code starts */
	buf[4] = htobe32(size - 8); 	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "handoff failed (%p = 0x%x)\n",
			      confirm, *confirm);

		return ENXIO;
	}
	return 0;
}

static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}
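/*
 * Packing example for the command above (address made up): for MAC
 * 00:60:dd:43:ab:cd the arguments are data0 = 0x0060dd43 and
 * data1 = 0x0000abcd.
 */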
static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set promisc mode\n");
	}
}

struct mxge_add_maddr_ctx {
	mxge_softc_t *sc;
	int error;
};

static u_int
mxge_add_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
{
	struct mxge_add_maddr_ctx *ctx = arg;
	mxge_cmd_t cmd;

	if (ctx->error != 0)
		return (0);
	bcopy(LLADDR(sdl), &cmd.data0, 4);
	bcopy(LLADDR(sdl) + 4, &cmd.data1, 2);
	cmd.data0 = htonl(cmd.data0);
	cmd.data1 = htonl(cmd.data1);

	ctx->error = mxge_send_cmd(ctx->sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);

	return (1);
}

static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	struct mxge_add_maddr_ctx ctx;
	struct ifnet *ifp = sc->ifp;
	mxge_cmd_t cmd;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */

	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */
	ctx.sc = sc;
	ctx.error = 0;
	if_foreach_llmaddr(ifp, mxge_add_maddr, &ctx);
	if (ctx.error != 0) {
		device_printf(sc->dev, "Failed MXGEFW_JOIN_MULTICAST_GROUP, "
			      "error status: %d\n", ctx.error);
		/* abort, leaving multicast filtering off */
		return;
	}

	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}

static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}

static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts  */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);

	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lc.lro_bad_csum = 0;
		ss->lc.lro_queued = 0;
		ss->lc.lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			bzero(ss->fw_stats, sizeof *ss->fw_stats);
		}
	}
	sc->rdma_tags_available = 15;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	if (sc->throttle) {
		cmd.data0 = sc->throttle;
		if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
				  &cmd)) {
			device_printf(sc->dev,
				      "can't enable throttle\n");
		}
	}
	return status;
}

static int
mxge_change_throttle(SYSCTL_HANDLER_ARGS)
{
	mxge_cmd_t cmd;
	mxge_softc_t *sc;
	int err;
	unsigned int throttle;

	sc = arg1;
	throttle = sc->throttle;
	err = sysctl_handle_int(oidp, &throttle, arg2, req);
	if (err != 0) {
		return err;
	}

	if (throttle == sc->throttle)
		return 0;

	if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	cmd.data0 = throttle;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
	if (err == 0)
		sc->throttle = throttle;
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0) {
		return err;
	}
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0) {
		return err;
	}
	if (enabled == sc->pause)
		return 0;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_pause(sc, enabled);
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}

static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int slice;

	if (sc->slice_sysctl_tree == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		if (ss == NULL || ss->sysctl_tree == NULL)
			continue;
		sysctl_ctx_free(&ss->sysctl_ctx);
		ss->sysctl_tree = NULL;
	}
	sysctl_ctx_free(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = NULL;
}

static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
	fw = sc->ss[0].fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "firmware_version",
			  CTLFLAG_RD, sc->fw_version,
			  0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "serial_number",
			  CTLFLAG_RD, sc->serial_number_string,
			  0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "product_code",
			  CTLFLAG_RD, sc->product_code_string,
			  0, "product code");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "pcie_link_width",
		       CTLFLAG_RD, &sc->link_width,
		       0, "PCIe link width");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_boundary",
		       CTLFLAG_RD, &sc->tx_boundary,
		       0, "tx_boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_combine",
		       CTLFLAG_RD, &sc->wc,
		       0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_dma_MBs",
		       CTLFLAG_RD, &sc->read_dma,
		       0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_dma_MBs",
		       CTLFLAG_RD, &sc->write_dma,
		       0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_write_dma_MBs",
		       CTLFLAG_RD, &sc->read_write_dma,
		       0, "DMA concurrent Read/Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "watchdog_resets",
		       CTLFLAG_RD, &sc->watchdog_resets,
		       0, "Number of times NIC was reset");

	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "intr_coal_delay", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
	    sc, 0, mxge_change_intr_coal, "I",
	    "interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "throttle", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    mxge_change_throttle, "I", "transmit throttling");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "flow_control_enabled",
	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    mxge_change_flow_control, "I",
	    "enable/disable link-level flow control");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "deassert_wait",
		       CTLFLAG_RW, &mxge_deassert_wait,
		       0, "Wait for IRQ line to go low in ihandler");

	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "link_up", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->link_up, 0, mxge_handle_be32, "I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "rdma_tags_available", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->rdma_tags_available, 0, mxge_handle_be32, "I",
	    "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_bad_crc32", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_bad_crc32, 0, mxge_handle_be32, "I",
	    "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_bad_phy", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_bad_phy, 0, mxge_handle_be32, "I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_link_error_or_filtered",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_link_error_or_filtered, 0, mxge_handle_be32, "I",
	    "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_link_overflow",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_link_overflow, 0, mxge_handle_be32, "I",
	    "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_multicast_filtered",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_multicast_filtered, 0, mxge_handle_be32, "I",
	    "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_no_big_buffer",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_no_big_buffer, 0, mxge_handle_be32, "I",
	    "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_no_small_buffer",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_no_small_buffer, 0, mxge_handle_be32, "I",
	    "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_overrun",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_overrun, 0, mxge_handle_be32, "I",
	    "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_pause", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_pause, 0, mxge_handle_be32, "I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_runt", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_runt, 0, mxge_handle_be32, "I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_unicast_filtered",
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &fw->dropped_unicast_filtered, 0, mxge_handle_be32, "I",
	    "dropped_unicast_filtered");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "verbose",
		       CTLFLAG_RW, &mxge_verbose,
		       0, "verbose printing");

	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree =
		SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
		    "slice", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		sprintf(slice_num, "%d", slice);
		ss->sysctl_tree =
			SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
			    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
		children = SYSCTL_CHILDREN(ss->sysctl_tree);
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_small_cnt",
			       CTLFLAG_RD, &ss->rx_small.cnt,
			       0, "rx_small_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_big_cnt",
			       CTLFLAG_RD, &ss->rx_big.cnt,
			       0, "rx_big_cnt");
		SYSCTL_ADD_U64(ctx, children, OID_AUTO,
			       "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed,
			       0, "number of lro merge queues flushed");

		SYSCTL_ADD_U64(ctx, children, OID_AUTO,
			       "lro_bad_csum", CTLFLAG_RD,
			       &ss->lc.lro_bad_csum,
			       0, "number of bad csums preventing LRO");

		SYSCTL_ADD_U64(ctx, children, OID_AUTO,
			       "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued,
			       0, "number of frames appended to lro merge "
			       "queues");

#ifndef IFNET_BUF_RING
		/* only transmit from slice 0 for now */
		if (slice > 0)
			continue;
#endif
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_req",
			       CTLFLAG_RD, &ss->tx.req,
			       0, "tx_req");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_done",
			       CTLFLAG_RD, &ss->tx.done,
			       0, "tx_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_pkt_done",
			       CTLFLAG_RD, &ss->tx.pkt_done,
			       0, "tx_pkt_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_stall",
			       CTLFLAG_RD, &ss->tx.stall,
			       0, "tx_stall");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_wake",
			       CTLFLAG_RD, &ss->tx.wake,
			       0, "tx_wake");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_defrag",
			       CTLFLAG_RD, &ss->tx.defrag,
			       0, "tx_defrag");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_queue_active",
			       CTLFLAG_RD, &ss->tx.queue_active,
			       0, "tx_queue_active");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_activate",
			       CTLFLAG_RD, &ss->tx.activate,
			       0, "tx_activate");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_deactivate",
			       CTLFLAG_RD, &ss->tx.deactivate,
			       0, "tx_deactivate");
	}
}

/* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
   backwards one at a time and handle ring wraps */

static inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
			  mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
		wmb();
	}
}

/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */
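/*
 * For example (hypothetical ring state): with tx->mask == 255,
 * tx->req == 254 and cnt == 4, the requests land in slots 254, 255,
 * 0 and 1.  The "(idx + cnt) < tx->mask" fast path fails, so
 * mxge_submit_req_backwards() copies slots 1, 0 and 255 first, the
 * first request is then copied into slot 254, and only after that is
 * its flags byte (in the last 32-bit word of the 16-byte request)
 * re-written with the valid flags, telling the NIC the whole chain
 * is ready.
 */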
static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
		int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}

static int
mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m,
	      struct mxge_pkt_info *pi)
{
	struct ether_vlan_header *eh;
	uint16_t etype;
	int tso = m->m_pkthdr.csum_flags & (CSUM_TSO);
#if IFCAP_TSO6 && defined(INET6)
	int nxt;
#endif

	eh = mtod(m, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		pi->ip_off = ETHER_HDR_LEN;
	}

	switch (etype) {
	case ETHERTYPE_IP:
		/*
		 * ensure ip header is in first mbuf, copy it to a
		 * scratch buffer if not
		 */
		pi->ip = (struct ip *)(m->m_data + pi->ip_off);
		pi->ip6 = NULL;
		if (__predict_false(m->m_len < pi->ip_off +
		    sizeof(*pi->ip))) {
			m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip),
				   ss->scratch);
			pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
		}
		pi->ip_hlen = pi->ip->ip_hl << 2;
		if (!tso)
			return 0;

		if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
		    sizeof(struct tcphdr))) {
			m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
				   sizeof(struct tcphdr), ss->scratch);
			pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
		}
		pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen);
		break;
#if IFCAP_TSO6 && defined(INET6)
	case ETHERTYPE_IPV6:
		pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off);
		if (__predict_false(m->m_len < pi->ip_off +
		    sizeof(*pi->ip6))) {
			m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6),
				   ss->scratch);
			pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
		}
		nxt = 0;
		pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt);
		pi->ip_hlen -= pi->ip_off;
		if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
			return EINVAL;

		if (!tso)
			return 0;

		if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen)
			return EINVAL;

		if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
		    sizeof(struct tcphdr))) {
			m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
				   sizeof(struct tcphdr), ss->scratch);
			pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
		}
		pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen);
		break;
#endif
	default:
		return EINVAL;
	}
	return 0;
}
#if IFCAP_TSO4

static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, struct mxge_pkt_info *pi)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss, sum;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	cksum_offset = pi->ip_off + pi->ip_hlen;
	cum_len = -(cksum_offset + (pi->tcp->th_off << 2));

	/* TSO implies checksum offload on this hardware */
	if (__predict_false((m->m_pkthdr.csum_flags &
			     (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) {
		/*
		 * If packet has full TCP csum, replace it with pseudo hdr
		 * sum that the NIC expects, otherwise the NIC will emit
		 * packets with bad TCP checksums.
		 */
		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
		if (pi->ip6) {
#if (CSUM_TCP_IPV6 != 0) && defined(INET6)
			m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
			sum = in6_cksum_pseudo(pi->ip6,
			    m->m_pkthdr.len - cksum_offset,
			    IPPROTO_TCP, 0);
#endif
		} else {
#ifdef INET
			m->m_pkthdr.csum_flags |= CSUM_TCP;
			sum = in_pseudo(pi->ip->ip_src.s_addr,
			    pi->ip->ip_dst.s_addr,
			    htons(IPPROTO_TCP + (m->m_pkthdr.len -
				    cksum_offset)));
#endif
		}
		m_copyback(m, offsetof(struct tcphdr, th_sum) +
		    cksum_offset, sizeof(sum), (caddr_t)&sum);
	}
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	if (pi->ip6) {
		/*
		 * for IPv6 TSO, the "checksum offset" is re-purposed
		 * to store the TCP header len
		 */
		cksum_offset = (pi->tcp->th_off << 2);
	}

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */
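	/*
	 * Illustration of the retroactive fill-in (a made-up run):
	 * suppose a payload stretch spans three consecutive send
	 * requests with no segmentation cut between them.  rdma_count
	 * grows 1, 2, 3 as the requests are built, and each pass
	 * through "(req-rdma_count)->rdma_count = rdma_count + 1"
	 * below re-writes the first request of the run, so by the
	 * time a cut (or the end of the packet) resets the counter,
	 * that first request already records how many RDMAs make up
	 * the run.
	 */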
	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces*/
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (cksum_offset != 0 && !pi->ip6) {
				if (__predict_false(cksum_offset > seglen))
					cksum_offset -= seglen;
				else
					cksum_offset = 0;
			}
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req-rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP |
				 MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	ss->oerrors++;
	if (!once) {
		printf("tx->max_desc exceeded via TSO!\n");
		printf("mss = %d, %ld, %d!\n", mss,
		       (long)seg - (long)tx->seg_list, tx->max_desc);
		once = 1;
	}
	return;

}

#endif /* IFCAP_TSO4 */

#ifdef MXGE_NEW_VLAN_API
/*
 * We reproduce the software vlan tag insertion from
 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
 * vlan tag insertion.  We need to advertise this in order to have the
 * vlan interface respect our csum offload flags.
 */
static struct mbuf *
mxge_vlan_tag_insert(struct mbuf *m)
{
	struct ether_vlan_header *evl;

	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
	if (__predict_false(m == NULL))
		return NULL;
	if (m->m_len < sizeof(*evl)) {
		m = m_pullup(m, sizeof(*evl));
		if (__predict_false(m == NULL))
			return NULL;
	}
	/*
	 * Transform the Ethernet header into an Ethernet header
	 * with 802.1Q encapsulation.
	 */
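	/*
	 * Illustration of the transform: the 12 bytes of destination
	 * and source MAC address slide up into the 4 bytes just
	 * prepended, opening a gap before the original ethertype
	 * which is then filled with the 802.1Q ethertype and tag:
	 *
	 *	[ dst | src | type | ... ]
	 * becomes
	 *	[ dst | src | 0x8100 | tag | type | ... ]
	 */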
2020 */ 2021 evl = mtod(m, struct ether_vlan_header *); 2022 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 2023 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 2024 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 2025 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 2026 m->m_flags &= ~M_VLANTAG; 2027 return m; 2028 } 2029 #endif /* MXGE_NEW_VLAN_API */ 2030 2031 static void 2032 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 2033 { 2034 struct mxge_pkt_info pi = {0,0,0,0}; 2035 mxge_softc_t *sc; 2036 mcp_kreq_ether_send_t *req; 2037 bus_dma_segment_t *seg; 2038 struct mbuf *m_tmp; 2039 mxge_tx_ring_t *tx; 2040 int cnt, cum_len, err, i, idx, odd_flag; 2041 uint16_t pseudo_hdr_offset; 2042 uint8_t flags, cksum_offset; 2043 2044 sc = ss->sc; 2045 tx = &ss->tx; 2046 2047 #ifdef MXGE_NEW_VLAN_API 2048 if (m->m_flags & M_VLANTAG) { 2049 m = mxge_vlan_tag_insert(m); 2050 if (__predict_false(m == NULL)) 2051 goto drop_without_m; 2052 } 2053 #endif 2054 if (m->m_pkthdr.csum_flags & 2055 (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { 2056 if (mxge_parse_tx(ss, m, &pi)) 2057 goto drop; 2058 } 2059 2060 /* (try to) map the frame for DMA */ 2061 idx = tx->req & tx->mask; 2062 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 2063 m, tx->seg_list, &cnt, 2064 BUS_DMA_NOWAIT); 2065 if (__predict_false(err == EFBIG)) { 2066 /* Too many segments in the chain. Try 2067 to defrag */ 2068 m_tmp = m_defrag(m, M_NOWAIT); 2069 if (m_tmp == NULL) { 2070 goto drop; 2071 } 2072 ss->tx.defrag++; 2073 m = m_tmp; 2074 err = bus_dmamap_load_mbuf_sg(tx->dmat, 2075 tx->info[idx].map, 2076 m, tx->seg_list, &cnt, 2077 BUS_DMA_NOWAIT); 2078 } 2079 if (__predict_false(err != 0)) { 2080 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 2081 " packet len = %d\n", err, m->m_pkthdr.len); 2082 goto drop; 2083 } 2084 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 2085 BUS_DMASYNC_PREWRITE); 2086 tx->info[idx].m = m; 2087 2088 #if IFCAP_TSO4 2089 /* TSO is different enough, we handle it in another routine */ 2090 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 2091 mxge_encap_tso(ss, m, cnt, &pi); 2092 return; 2093 } 2094 #endif 2095 2096 req = tx->req_list; 2097 cksum_offset = 0; 2098 pseudo_hdr_offset = 0; 2099 flags = MXGEFW_FLAGS_NO_TSO; 2100 2101 /* checksum offloading? 
*/ 2102 if (m->m_pkthdr.csum_flags & 2103 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { 2104 /* ensure ip header is in first mbuf, copy 2105 it to a scratch buffer if not */ 2106 cksum_offset = pi.ip_off + pi.ip_hlen; 2107 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2108 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2109 req->cksum_offset = cksum_offset; 2110 flags |= MXGEFW_FLAGS_CKSUM; 2111 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2112 } else { 2113 odd_flag = 0; 2114 } 2115 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2116 flags |= MXGEFW_FLAGS_SMALL; 2117 2118 /* convert segments into a request list */ 2119 cum_len = 0; 2120 seg = tx->seg_list; 2121 req->flags = MXGEFW_FLAGS_FIRST; 2122 for (i = 0; i < cnt; i++) { 2123 req->addr_low = 2124 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2125 req->addr_high = 2126 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2127 req->length = htobe16(seg->ds_len); 2128 req->cksum_offset = cksum_offset; 2129 if (cksum_offset > seg->ds_len) 2130 cksum_offset -= seg->ds_len; 2131 else 2132 cksum_offset = 0; 2133 req->pseudo_hdr_offset = pseudo_hdr_offset; 2134 req->pad = 0; /* complete solid 16-byte block */ 2135 req->rdma_count = 1; 2136 req->flags |= flags | ((cum_len & 1) * odd_flag); 2137 cum_len += seg->ds_len; 2138 seg++; 2139 req++; 2140 req->flags = 0; 2141 } 2142 req--; 2143 /* pad runts to 60 bytes */ 2144 if (cum_len < 60) { 2145 req++; 2146 req->addr_low = 2147 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2148 req->addr_high = 2149 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2150 req->length = htobe16(60 - cum_len); 2151 req->cksum_offset = 0; 2152 req->pseudo_hdr_offset = pseudo_hdr_offset; 2153 req->pad = 0; /* complete solid 16-byte block */ 2154 req->rdma_count = 1; 2155 req->flags |= flags | ((cum_len & 1) * odd_flag); 2156 cnt++; 2157 } 2158 2159 tx->req_list[0].rdma_count = cnt; 2160 #if 0 2161 /* print what the firmware will see */ 2162 for (i = 0; i < cnt; i++) { 2163 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2164 "cso:%d, flags:0x%x, rdma:%d\n", 2165 i, (int)ntohl(tx->req_list[i].addr_high), 2166 (int)ntohl(tx->req_list[i].addr_low), 2167 (int)ntohs(tx->req_list[i].length), 2168 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2169 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2170 tx->req_list[i].rdma_count); 2171 } 2172 printf("--------------\n"); 2173 #endif 2174 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2175 mxge_submit_req(tx, tx->req_list, cnt); 2176 #ifdef IFNET_BUF_RING 2177 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2178 /* tell the NIC to start polling this slice */ 2179 *tx->send_go = 1; 2180 tx->queue_active = 1; 2181 tx->activate++; 2182 wmb(); 2183 } 2184 #endif 2185 return; 2186 2187 drop: 2188 m_freem(m); 2189 drop_without_m: 2190 ss->oerrors++; 2191 return; 2192 } 2193 2194 #ifdef IFNET_BUF_RING 2195 static void 2196 mxge_qflush(struct ifnet *ifp) 2197 { 2198 mxge_softc_t *sc = ifp->if_softc; 2199 mxge_tx_ring_t *tx; 2200 struct mbuf *m; 2201 int slice; 2202 2203 for (slice = 0; slice < sc->num_slices; slice++) { 2204 tx = &sc->ss[slice].tx; 2205 mtx_lock(&tx->mtx); 2206 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2207 m_freem(m); 2208 mtx_unlock(&tx->mtx); 2209 } 2210 if_qflush(ifp); 2211 } 2212 2213 static inline void 2214 mxge_start_locked(struct mxge_slice_state *ss) 2215 { 2216 mxge_softc_t *sc; 2217 struct mbuf *m; 2218 struct ifnet *ifp; 2219 mxge_tx_ring_t *tx; 2220 2221 sc = ss->sc; 2222 ifp = sc->ifp; 2223 tx = &ss->tx; 2224 2225 while 
((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2226 m = drbr_dequeue(ifp, tx->br); 2227 if (m == NULL) { 2228 return; 2229 } 2230 /* let BPF see it */ 2231 BPF_MTAP(ifp, m); 2232 2233 /* give it to the nic */ 2234 mxge_encap(ss, m); 2235 } 2236 /* ran out of transmit slots */ 2237 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) 2238 && (!drbr_empty(ifp, tx->br))) { 2239 ss->if_drv_flags |= IFF_DRV_OACTIVE; 2240 tx->stall++; 2241 } 2242 } 2243 2244 static int 2245 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2246 { 2247 mxge_softc_t *sc; 2248 struct ifnet *ifp; 2249 mxge_tx_ring_t *tx; 2250 int err; 2251 2252 sc = ss->sc; 2253 ifp = sc->ifp; 2254 tx = &ss->tx; 2255 2256 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 2257 IFF_DRV_RUNNING) { 2258 err = drbr_enqueue(ifp, tx->br, m); 2259 return (err); 2260 } 2261 2262 if (!drbr_needs_enqueue(ifp, tx->br) && 2263 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2264 /* let BPF see it */ 2265 BPF_MTAP(ifp, m); 2266 /* give it to the nic */ 2267 mxge_encap(ss, m); 2268 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2269 return (err); 2270 } 2271 if (!drbr_empty(ifp, tx->br)) 2272 mxge_start_locked(ss); 2273 return (0); 2274 } 2275 2276 static int 2277 mxge_transmit(struct ifnet *ifp, struct mbuf *m) 2278 { 2279 mxge_softc_t *sc = ifp->if_softc; 2280 struct mxge_slice_state *ss; 2281 mxge_tx_ring_t *tx; 2282 int err = 0; 2283 int slice; 2284 2285 slice = m->m_pkthdr.flowid; 2286 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ 2287 2288 ss = &sc->ss[slice]; 2289 tx = &ss->tx; 2290 2291 if (mtx_trylock(&tx->mtx)) { 2292 err = mxge_transmit_locked(ss, m); 2293 mtx_unlock(&tx->mtx); 2294 } else { 2295 err = drbr_enqueue(ifp, tx->br, m); 2296 } 2297 2298 return (err); 2299 } 2300 2301 #else 2302 2303 static inline void 2304 mxge_start_locked(struct mxge_slice_state *ss) 2305 { 2306 mxge_softc_t *sc; 2307 struct mbuf *m; 2308 struct ifnet *ifp; 2309 mxge_tx_ring_t *tx; 2310 2311 sc = ss->sc; 2312 ifp = sc->ifp; 2313 tx = &ss->tx; 2314 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2315 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 2316 if (m == NULL) { 2317 return; 2318 } 2319 /* let BPF see it */ 2320 BPF_MTAP(ifp, m); 2321 2322 /* give it to the nic */ 2323 mxge_encap(ss, m); 2324 } 2325 /* ran out of transmit slots */ 2326 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 2327 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2328 tx->stall++; 2329 } 2330 } 2331 #endif 2332 static void 2333 mxge_start(struct ifnet *ifp) 2334 { 2335 mxge_softc_t *sc = ifp->if_softc; 2336 struct mxge_slice_state *ss; 2337 2338 /* only use the first slice for now */ 2339 ss = &sc->ss[0]; 2340 mtx_lock(&ss->tx.mtx); 2341 mxge_start_locked(ss); 2342 mtx_unlock(&ss->tx.mtx); 2343 } 2344 2345 /* 2346 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2347 * at most 32 bytes at a time, so as to avoid involving the software 2348 * pio handler in the nic. 
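Each mcp_kreq_ether_recv_t is 8 bytes, so a * group of 8 receive descriptors goes out as the two 32-byte * mxge_pio_copy() bursts below.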
We re-write the first segment's low 2349 * DMA address to mark it valid only after we write the entire chunk 2350 * in a burst 2351 */ 2352 static inline void 2353 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2354 mcp_kreq_ether_recv_t *src) 2355 { 2356 uint32_t low; 2357 2358 low = src->addr_low; 2359 src->addr_low = 0xffffffff; 2360 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2361 wmb(); 2362 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2363 wmb(); 2364 src->addr_low = low; 2365 dst->addr_low = low; 2366 wmb(); 2367 } 2368 2369 static int 2370 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2371 { 2372 bus_dma_segment_t seg; 2373 struct mbuf *m; 2374 mxge_rx_ring_t *rx = &ss->rx_small; 2375 int cnt, err; 2376 2377 m = m_gethdr(M_NOWAIT, MT_DATA); 2378 if (m == NULL) { 2379 rx->alloc_fail++; 2380 err = ENOBUFS; 2381 goto done; 2382 } 2383 m->m_len = MHLEN; 2384 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2385 &seg, &cnt, BUS_DMA_NOWAIT); 2386 if (err != 0) { 2387 m_free(m); 2388 goto done; 2389 } 2390 rx->info[idx].m = m; 2391 rx->shadow[idx].addr_low = 2392 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2393 rx->shadow[idx].addr_high = 2394 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2395 2396 done: 2397 if ((idx & 7) == 7) 2398 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2399 return err; 2400 } 2401 2402 static int 2403 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2404 { 2405 bus_dma_segment_t seg[3]; 2406 struct mbuf *m; 2407 mxge_rx_ring_t *rx = &ss->rx_big; 2408 int cnt, err, i; 2409 2410 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2411 if (m == NULL) { 2412 rx->alloc_fail++; 2413 err = ENOBUFS; 2414 goto done; 2415 } 2416 m->m_len = rx->mlen; 2417 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2418 seg, &cnt, BUS_DMA_NOWAIT); 2419 if (err != 0) { 2420 m_free(m); 2421 goto done; 2422 } 2423 rx->info[idx].m = m; 2424 rx->shadow[idx].addr_low = 2425 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2426 rx->shadow[idx].addr_high = 2427 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2428 2429 #if MXGE_VIRT_JUMBOS 2430 for (i = 1; i < cnt; i++) { 2431 rx->shadow[idx + i].addr_low = 2432 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2433 rx->shadow[idx + i].addr_high = 2434 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2435 } 2436 #endif 2437 2438 done: 2439 for (i = 0; i < rx->nbufs; i++) { 2440 if ((idx & 7) == 7) { 2441 mxge_submit_8rx(&rx->lanai[idx - 7], 2442 &rx->shadow[idx - 7]); 2443 } 2444 idx++; 2445 } 2446 return err; 2447 } 2448 2449 #ifdef INET6 2450 2451 static uint16_t 2452 mxge_csum_generic(uint16_t *raw, int len) 2453 { 2454 uint32_t csum; 2455 2456 csum = 0; 2457 while (len > 0) { 2458 csum += *raw; 2459 raw++; 2460 len -= 2; 2461 } 2462 csum = (csum >> 16) + (csum & 0xffff); 2463 csum = (csum >> 16) + (csum & 0xffff); 2464 return (uint16_t)csum; 2465 } 2466 2467 static inline uint16_t 2468 mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum) 2469 { 2470 uint32_t partial; 2471 int nxt, cksum_offset; 2472 struct ip6_hdr *ip6 = p; 2473 uint16_t c; 2474 2475 nxt = ip6->ip6_nxt; 2476 cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN; 2477 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) { 2478 cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN, 2479 IPPROTO_IPV6, &nxt); 2480 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) 2481 return (1); 2482 } 2483 2484 /* 2485 * IPv6 headers do not contain a checksum, and hence 2486 * do not checksum to zero, so they don't "fall out" 2487 * of the partial checksum 
calculation like IPv4 2488 * headers do. We need to fix the partial checksum by 2489 * subtracting the checksum of the IPv6 header. 2490 */ 2491 2492 partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset - 2493 ETHER_HDR_LEN); 2494 csum += ~partial; 2495 csum += (csum < ~partial); 2496 csum = (csum >> 16) + (csum & 0xFFFF); 2497 csum = (csum >> 16) + (csum & 0xFFFF); 2498 c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt, 2499 csum); 2500 c ^= 0xffff; 2501 return (c); 2502 } 2503 #endif /* INET6 */ 2504 /* 2505 * Myri10GE hardware checksums are not valid if the sender 2506 * padded the frame with non-zero padding. This is because 2507 * the firmware just does a simple 16-bit 1s complement 2508 * checksum across the entire frame, excluding the first 14 2509 * bytes. It is best to simply check the checksum and 2510 * tell the stack about it only if the checksum is good. 2511 */ 2512 2513 static inline uint16_t 2514 mxge_rx_csum(struct mbuf *m, int csum) 2515 { 2516 struct ether_header *eh; 2517 #ifdef INET 2518 struct ip *ip; 2519 #endif 2520 #if defined(INET) || defined(INET6) 2521 int cap = m->m_pkthdr.rcvif->if_capenable; 2522 #endif 2523 uint16_t c, etype; 2524 2525 eh = mtod(m, struct ether_header *); 2526 etype = ntohs(eh->ether_type); 2527 switch (etype) { 2528 #ifdef INET 2529 case ETHERTYPE_IP: 2530 if ((cap & IFCAP_RXCSUM) == 0) 2531 return (1); 2532 ip = (struct ip *)(eh + 1); 2533 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP) 2534 return (1); 2535 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2536 htonl(ntohs(csum) + ntohs(ip->ip_len) - 2537 (ip->ip_hl << 2) + ip->ip_p)); 2538 c ^= 0xffff; 2539 break; 2540 #endif 2541 #ifdef INET6 2542 case ETHERTYPE_IPV6: 2543 if ((cap & IFCAP_RXCSUM_IPV6) == 0) 2544 return (1); 2545 c = mxge_rx_csum6((eh + 1), m, csum); 2546 break; 2547 #endif 2548 default: 2549 c = 1; 2550 } 2551 return (c); 2552 } 2553 2554 static void 2555 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2556 { 2557 struct ether_vlan_header *evl; 2558 uint32_t partial; 2559 2560 evl = mtod(m, struct ether_vlan_header *); 2561 2562 /* 2563 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes 2564 * after what the firmware thought was the end of the ethernet 2565 * header. 2566 */ 2567 2568 /* put checksum into host byte order */ 2569 *csum = ntohs(*csum); 2570 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2571 (*csum) += ~partial; 2572 (*csum) += ((*csum) < ~partial); 2573 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2574 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2575 2576 /* restore checksum to network byte order; 2577 later consumers expect this */ 2578 *csum = htons(*csum); 2579 2580 /* save the tag */ 2581 #ifdef MXGE_NEW_VLAN_API 2582 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); 2583 #else 2584 { 2585 struct m_tag *mtag; 2586 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), 2587 M_NOWAIT); 2588 if (mtag == NULL) 2589 return; 2590 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); 2591 m_tag_prepend(m, mtag); 2592 } 2593 2594 #endif 2595 m->m_flags |= M_VLANTAG; 2596 2597 /* 2598 * Remove the 802.1q header by copying the Ethernet 2599 * addresses over it and adjusting the beginning of 2600 * the data in the mbuf. The encapsulated Ethernet 2601 * type field is already in place.
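* This is the mirror image of mxge_vlan_tag_insert() above: the 12 * address bytes move up by ETHER_VLAN_ENCAP_LEN and m_adj() then * trims the now-stale 4 bytes from the front of the mbuf.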
2602 */ 2603 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2604 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2605 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2606 } 2607 2608 static inline void 2609 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, 2610 uint32_t csum, int lro) 2611 { 2612 mxge_softc_t *sc; 2613 struct ifnet *ifp; 2614 struct mbuf *m; 2615 struct ether_header *eh; 2616 mxge_rx_ring_t *rx; 2617 bus_dmamap_t old_map; 2618 int idx; 2619 2620 sc = ss->sc; 2621 ifp = sc->ifp; 2622 rx = &ss->rx_big; 2623 idx = rx->cnt & rx->mask; 2624 rx->cnt += rx->nbufs; 2625 /* save a pointer to the received mbuf */ 2626 m = rx->info[idx].m; 2627 /* try to replace the received mbuf */ 2628 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2629 /* drop the frame -- the old mbuf is re-cycled */ 2630 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 2631 return; 2632 } 2633 2634 /* unmap the received buffer */ 2635 old_map = rx->info[idx].map; 2636 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2637 bus_dmamap_unload(rx->dmat, old_map); 2638 2639 /* swap the bus_dmamap_t's */ 2640 rx->info[idx].map = rx->extra_map; 2641 rx->extra_map = old_map; 2642 2643 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2644 * aligned */ 2645 m->m_data += MXGEFW_PAD; 2646 2647 m->m_pkthdr.rcvif = ifp; 2648 m->m_len = m->m_pkthdr.len = len; 2649 ss->ipackets++; 2650 eh = mtod(m, struct ether_header *); 2651 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2652 mxge_vlan_tag_remove(m, &csum); 2653 } 2654 /* flowid only valid if RSS hashing is enabled */ 2655 if (sc->num_slices > 1) { 2656 m->m_pkthdr.flowid = (ss - sc->ss); 2657 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 2658 } 2659 /* if the checksum is valid, mark it in the mbuf header */ 2660 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && 2661 (0 == mxge_rx_csum(m, csum))) { 2662 /* Tell the stack that the checksum is good */ 2663 m->m_pkthdr.csum_data = 0xffff; 2664 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2665 CSUM_DATA_VALID; 2666 2667 #if defined(INET) || defined (INET6) 2668 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0))) 2669 return; 2670 #endif 2671 } 2672 /* pass the frame up the stack */ 2673 (*ifp->if_input)(ifp, m); 2674 } 2675 2676 static inline void 2677 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, 2678 uint32_t csum, int lro) 2679 { 2680 mxge_softc_t *sc; 2681 struct ifnet *ifp; 2682 struct ether_header *eh; 2683 struct mbuf *m; 2684 mxge_rx_ring_t *rx; 2685 bus_dmamap_t old_map; 2686 int idx; 2687 2688 sc = ss->sc; 2689 ifp = sc->ifp; 2690 rx = &ss->rx_small; 2691 idx = rx->cnt & rx->mask; 2692 rx->cnt++; 2693 /* save a pointer to the received mbuf */ 2694 m = rx->info[idx].m; 2695 /* try to replace the received mbuf */ 2696 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2697 /* drop the frame -- the old mbuf is re-cycled */ 2698 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 2699 return; 2700 } 2701 2702 /* unmap the received buffer */ 2703 old_map = rx->info[idx].map; 2704 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2705 bus_dmamap_unload(rx->dmat, old_map); 2706 2707 /* swap the bus_dmamap_t's */ 2708 rx->info[idx].map = rx->extra_map; 2709 rx->extra_map = old_map; 2710 2711 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2712 * aligned */ 2713 m->m_data += MXGEFW_PAD; 2714 2715 m->m_pkthdr.rcvif = ifp; 2716 m->m_len = m->m_pkthdr.len = len; 2717 ss->ipackets++; 2718 eh = mtod(m, struct ether_header *); 2719 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2720 
mxge_vlan_tag_remove(m, &csum); 2721 } 2722 /* flowid only valid if RSS hashing is enabled */ 2723 if (sc->num_slices > 1) { 2724 m->m_pkthdr.flowid = (ss - sc->ss); 2725 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 2726 } 2727 /* if the checksum is valid, mark it in the mbuf header */ 2728 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && 2729 (0 == mxge_rx_csum(m, csum))) { 2730 /* Tell the stack that the checksum is good */ 2731 m->m_pkthdr.csum_data = 0xffff; 2732 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2733 CSUM_DATA_VALID; 2734 2735 #if defined(INET) || defined (INET6) 2736 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum))) 2737 return; 2738 #endif 2739 } 2740 /* pass the frame up the stack */ 2741 (*ifp->if_input)(ifp, m); 2742 } 2743 2744 static inline void 2745 mxge_clean_rx_done(struct mxge_slice_state *ss) 2746 { 2747 mxge_rx_done_t *rx_done = &ss->rx_done; 2748 int limit = 0; 2749 uint16_t length; 2750 uint16_t checksum; 2751 int lro; 2752 2753 lro = ss->sc->ifp->if_capenable & IFCAP_LRO; 2754 while (rx_done->entry[rx_done->idx].length != 0) { 2755 length = ntohs(rx_done->entry[rx_done->idx].length); 2756 rx_done->entry[rx_done->idx].length = 0; 2757 checksum = rx_done->entry[rx_done->idx].checksum; 2758 if (length <= (MHLEN - MXGEFW_PAD)) 2759 mxge_rx_done_small(ss, length, checksum, lro); 2760 else 2761 mxge_rx_done_big(ss, length, checksum, lro); 2762 rx_done->cnt++; 2763 rx_done->idx = rx_done->cnt & rx_done->mask; 2764 2765 /* limit potential for livelock */ 2766 if (__predict_false(++limit > rx_done->mask / 2)) 2767 break; 2768 } 2769 #if defined(INET) || defined (INET6) 2770 tcp_lro_flush_all(&ss->lc); 2771 #endif 2772 } 2773 2774 static inline void 2775 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2776 { 2777 struct ifnet *ifp __unused; 2778 mxge_tx_ring_t *tx; 2779 struct mbuf *m; 2780 bus_dmamap_t map; 2781 int idx; 2782 int *flags; 2783 2784 tx = &ss->tx; 2785 ifp = ss->sc->ifp; 2786 while (tx->pkt_done != mcp_idx) { 2787 idx = tx->done & tx->mask; 2788 tx->done++; 2789 m = tx->info[idx].m; 2790 /* mbuf and DMA map only attached to the first 2791 segment per-mbuf */ 2792 if (m != NULL) { 2793 ss->obytes += m->m_pkthdr.len; 2794 if (m->m_flags & M_MCAST) 2795 ss->omcasts++; 2796 ss->opackets++; 2797 tx->info[idx].m = NULL; 2798 map = tx->info[idx].map; 2799 bus_dmamap_unload(tx->dmat, map); 2800 m_freem(m); 2801 } 2802 if (tx->info[idx].flag) { 2803 tx->info[idx].flag = 0; 2804 tx->pkt_done++; 2805 } 2806 } 2807 2808 /* If we have space, clear IFF_OACTIVE to tell the stack that 2809 it's OK to send packets */ 2810 #ifdef IFNET_BUF_RING 2811 flags = &ss->if_drv_flags; 2812 #else 2813 flags = &ifp->if_drv_flags; 2814 #endif 2815 mtx_lock(&ss->tx.mtx); 2816 if ((*flags) & IFF_DRV_OACTIVE && 2817 tx->req - tx->done < (tx->mask + 1)/4) { 2818 *(flags) &= ~IFF_DRV_OACTIVE; 2819 ss->tx.wake++; 2820 mxge_start_locked(ss); 2821 } 2822 #ifdef IFNET_BUF_RING 2823 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { 2824 /* let the NIC stop polling this queue, since there 2825 * are no more transmits pending */ 2826 if (tx->req == tx->done) { 2827 *tx->send_stop = 1; 2828 tx->queue_active = 0; 2829 tx->deactivate++; 2830 wmb(); 2831 } 2832 } 2833 #endif 2834 mtx_unlock(&ss->tx.mtx); 2835 2836 } 2837 2838 static struct mxge_media_type mxge_xfp_media_types[] = 2839 { 2840 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2841 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2842 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2843 {0, (1 << 5), "10GBASE-ER"}, 2844 {IFM_10G_LRM, (1
<< 4), "10GBASE-LRM"}, 2845 {0, (1 << 3), "10GBASE-SW"}, 2846 {0, (1 << 2), "10GBASE-LW"}, 2847 {0, (1 << 1), "10GBASE-EW"}, 2848 {0, (1 << 0), "Reserved"} 2849 }; 2850 static struct mxge_media_type mxge_sfp_media_types[] = 2851 { 2852 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"}, 2853 {0, (1 << 7), "Reserved"}, 2854 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2855 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2856 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}, 2857 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"} 2858 }; 2859 2860 static void 2861 mxge_media_set(mxge_softc_t *sc, int media_type) 2862 { 2863 2864 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type, 2865 0, NULL); 2866 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type); 2867 sc->current_media = media_type; 2868 sc->media.ifm_media = sc->media.ifm_cur->ifm_media; 2869 } 2870 2871 static void 2872 mxge_media_init(mxge_softc_t *sc) 2873 { 2874 char *ptr; 2875 int i; 2876 2877 ifmedia_removeall(&sc->media); 2878 mxge_media_set(sc, IFM_AUTO); 2879 2880 /* 2881 * parse the product code to determine the interface type 2882 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2883 * after the 3rd dash in the driver's cached copy of the 2884 * EEPROM's product code string. 2885 */ 2886 ptr = sc->product_code_string; 2887 if (ptr == NULL) { 2888 device_printf(sc->dev, "Missing product code\n"); 2889 return; 2890 } 2891 2892 for (i = 0; i < 3; i++, ptr++) { 2893 ptr = strchr(ptr, '-'); 2894 if (ptr == NULL) { 2895 device_printf(sc->dev, 2896 "only %d dashes in PC?!?\n", i); 2897 return; 2898 } 2899 } 2900 if (*ptr == 'C' || *(ptr +1) == 'C') { 2901 /* -C is CX4 */ 2902 sc->connector = MXGE_CX4; 2903 mxge_media_set(sc, IFM_10G_CX4); 2904 } else if (*ptr == 'Q') { 2905 /* -Q is Quad Ribbon Fiber */ 2906 sc->connector = MXGE_QRF; 2907 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2908 /* FreeBSD has no media type for Quad ribbon fiber */ 2909 } else if (*ptr == 'R') { 2910 /* -R is XFP */ 2911 sc->connector = MXGE_XFP; 2912 } else if (*ptr == 'S' || *(ptr +1) == 'S') { 2913 /* -S or -2S is SFP+ */ 2914 sc->connector = MXGE_SFP; 2915 } else { 2916 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2917 } 2918 } 2919 2920 /* 2921 * Determine the media type for a NIC. Some XFPs will identify 2922 * themselves only when their link is up, so this is initiated via a 2923 * link up interrupt. However, this can potentially take up to 2924 * several milliseconds, so it is run via the watchdog routine, rather 2925 * than in the interrupt handler itself.
2926 */ 2927 static void 2928 mxge_media_probe(mxge_softc_t *sc) 2929 { 2930 mxge_cmd_t cmd; 2931 char *cage_type; 2932 2933 struct mxge_media_type *mxge_media_types = NULL; 2934 int i, err, ms, mxge_media_type_entries; 2935 uint32_t byte; 2936 2937 sc->need_media_probe = 0; 2938 2939 if (sc->connector == MXGE_XFP) { 2940 /* -R is XFP */ 2941 mxge_media_types = mxge_xfp_media_types; 2942 mxge_media_type_entries = 2943 nitems(mxge_xfp_media_types); 2944 byte = MXGE_XFP_COMPLIANCE_BYTE; 2945 cage_type = "XFP"; 2946 } else if (sc->connector == MXGE_SFP) { 2947 /* -S or -2S is SFP+ */ 2948 mxge_media_types = mxge_sfp_media_types; 2949 mxge_media_type_entries = 2950 nitems(mxge_sfp_media_types); 2951 cage_type = "SFP+"; 2952 byte = 3; 2953 } else { 2954 /* nothing to do; media type cannot change */ 2955 return; 2956 } 2957 2958 /* 2959 * At this point we know the NIC has an XFP cage, so now we 2960 * try to determine what is in the cage by using the 2961 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2962 * register. We read just one byte, which may take over 2963 * a millisecond 2964 */ 2965 2966 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2967 cmd.data1 = byte; 2968 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2969 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 2970 device_printf(sc->dev, "failed to read XFP\n"); 2971 } 2972 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 2973 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 2974 } 2975 if (err != MXGEFW_CMD_OK) { 2976 return; 2977 } 2978 2979 /* now we wait for the data to be cached */ 2980 cmd.data0 = byte; 2981 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2982 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 2983 DELAY(1000); 2984 cmd.data0 = byte; 2985 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2986 } 2987 if (err != MXGEFW_CMD_OK) { 2988 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 2989 cage_type, err, ms); 2990 return; 2991 } 2992 2993 if (cmd.data0 == mxge_media_types[0].bitmask) { 2994 if (mxge_verbose) 2995 device_printf(sc->dev, "%s:%s\n", cage_type, 2996 mxge_media_types[0].name); 2997 if (sc->current_media != mxge_media_types[0].flag) { 2998 mxge_media_init(sc); 2999 mxge_media_set(sc, mxge_media_types[0].flag); 3000 } 3001 return; 3002 } 3003 for (i = 1; i < mxge_media_type_entries; i++) { 3004 if (cmd.data0 & mxge_media_types[i].bitmask) { 3005 if (mxge_verbose) 3006 device_printf(sc->dev, "%s:%s\n", 3007 cage_type, 3008 mxge_media_types[i].name); 3009 3010 if (sc->current_media != mxge_media_types[i].flag) { 3011 mxge_media_init(sc); 3012 mxge_media_set(sc, mxge_media_types[i].flag); 3013 } 3014 return; 3015 } 3016 } 3017 if (mxge_verbose) 3018 device_printf(sc->dev, "%s media 0x%x unknown\n", 3019 cage_type, cmd.data0); 3020 3021 return; 3022 } 3023 3024 static void 3025 mxge_intr(void *arg) 3026 { 3027 struct mxge_slice_state *ss = arg; 3028 mxge_softc_t *sc = ss->sc; 3029 mcp_irq_data_t *stats = ss->fw_stats; 3030 mxge_tx_ring_t *tx = &ss->tx; 3031 mxge_rx_done_t *rx_done = &ss->rx_done; 3032 uint32_t send_done_count; 3033 uint8_t valid; 3034 3035 #ifndef IFNET_BUF_RING 3036 /* an interrupt on a non-zero slice is implicitly valid 3037 since MSI-X irqs are not shared */ 3038 if (ss != sc->ss) { 3039 mxge_clean_rx_done(ss); 3040 *ss->irq_claim = be32toh(3); 3041 return; 3042 } 3043 #endif 3044 3045 /* make sure the DMA has finished */ 3046 if (!stats->valid) { 3047 return; 3048 } 3049 valid = stats->valid; 3050 3051 if (sc->legacy_irq) { 3052 /* lower legacy IRQ */ 3053 
*sc->irq_deassert = 0; 3054 if (!mxge_deassert_wait) 3055 /* don't wait for conf. that irq is low */ 3056 stats->valid = 0; 3057 } else { 3058 stats->valid = 0; 3059 } 3060 3061 /* loop while waiting for legacy irq deassertion */ 3062 do { 3063 /* check for transmit completes and receives */ 3064 send_done_count = be32toh(stats->send_done_count); 3065 while ((send_done_count != tx->pkt_done) || 3066 (rx_done->entry[rx_done->idx].length != 0)) { 3067 if (send_done_count != tx->pkt_done) 3068 mxge_tx_done(ss, (int)send_done_count); 3069 mxge_clean_rx_done(ss); 3070 send_done_count = be32toh(stats->send_done_count); 3071 } 3072 if (sc->legacy_irq && mxge_deassert_wait) 3073 wmb(); 3074 } while (*((volatile uint8_t *) &stats->valid)); 3075 3076 /* fw link & error stats meaningful only on the first slice */ 3077 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 3078 if (sc->link_state != stats->link_up) { 3079 sc->link_state = stats->link_up; 3080 if (sc->link_state) { 3081 if_link_state_change(sc->ifp, LINK_STATE_UP); 3082 if (mxge_verbose) 3083 device_printf(sc->dev, "link up\n"); 3084 } else { 3085 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3086 if (mxge_verbose) 3087 device_printf(sc->dev, "link down\n"); 3088 } 3089 sc->need_media_probe = 1; 3090 } 3091 if (sc->rdma_tags_available != 3092 be32toh(stats->rdma_tags_available)) { 3093 sc->rdma_tags_available = 3094 be32toh(stats->rdma_tags_available); 3095 device_printf(sc->dev, "RDMA timed out! %d tags " 3096 "left\n", sc->rdma_tags_available); 3097 } 3098 3099 if (stats->link_down) { 3100 sc->down_cnt += stats->link_down; 3101 sc->link_state = 0; 3102 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3103 } 3104 } 3105 3106 /* check to see if we have rx token to pass back */ 3107 if (valid & 0x1) 3108 *ss->irq_claim = be32toh(3); 3109 *(ss->irq_claim + 1) = be32toh(3); 3110 } 3111 3112 static void 3113 mxge_init(void *arg) 3114 { 3115 mxge_softc_t *sc = arg; 3116 struct ifnet *ifp = sc->ifp; 3117 3118 mtx_lock(&sc->driver_mtx); 3119 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 3120 (void) mxge_open(sc); 3121 mtx_unlock(&sc->driver_mtx); 3122 } 3123 3124 static void 3125 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 3126 { 3127 int i; 3128 3129 #if defined(INET) || defined(INET6) 3130 tcp_lro_free(&ss->lc); 3131 #endif 3132 for (i = 0; i <= ss->rx_big.mask; i++) { 3133 if (ss->rx_big.info[i].m == NULL) 3134 continue; 3135 bus_dmamap_unload(ss->rx_big.dmat, 3136 ss->rx_big.info[i].map); 3137 m_freem(ss->rx_big.info[i].m); 3138 ss->rx_big.info[i].m = NULL; 3139 } 3140 3141 for (i = 0; i <= ss->rx_small.mask; i++) { 3142 if (ss->rx_small.info[i].m == NULL) 3143 continue; 3144 bus_dmamap_unload(ss->rx_small.dmat, 3145 ss->rx_small.info[i].map); 3146 m_freem(ss->rx_small.info[i].m); 3147 ss->rx_small.info[i].m = NULL; 3148 } 3149 3150 /* transmit ring used only on the first slice */ 3151 if (ss->tx.info == NULL) 3152 return; 3153 3154 for (i = 0; i <= ss->tx.mask; i++) { 3155 ss->tx.info[i].flag = 0; 3156 if (ss->tx.info[i].m == NULL) 3157 continue; 3158 bus_dmamap_unload(ss->tx.dmat, 3159 ss->tx.info[i].map); 3160 m_freem(ss->tx.info[i].m); 3161 ss->tx.info[i].m = NULL; 3162 } 3163 } 3164 3165 static void 3166 mxge_free_mbufs(mxge_softc_t *sc) 3167 { 3168 int slice; 3169 3170 for (slice = 0; slice < sc->num_slices; slice++) 3171 mxge_free_slice_mbufs(&sc->ss[slice]); 3172 } 3173 3174 static void 3175 mxge_free_slice_rings(struct mxge_slice_state *ss) 3176 { 3177 int i; 3178 3179 if (ss->rx_done.entry != NULL) 3180 
mxge_dma_free(&ss->rx_done.dma); 3181 ss->rx_done.entry = NULL; 3182 3183 if (ss->tx.req_bytes != NULL) 3184 free(ss->tx.req_bytes, M_DEVBUF); 3185 ss->tx.req_bytes = NULL; 3186 3187 if (ss->tx.seg_list != NULL) 3188 free(ss->tx.seg_list, M_DEVBUF); 3189 ss->tx.seg_list = NULL; 3190 3191 if (ss->rx_small.shadow != NULL) 3192 free(ss->rx_small.shadow, M_DEVBUF); 3193 ss->rx_small.shadow = NULL; 3194 3195 if (ss->rx_big.shadow != NULL) 3196 free(ss->rx_big.shadow, M_DEVBUF); 3197 ss->rx_big.shadow = NULL; 3198 3199 if (ss->tx.info != NULL) { 3200 if (ss->tx.dmat != NULL) { 3201 for (i = 0; i <= ss->tx.mask; i++) { 3202 bus_dmamap_destroy(ss->tx.dmat, 3203 ss->tx.info[i].map); 3204 } 3205 bus_dma_tag_destroy(ss->tx.dmat); 3206 } 3207 free(ss->tx.info, M_DEVBUF); 3208 } 3209 ss->tx.info = NULL; 3210 3211 if (ss->rx_small.info != NULL) { 3212 if (ss->rx_small.dmat != NULL) { 3213 for (i = 0; i <= ss->rx_small.mask; i++) { 3214 bus_dmamap_destroy(ss->rx_small.dmat, 3215 ss->rx_small.info[i].map); 3216 } 3217 bus_dmamap_destroy(ss->rx_small.dmat, 3218 ss->rx_small.extra_map); 3219 bus_dma_tag_destroy(ss->rx_small.dmat); 3220 } 3221 free(ss->rx_small.info, M_DEVBUF); 3222 } 3223 ss->rx_small.info = NULL; 3224 3225 if (ss->rx_big.info != NULL) { 3226 if (ss->rx_big.dmat != NULL) { 3227 for (i = 0; i <= ss->rx_big.mask; i++) { 3228 bus_dmamap_destroy(ss->rx_big.dmat, 3229 ss->rx_big.info[i].map); 3230 } 3231 bus_dmamap_destroy(ss->rx_big.dmat, 3232 ss->rx_big.extra_map); 3233 bus_dma_tag_destroy(ss->rx_big.dmat); 3234 } 3235 free(ss->rx_big.info, M_DEVBUF); 3236 } 3237 ss->rx_big.info = NULL; 3238 } 3239 3240 static void 3241 mxge_free_rings(mxge_softc_t *sc) 3242 { 3243 int slice; 3244 3245 for (slice = 0; slice < sc->num_slices; slice++) 3246 mxge_free_slice_rings(&sc->ss[slice]); 3247 } 3248 3249 static int 3250 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3251 int tx_ring_entries) 3252 { 3253 mxge_softc_t *sc = ss->sc; 3254 size_t bytes; 3255 int err, i; 3256 3257 /* allocate per-slice receive resources */ 3258 3259 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 3260 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 3261 3262 /* allocate the rx shadow rings */ 3263 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 3264 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3265 3266 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 3267 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3268 3269 /* allocate the rx host info rings */ 3270 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 3271 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3272 3273 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 3274 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3275 3276 /* allocate the rx busdma resources */ 3277 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3278 1, /* alignment */ 3279 4096, /* boundary */ 3280 BUS_SPACE_MAXADDR, /* low */ 3281 BUS_SPACE_MAXADDR, /* high */ 3282 NULL, NULL, /* filter */ 3283 MHLEN, /* maxsize */ 3284 1, /* num segs */ 3285 MHLEN, /* maxsegsize */ 3286 BUS_DMA_ALLOCNOW, /* flags */ 3287 NULL, NULL, /* lock */ 3288 &ss->rx_small.dmat); /* tag */ 3289 if (err != 0) { 3290 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3291 err); 3292 return err; 3293 } 3294 3295 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3296 1, /* alignment */ 3297 #if MXGE_VIRT_JUMBOS 3298 4096, /* boundary */ 3299 #else 3300 0, /* boundary */ 3301 #endif 3302 
BUS_SPACE_MAXADDR, /* low */ 3303 BUS_SPACE_MAXADDR, /* high */ 3304 NULL, NULL, /* filter */ 3305 3*4096, /* maxsize */ 3306 #if MXGE_VIRT_JUMBOS 3307 3, /* num segs */ 3308 4096, /* maxsegsize*/ 3309 #else 3310 1, /* num segs */ 3311 MJUM9BYTES, /* maxsegsize*/ 3312 #endif 3313 BUS_DMA_ALLOCNOW, /* flags */ 3314 NULL, NULL, /* lock */ 3315 &ss->rx_big.dmat); /* tag */ 3316 if (err != 0) { 3317 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3318 err); 3319 return err; 3320 } 3321 for (i = 0; i <= ss->rx_small.mask; i++) { 3322 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3323 &ss->rx_small.info[i].map); 3324 if (err != 0) { 3325 device_printf(sc->dev, "Err %d rx_small dmamap\n", 3326 err); 3327 return err; 3328 } 3329 } 3330 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3331 &ss->rx_small.extra_map); 3332 if (err != 0) { 3333 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 3334 err); 3335 return err; 3336 } 3337 3338 for (i = 0; i <= ss->rx_big.mask; i++) { 3339 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3340 &ss->rx_big.info[i].map); 3341 if (err != 0) { 3342 device_printf(sc->dev, "Err %d rx_big dmamap\n", 3343 err); 3344 return err; 3345 } 3346 } 3347 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3348 &ss->rx_big.extra_map); 3349 if (err != 0) { 3350 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 3351 err); 3352 return err; 3353 } 3354 3355 /* now allocate TX resources */ 3356 3357 #ifndef IFNET_BUF_RING 3358 /* only use a single TX ring for now */ 3359 if (ss != ss->sc->ss) 3360 return 0; 3361 #endif 3362 3363 ss->tx.mask = tx_ring_entries - 1; 3364 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3365 3366 /* allocate the tx request copy block */ 3367 bytes = 8 + 3368 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3369 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 3370 /* ensure req_list entries are aligned to 8 bytes */ 3371 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3372 ((uintptr_t)(ss->tx.req_bytes + 7) & ~7UL); 3373 3374 /* allocate the tx busdma segment list */ 3375 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3376 ss->tx.seg_list = (bus_dma_segment_t *) 3377 malloc(bytes, M_DEVBUF, M_WAITOK); 3378 3379 /* allocate the tx host info ring */ 3380 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3381 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3382 3383 /* allocate the tx busdma resources */ 3384 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3385 1, /* alignment */ 3386 sc->tx_boundary, /* boundary */ 3387 BUS_SPACE_MAXADDR, /* low */ 3388 BUS_SPACE_MAXADDR, /* high */ 3389 NULL, NULL, /* filter */ 3390 65536 + 256, /* maxsize */ 3391 ss->tx.max_desc - 2, /* num segs */ 3392 sc->tx_boundary, /* maxsegsz */ 3393 BUS_DMA_ALLOCNOW, /* flags */ 3394 NULL, NULL, /* lock */ 3395 &ss->tx.dmat); /* tag */ 3396 3397 if (err != 0) { 3398 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3399 err); 3400 return err; 3401 } 3402 3403 /* now use these tags to setup dmamaps for each slot 3404 in the ring */ 3405 for (i = 0; i <= ss->tx.mask; i++) { 3406 err = bus_dmamap_create(ss->tx.dmat, 0, 3407 &ss->tx.info[i].map); 3408 if (err != 0) { 3409 device_printf(sc->dev, "Err %d tx dmamap\n", 3410 err); 3411 return err; 3412 } 3413 } 3414 return 0; 3415 3416 } 3417 3418 static int 3419 mxge_alloc_rings(mxge_softc_t *sc) 3420 { 3421 mxge_cmd_t cmd; 3422 int tx_ring_size; 3423 int tx_ring_entries, rx_ring_entries; 3424 int err, slice; 3425 3426 /* get ring sizes */ 3427 err = mxge_send_cmd(sc, 
MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3428 tx_ring_size = cmd.data0; 3429 if (err != 0) { 3430 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3431 goto abort; 3432 } 3433 3434 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3435 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3436 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 3437 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 3438 IFQ_SET_READY(&sc->ifp->if_snd); 3439 3440 for (slice = 0; slice < sc->num_slices; slice++) { 3441 err = mxge_alloc_slice_rings(&sc->ss[slice], 3442 rx_ring_entries, 3443 tx_ring_entries); 3444 if (err != 0) 3445 goto abort; 3446 } 3447 return 0; 3448 3449 abort: 3450 mxge_free_rings(sc); 3451 return err; 3452 3453 } 3454 3455 static void 3456 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3457 { 3458 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3459 3460 if (bufsize < MCLBYTES) { 3461 /* easy, everything fits in a single buffer */ 3462 *big_buf_size = MCLBYTES; 3463 *cl_size = MCLBYTES; 3464 *nbufs = 1; 3465 return; 3466 } 3467 3468 if (bufsize < MJUMPAGESIZE) { 3469 /* still easy, everything still fits in a single buffer */ 3470 *big_buf_size = MJUMPAGESIZE; 3471 *cl_size = MJUMPAGESIZE; 3472 *nbufs = 1; 3473 return; 3474 } 3475 #if MXGE_VIRT_JUMBOS 3476 /* now we need to use virtually contiguous buffers */ 3477 *cl_size = MJUM9BYTES; 3478 *big_buf_size = 4096; 3479 *nbufs = mtu / 4096 + 1; 3480 /* needs to be a power of two, so round up */ 3481 if (*nbufs == 3) 3482 *nbufs = 4; 3483 #else 3484 *cl_size = MJUM9BYTES; 3485 *big_buf_size = MJUM9BYTES; 3486 *nbufs = 1; 3487 #endif 3488 } 3489 3490 static int 3491 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3492 { 3493 mxge_softc_t *sc; 3494 mxge_cmd_t cmd; 3495 bus_dmamap_t map; 3496 int err, i, slice; 3497 3498 sc = ss->sc; 3499 slice = ss - sc->ss; 3500 3501 #if defined(INET) || defined(INET6) 3502 (void)tcp_lro_init(&ss->lc); 3503 #endif 3504 ss->lc.ifp = sc->ifp; 3505 3506 /* get the lanai pointers to the send and receive rings */ 3507 3508 err = 0; 3509 #ifndef IFNET_BUF_RING 3510 /* We currently only send from the first slice */ 3511 if (slice == 0) { 3512 #endif 3513 cmd.data0 = slice; 3514 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3515 ss->tx.lanai = 3516 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3517 ss->tx.send_go = (volatile uint32_t *) 3518 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3519 ss->tx.send_stop = (volatile uint32_t *) 3520 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3521 #ifndef IFNET_BUF_RING 3522 } 3523 #endif 3524 cmd.data0 = slice; 3525 err |= mxge_send_cmd(sc, 3526 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3527 ss->rx_small.lanai = 3528 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3529 cmd.data0 = slice; 3530 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3531 ss->rx_big.lanai = 3532 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3533 3534 if (err != 0) { 3535 device_printf(sc->dev, 3536 "failed to get ring sizes or locations\n"); 3537 return EIO; 3538 } 3539 3540 /* stock receive rings */ 3541 for (i = 0; i <= ss->rx_small.mask; i++) { 3542 map = ss->rx_small.info[i].map; 3543 err = mxge_get_buf_small(ss, map, i); 3544 if (err) { 3545 device_printf(sc->dev, "alloced %d/%d smalls\n", 3546 i, ss->rx_small.mask + 1); 3547 return ENOMEM; 3548 } 3549 } 3550 for (i = 0; i <= ss->rx_big.mask; i++) { 3551 
ss->rx_big.shadow[i].addr_low = 0xffffffff; 3552 ss->rx_big.shadow[i].addr_high = 0xffffffff; 3553 } 3554 ss->rx_big.nbufs = nbufs; 3555 ss->rx_big.cl_size = cl_size; 3556 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN + 3557 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3558 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { 3559 map = ss->rx_big.info[i].map; 3560 err = mxge_get_buf_big(ss, map, i); 3561 if (err) { 3562 device_printf(sc->dev, "alloced %d/%d bigs\n", 3563 i, ss->rx_big.mask + 1); 3564 return ENOMEM; 3565 } 3566 } 3567 return 0; 3568 } 3569 3570 static int 3571 mxge_open(mxge_softc_t *sc) 3572 { 3573 mxge_cmd_t cmd; 3574 int err, big_bytes, nbufs, slice, cl_size, i; 3575 bus_addr_t bus; 3576 volatile uint8_t *itable; 3577 struct mxge_slice_state *ss; 3578 3579 /* Copy the MAC address in case it was overridden */ 3580 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 3581 3582 err = mxge_reset(sc, 1); 3583 if (err != 0) { 3584 device_printf(sc->dev, "failed to reset\n"); 3585 return EIO; 3586 } 3587 3588 if (sc->num_slices > 1) { 3589 /* setup the indirection table */ 3590 cmd.data0 = sc->num_slices; 3591 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 3592 &cmd); 3593 3594 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 3595 &cmd); 3596 if (err != 0) { 3597 device_printf(sc->dev, 3598 "failed to setup rss tables\n"); 3599 return err; 3600 } 3601 3602 /* just enable an identity mapping */ 3603 itable = sc->sram + cmd.data0; 3604 for (i = 0; i < sc->num_slices; i++) 3605 itable[i] = (uint8_t)i; 3606 3607 cmd.data0 = 1; 3608 cmd.data1 = mxge_rss_hash_type; 3609 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3610 if (err != 0) { 3611 device_printf(sc->dev, "failed to enable slices\n"); 3612 return err; 3613 } 3614 } 3615 3616 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); 3617 3618 cmd.data0 = nbufs; 3619 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 3620 &cmd); 3621 /* error is only meaningful if we're trying to set 3622 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 3623 if (err && nbufs > 1) { 3624 device_printf(sc->dev, 3625 "Failed to set always-use-n to %d\n", 3626 nbufs); 3627 return EIO; 3628 } 3629 /* Give the firmware the mtu and the big and small buffer 3630 sizes. The firmware wants the big buf size to be a power 3631 of two.
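For example, a 9000-byte MTU needs 9000 + 14 + 4 + 2 = 9020 bytes per big receive buffer, which mxge_choose_params() satisfies with a single MJUM9BYTES cluster (or, with MXGE_VIRT_JUMBOS, several 4KB chunks).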
Luckily, FreeBSD's clusters are powers of two */ 3632 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3633 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3634 cmd.data0 = MHLEN - MXGEFW_PAD; 3635 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3636 &cmd); 3637 cmd.data0 = big_bytes; 3638 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3639 3640 if (err != 0) { 3641 device_printf(sc->dev, "failed to setup params\n"); 3642 goto abort; 3643 } 3644 3645 /* Now give him the pointer to the stats block */ 3646 for (slice = 0; 3647 #ifdef IFNET_BUF_RING 3648 slice < sc->num_slices; 3649 #else 3650 slice < 1; 3651 #endif 3652 slice++) { 3653 ss = &sc->ss[slice]; 3654 cmd.data0 = 3655 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3656 cmd.data1 = 3657 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3658 cmd.data2 = sizeof(struct mcp_irq_data); 3659 cmd.data2 |= (slice << 16); 3660 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3661 } 3662 3663 if (err != 0) { 3664 bus = sc->ss->fw_stats_dma.bus_addr; 3665 bus += offsetof(struct mcp_irq_data, send_done_count); 3666 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3667 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3668 err = mxge_send_cmd(sc, 3669 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3670 &cmd); 3671 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3672 sc->fw_multicast_support = 0; 3673 } else { 3674 sc->fw_multicast_support = 1; 3675 } 3676 3677 if (err != 0) { 3678 device_printf(sc->dev, "failed to setup params\n"); 3679 goto abort; 3680 } 3681 3682 for (slice = 0; slice < sc->num_slices; slice++) { 3683 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3684 if (err != 0) { 3685 device_printf(sc->dev, "couldn't open slice %d\n", 3686 slice); 3687 goto abort; 3688 } 3689 } 3690 3691 /* Finally, start the firmware running */ 3692 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3693 if (err) { 3694 device_printf(sc->dev, "Couldn't bring up link\n"); 3695 goto abort; 3696 } 3697 #ifdef IFNET_BUF_RING 3698 for (slice = 0; slice < sc->num_slices; slice++) { 3699 ss = &sc->ss[slice]; 3700 ss->if_drv_flags |= IFF_DRV_RUNNING; 3701 ss->if_drv_flags &= ~IFF_DRV_OACTIVE; 3702 } 3703 #endif 3704 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3705 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3706 3707 return 0; 3708 3709 abort: 3710 mxge_free_mbufs(sc); 3711 3712 return err; 3713 } 3714 3715 static int 3716 mxge_close(mxge_softc_t *sc, int down) 3717 { 3718 mxge_cmd_t cmd; 3719 int err, old_down_cnt; 3720 #ifdef IFNET_BUF_RING 3721 struct mxge_slice_state *ss; 3722 int slice; 3723 #endif 3724 3725 #ifdef IFNET_BUF_RING 3726 for (slice = 0; slice < sc->num_slices; slice++) { 3727 ss = &sc->ss[slice]; 3728 ss->if_drv_flags &= ~IFF_DRV_RUNNING; 3729 } 3730 #endif 3731 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3732 if (!down) { 3733 old_down_cnt = sc->down_cnt; 3734 wmb(); 3735 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3736 if (err) { 3737 device_printf(sc->dev, 3738 "Couldn't bring down link\n"); 3739 } 3740 if (old_down_cnt == sc->down_cnt) { 3741 /* wait for down irq */ 3742 DELAY(10 * sc->intr_coal_delay); 3743 } 3744 wmb(); 3745 if (old_down_cnt == sc->down_cnt) { 3746 device_printf(sc->dev, "never got down irq\n"); 3747 } 3748 } 3749 mxge_free_mbufs(sc); 3750 3751 return 0; 3752 } 3753 3754 static void 3755 mxge_setup_cfg_space(mxge_softc_t *sc) 3756 { 3757 device_t dev = sc->dev; 3758 int reg; 3759 uint16_t lnk, pectl; 3760 3761 /* find the PCIe link width and set max read 
request to 4KB*/ 3762 if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) { 3763 lnk = pci_read_config(dev, reg + 0x12, 2); 3764 sc->link_width = (lnk >> 4) & 0x3f; 3765 3766 if (sc->pectl == 0) { 3767 pectl = pci_read_config(dev, reg + 0x8, 2); 3768 pectl = (pectl & ~0x7000) | (5 << 12); 3769 pci_write_config(dev, reg + 0x8, pectl, 2); 3770 sc->pectl = pectl; 3771 } else { 3772 /* restore saved pectl after watchdog reset */ 3773 pci_write_config(dev, reg + 0x8, sc->pectl, 2); 3774 } 3775 } 3776 3777 /* Enable DMA and Memory space access */ 3778 pci_enable_busmaster(dev); 3779 } 3780 3781 static uint32_t 3782 mxge_read_reboot(mxge_softc_t *sc) 3783 { 3784 device_t dev = sc->dev; 3785 uint32_t vs; 3786 3787 /* find the vendor specific offset */ 3788 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) { 3789 device_printf(sc->dev, 3790 "could not find vendor specific offset\n"); 3791 return (uint32_t)-1; 3792 } 3793 /* enable read32 mode */ 3794 pci_write_config(dev, vs + 0x10, 0x3, 1); 3795 /* tell NIC which register to read */ 3796 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3797 return (pci_read_config(dev, vs + 0x14, 4)); 3798 } 3799 3800 static void 3801 mxge_watchdog_reset(mxge_softc_t *sc) 3802 { 3803 struct pci_devinfo *dinfo; 3804 struct mxge_slice_state *ss; 3805 int err, running, s, num_tx_slices = 1; 3806 uint32_t reboot; 3807 uint16_t cmd; 3808 3809 err = ENXIO; 3810 3811 device_printf(sc->dev, "Watchdog reset!\n"); 3812 3813 /* 3814 * check to see if the NIC rebooted. If it did, then all of 3815 * PCI config space has been reset, and things like the 3816 * busmaster bit will be zero. If this is the case, then we 3817 * must restore PCI config space before the NIC can be used 3818 * again 3819 */ 3820 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3821 if (cmd == 0xffff) { 3822 /* 3823 * maybe the watchdog caught the NIC rebooting; wait 3824 * up to 100ms for it to finish.
If it does not come 3825 * back, then give up 3826 */ 3827 DELAY(1000*100); 3828 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3829 if (cmd == 0xffff) { 3830 device_printf(sc->dev, "NIC disappeared!\n"); 3831 } 3832 } 3833 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3834 /* print the reboot status */ 3835 reboot = mxge_read_reboot(sc); 3836 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3837 reboot); 3838 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 3839 if (running) { 3840 /* 3841 * quiesce NIC so that TX routines will not try to 3842 * xmit after restoration of BAR 3843 */ 3844 3845 /* Mark the link as down */ 3846 if (sc->link_state) { 3847 sc->link_state = 0; 3848 if_link_state_change(sc->ifp, 3849 LINK_STATE_DOWN); 3850 } 3851 #ifdef IFNET_BUF_RING 3852 num_tx_slices = sc->num_slices; 3853 #endif 3854 /* grab all TX locks to ensure no tx */ 3855 for (s = 0; s < num_tx_slices; s++) { 3856 ss = &sc->ss[s]; 3857 mtx_lock(&ss->tx.mtx); 3858 } 3859 mxge_close(sc, 1); 3860 } 3861 /* restore PCI configuration space */ 3862 dinfo = device_get_ivars(sc->dev); 3863 pci_cfg_restore(sc->dev, dinfo); 3864 3865 /* and redo any changes we made to our config space */ 3866 mxge_setup_cfg_space(sc); 3867 3868 /* reload f/w */ 3869 err = mxge_load_firmware(sc, 0); 3870 if (err) { 3871 device_printf(sc->dev, 3872 "Unable to re-load f/w\n"); 3873 } 3874 if (running) { 3875 if (!err) 3876 err = mxge_open(sc); 3877 /* release all TX locks */ 3878 for (s = 0; s < num_tx_slices; s++) { 3879 ss = &sc->ss[s]; 3880 #ifdef IFNET_BUF_RING 3881 mxge_start_locked(ss); 3882 #endif 3883 mtx_unlock(&ss->tx.mtx); 3884 } 3885 } 3886 sc->watchdog_resets++; 3887 } else { 3888 device_printf(sc->dev, 3889 "NIC did not reboot, not resetting\n"); 3890 err = 0; 3891 } 3892 if (err) { 3893 device_printf(sc->dev, "watchdog reset failed\n"); 3894 } else { 3895 if (sc->dying == 2) 3896 sc->dying = 0; 3897 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3898 } 3899 } 3900 3901 static void 3902 mxge_watchdog_task(void *arg, int pending) 3903 { 3904 mxge_softc_t *sc = arg; 3905 3906 mtx_lock(&sc->driver_mtx); 3907 mxge_watchdog_reset(sc); 3908 mtx_unlock(&sc->driver_mtx); 3909 } 3910 3911 static void 3912 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice) 3913 { 3914 tx = &sc->ss[slice].tx; 3915 device_printf(sc->dev, "slice %d stuck? 
ring state:\n", slice); 3916 device_printf(sc->dev, 3917 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3918 tx->req, tx->done, tx->queue_active); 3919 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3920 tx->activate, tx->deactivate); 3921 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3922 tx->pkt_done, 3923 be32toh(sc->ss->fw_stats->send_done_count)); 3924 } 3925 3926 static int 3927 mxge_watchdog(mxge_softc_t *sc) 3928 { 3929 mxge_tx_ring_t *tx; 3930 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3931 int i, err = 0; 3932 3933 /* see if we have outstanding transmits, which 3934 have been pending for more than mxge_ticks */ 3935 for (i = 0; 3936 #ifdef IFNET_BUF_RING 3937 (i < sc->num_slices) && (err == 0); 3938 #else 3939 (i < 1) && (err == 0); 3940 #endif 3941 i++) { 3942 tx = &sc->ss[i].tx; 3943 if (tx->req != tx->done && 3944 tx->watchdog_req != tx->watchdog_done && 3945 tx->done == tx->watchdog_done) { 3946 /* check for pause blocking before resetting */ 3947 if (tx->watchdog_rx_pause == rx_pause) { 3948 mxge_warn_stuck(sc, tx, i); 3949 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 3950 return (ENXIO); 3951 } 3952 else 3953 device_printf(sc->dev, "Flow control blocking " 3954 "xmits, check link partner\n"); 3955 } 3956 3957 tx->watchdog_req = tx->req; 3958 tx->watchdog_done = tx->done; 3959 tx->watchdog_rx_pause = rx_pause; 3960 } 3961 3962 if (sc->need_media_probe) 3963 mxge_media_probe(sc); 3964 return (err); 3965 } 3966 3967 static uint64_t 3968 mxge_get_counter(struct ifnet *ifp, ift_counter cnt) 3969 { 3970 struct mxge_softc *sc; 3971 uint64_t rv; 3972 3973 sc = if_getsoftc(ifp); 3974 rv = 0; 3975 3976 switch (cnt) { 3977 case IFCOUNTER_IPACKETS: 3978 for (int s = 0; s < sc->num_slices; s++) 3979 rv += sc->ss[s].ipackets; 3980 return (rv); 3981 case IFCOUNTER_OPACKETS: 3982 for (int s = 0; s < sc->num_slices; s++) 3983 rv += sc->ss[s].opackets; 3984 return (rv); 3985 case IFCOUNTER_OERRORS: 3986 for (int s = 0; s < sc->num_slices; s++) 3987 rv += sc->ss[s].oerrors; 3988 return (rv); 3989 #ifdef IFNET_BUF_RING 3990 case IFCOUNTER_OBYTES: 3991 for (int s = 0; s < sc->num_slices; s++) 3992 rv += sc->ss[s].obytes; 3993 return (rv); 3994 case IFCOUNTER_OMCASTS: 3995 for (int s = 0; s < sc->num_slices; s++) 3996 rv += sc->ss[s].omcasts; 3997 return (rv); 3998 case IFCOUNTER_OQDROPS: 3999 for (int s = 0; s < sc->num_slices; s++) 4000 rv += sc->ss[s].tx.br->br_drops; 4001 return (rv); 4002 #endif 4003 default: 4004 return (if_get_counter_default(ifp, cnt)); 4005 } 4006 } 4007 4008 static void 4009 mxge_tick(void *arg) 4010 { 4011 mxge_softc_t *sc = arg; 4012 u_long pkts = 0; 4013 int err = 0; 4014 int running, ticks; 4015 uint16_t cmd; 4016 4017 ticks = mxge_ticks; 4018 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 4019 if (running) { 4020 if (!sc->watchdog_countdown) { 4021 err = mxge_watchdog(sc); 4022 sc->watchdog_countdown = 4; 4023 } 4024 sc->watchdog_countdown--; 4025 } 4026 if (pkts == 0) { 4027 /* ensure NIC did not suffer h/w fault while idle */ 4028 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 4029 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 4030 sc->dying = 2; 4031 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 4032 err = ENXIO; 4033 } 4034 /* look less often if NIC is idle */ 4035 ticks *= 4; 4036 } 4037 4038 if (err == 0) 4039 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 4040 4041 } 4042 4043 static int 4044 mxge_media_change(struct ifnet *ifp) 4045 { 4046 return EINVAL; 4047 } 4048 4049 static int 4050 mxge_change_mtu(mxge_softc_t 
*sc, int mtu) 4051 { 4052 struct ifnet *ifp = sc->ifp; 4053 int real_mtu, old_mtu; 4054 int err = 0; 4055 4056 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 4057 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 4058 return EINVAL; 4059 mtx_lock(&sc->driver_mtx); 4060 old_mtu = ifp->if_mtu; 4061 ifp->if_mtu = mtu; 4062 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4063 mxge_close(sc, 0); 4064 err = mxge_open(sc); 4065 if (err != 0) { 4066 ifp->if_mtu = old_mtu; 4067 mxge_close(sc, 0); 4068 (void) mxge_open(sc); 4069 } 4070 } 4071 mtx_unlock(&sc->driver_mtx); 4072 return err; 4073 } 4074 4075 static void 4076 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 4077 { 4078 mxge_softc_t *sc = ifp->if_softc; 4079 4080 if (sc == NULL) 4081 return; 4082 ifmr->ifm_status = IFM_AVALID; 4083 ifmr->ifm_active = IFM_ETHER | IFM_FDX; 4084 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 4085 ifmr->ifm_active |= sc->current_media; 4086 } 4087 4088 static int 4089 mxge_fetch_i2c(mxge_softc_t *sc, struct ifi2creq *i2c) 4090 { 4091 mxge_cmd_t cmd; 4092 uint32_t i2c_args; 4093 int i, ms, err; 4094 4095 if (i2c->dev_addr != 0xA0 && 4096 i2c->dev_addr != 0xA2) 4097 return (EINVAL); 4098 if (i2c->len > sizeof(i2c->data)) 4099 return (EINVAL); 4100 4101 for (i = 0; i < i2c->len; i++) { 4102 i2c_args = i2c->dev_addr << 0x8; 4103 i2c_args |= i2c->offset + i; 4104 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 4105 cmd.data1 = i2c_args; 4106 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 4107 4108 if (err != MXGEFW_CMD_OK) 4109 return (EIO); 4110 /* now we wait for the data to be cached */ 4111 cmd.data0 = i2c_args & 0xff; 4112 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 4113 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 4114 cmd.data0 = i2c_args & 0xff; 4115 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 4116 if (err == EBUSY) 4117 DELAY(1000); 4118 } 4119 if (err != MXGEFW_CMD_OK) 4120 return (EIO); 4121 i2c->data[i] = cmd.data0; 4122 } 4123 return (0); 4124 } 4125 4126 static int 4127 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 4128 { 4129 mxge_softc_t *sc = ifp->if_softc; 4130 struct ifreq *ifr = (struct ifreq *)data; 4131 struct ifi2creq i2c; 4132 int err, mask; 4133 4134 err = 0; 4135 switch (command) { 4136 case SIOCSIFMTU: 4137 err = mxge_change_mtu(sc, ifr->ifr_mtu); 4138 break; 4139 4140 case SIOCSIFFLAGS: 4141 mtx_lock(&sc->driver_mtx); 4142 if (sc->dying) { 4143 mtx_unlock(&sc->driver_mtx); 4144 return EINVAL; 4145 } 4146 if (ifp->if_flags & IFF_UP) { 4147 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 4148 err = mxge_open(sc); 4149 } else { 4150 /* take care of promisc and allmulti 4151 flag changes */ 4152 mxge_change_promisc(sc, 4153 ifp->if_flags & IFF_PROMISC); 4154 mxge_set_multicast_list(sc); 4155 } 4156 } else { 4157 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4158 mxge_close(sc, 0); 4159 } 4160 } 4161 mtx_unlock(&sc->driver_mtx); 4162 break; 4163 4164 case SIOCADDMULTI: 4165 case SIOCDELMULTI: 4166 mtx_lock(&sc->driver_mtx); 4167 if (sc->dying) { 4168 mtx_unlock(&sc->driver_mtx); 4169 return (EINVAL); 4170 } 4171 mxge_set_multicast_list(sc); 4172 mtx_unlock(&sc->driver_mtx); 4173 break; 4174 4175 case SIOCSIFCAP: 4176 mtx_lock(&sc->driver_mtx); 4177 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 4178 if (mask & IFCAP_TXCSUM) { 4179 if (IFCAP_TXCSUM & ifp->if_capenable) { 4180 mask &= ~IFCAP_TSO4; 4181 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 4182 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP); 4183 } else { 4184 ifp->if_capenable |= 
	case SIOCSIFCAP:
		mtx_lock(&sc->driver_mtx);
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & ifp->if_capenable) {
				mask &= ~IFCAP_TSO4;
				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM;
				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
			}
		}
		if (mask & IFCAP_RXCSUM) {
			if (IFCAP_RXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_RXCSUM;
			} else {
				ifp->if_capenable |= IFCAP_RXCSUM;
			}
		}
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO4;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				printf("mxge requires tx checksum offload"
				    " be enabled to use TSO\n");
				err = EINVAL;
			}
		}
#if IFCAP_TSO6
		if (mask & IFCAP_TXCSUM_IPV6) {
			if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
				mask &= ~IFCAP_TSO6;
				ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6
				    | IFCAP_TSO6);
				ifp->if_hwassist &= ~(CSUM_TCP_IPV6
				    | CSUM_UDP_IPV6);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM_IPV6;
				ifp->if_hwassist |= (CSUM_TCP_IPV6
				    | CSUM_UDP_IPV6);
			}
		}
		if (mask & IFCAP_RXCSUM_IPV6) {
			if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6;
			} else {
				ifp->if_capenable |= IFCAP_RXCSUM_IPV6;
			}
		}
		if (mask & IFCAP_TSO6) {
			if (IFCAP_TSO6 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO6;
			} else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO6;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				printf("mxge requires tx checksum offload"
				    " be enabled to use TSO\n");
				err = EINVAL;
			}
		}
#endif /* IFCAP_TSO6 */

		if (mask & IFCAP_LRO)
			ifp->if_capenable ^= IFCAP_LRO;
		if (mask & IFCAP_VLAN_HWTAGGING)
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
		if (mask & IFCAP_VLAN_HWTSO)
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;

		if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) ||
		    !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING))
			ifp->if_capenable &= ~IFCAP_VLAN_HWTSO;

		mtx_unlock(&sc->driver_mtx);
		VLAN_CAPABILITIES(ifp);

		break;

	case SIOCGIFMEDIA:
		mtx_lock(&sc->driver_mtx);
		if (sc->dying) {
			mtx_unlock(&sc->driver_mtx);
			return (EINVAL);
		}
		mxge_media_probe(sc);
		mtx_unlock(&sc->driver_mtx);
		err = ifmedia_ioctl(ifp, (struct ifreq *)data,
		    &sc->media, command);
		break;

	case SIOCGI2C:
		if (sc->connector != MXGE_XFP &&
		    sc->connector != MXGE_SFP) {
			err = ENXIO;
			break;
		}
		err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
		if (err != 0)
			break;
		mtx_lock(&sc->driver_mtx);
		if (sc->dying) {
			mtx_unlock(&sc->driver_mtx);
			return (EINVAL);
		}
		err = mxge_fetch_i2c(sc, &i2c);
		mtx_unlock(&sc->driver_mtx);
		if (err == 0)
			err = copyout(&i2c, ifr_data_get_ptr(ifr),
			    sizeof(i2c));
		break;
	default:
		err = ether_ioctl(ifp, command, data);
		break;
	}
	return err;
}
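/*
 * The hw.mxge.* knobs below are kernel environment tunables, so they
 * can be set from /boot/loader.conf before the driver loads, e.g.
 * (values shown are only illustrative):
 *
 *	hw.mxge.max_slices="4"
 *	hw.mxge.intr_coal_delay="30"
 *
 * Out-of-range values are clamped to safe defaults below.
 */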
static void
mxge_fetch_tunables(mxge_softc_t *sc)
{

	TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
	TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
	    &mxge_flow_control);
	TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
	    &mxge_intr_coal_delay);
	TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
	    &mxge_nvidia_ecrc_enable);
	TUNABLE_INT_FETCH("hw.mxge.force_firmware",
	    &mxge_force_firmware);
	TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
	    &mxge_deassert_wait);
	TUNABLE_INT_FETCH("hw.mxge.verbose",
	    &mxge_verbose);
	TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
	TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
	TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
	TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
	TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
	TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);

	if (bootverbose)
		mxge_verbose = 1;
	if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
		mxge_intr_coal_delay = 30;
	if (mxge_ticks == 0)
		mxge_ticks = hz / 2;
	sc->pause = mxge_flow_control;
	if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
	    || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
		mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
	}
	if (mxge_initial_mtu > ETHERMTU_JUMBO ||
	    mxge_initial_mtu < ETHER_MIN_LEN)
		mxge_initial_mtu = ETHERMTU_JUMBO;

	if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
		mxge_throttle = MXGE_MAX_THROTTLE;
	if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
		mxge_throttle = MXGE_MIN_THROTTLE;
	sc->throttle = mxge_throttle;
}

static void
mxge_free_slices(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int i;

	if (sc->ss == NULL)
		return;

	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];
		if (ss->fw_stats != NULL) {
			mxge_dma_free(&ss->fw_stats_dma);
			ss->fw_stats = NULL;
#ifdef IFNET_BUF_RING
			if (ss->tx.br != NULL) {
				drbr_free(ss->tx.br, M_DEVBUF);
				ss->tx.br = NULL;
			}
#endif
			mtx_destroy(&ss->tx.mtx);
		}
		if (ss->rx_done.entry != NULL) {
			mxge_dma_free(&ss->rx_done.dma);
			ss->rx_done.entry = NULL;
		}
	}
	free(sc->ss, M_DEVBUF);
	sc->ss = NULL;
}

static int
mxge_alloc_slices(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct mxge_slice_state *ss;
	size_t bytes;
	int err, i, max_intr_slots;

	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		return err;
	}
	sc->rx_ring_size = cmd.data0;
	max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));

	bytes = sizeof (*sc->ss) * sc->num_slices;
	sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sc->ss == NULL)
		return (ENOMEM);
	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];

		ss->sc = sc;

		/* allocate per-slice rx interrupt queues */

		bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
		err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
		if (err != 0)
			goto abort;
		ss->rx_done.entry = ss->rx_done.dma.addr;
		bzero(ss->rx_done.entry, bytes);

		/*
		 * allocate the per-slice firmware stats; stats
		 * (including tx) are used only on the first
		 * slice for now
		 */
#ifndef IFNET_BUF_RING
		if (i > 0)
			continue;
#endif

		bytes = sizeof (*ss->fw_stats);
		err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
		    sizeof (*ss->fw_stats), 64);
		if (err != 0)
			goto abort;
		ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
		snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
		    "%s:tx(%d)", device_get_nameunit(sc->dev), i);
		mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
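		/*
		 * With IFNET_BUF_RING, each slice also gets its own
		 * software transmit ring, protected by the per-slice
		 * tx mutex initialized above.
		 */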
#ifdef IFNET_BUF_RING
		ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
		    &ss->tx.mtx);
#endif
	}

	return (0);

abort:
	mxge_free_slices(sc);
	return (ENOMEM);
}

static void
mxge_slice_probe(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	char *old_fw;
	int msix_cnt, status, max_intr_slots;

	sc->num_slices = 1;
	/*
	 * don't enable multiple slices if they have been disabled by
	 * the tunable, or if this is not an SMP system
	 */

	if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
		return;

	/* see how many MSI-X interrupts are available */
	msix_cnt = pci_msix_count(sc->dev);
	if (msix_cnt < 2)
		return;

	/* now load the slice aware firmware and see what it supports */
	old_fw = sc->fw_name;
	if (old_fw == mxge_fw_aligned)
		sc->fw_name = mxge_fw_rss_aligned;
	else
		sc->fw_name = mxge_fw_rss_unaligned;
	status = mxge_load_firmware(sc, 0);
	if (status != 0) {
		device_printf(sc->dev, "Falling back to a single slice\n");
		return;
	}

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		goto abort_with_fw;
	}

	/* get rx ring size */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		goto abort_with_fw;
	}
	max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));

	/* tell it the size of the interrupt queues */
	cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
		goto abort_with_fw;
	}

	/* ask the maximum number of slices it supports */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
	if (status != 0) {
		device_printf(sc->dev,
		    "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
		goto abort_with_fw;
	}
	sc->num_slices = cmd.data0;
	if (sc->num_slices > msix_cnt)
		sc->num_slices = msix_cnt;

	if (mxge_max_slices == -1) {
		/* cap to number of CPUs in system */
		if (sc->num_slices > mp_ncpus)
			sc->num_slices = mp_ncpus;
	} else {
		if (sc->num_slices > mxge_max_slices)
			sc->num_slices = mxge_max_slices;
	}
	/* make sure it is a power of two */
	while (sc->num_slices & (sc->num_slices - 1))
		sc->num_slices--;

	if (mxge_verbose)
		device_printf(sc->dev, "using %d slices\n",
		    sc->num_slices);

	return;

abort_with_fw:
	sc->fw_name = old_fw;
	(void) mxge_load_firmware(sc, 0);
}
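/*
 * Allocate one MSI-X vector per slice and hook each vector up to its
 * slice's interrupt handler.  The MSI-X table lives behind BAR(2);
 * any failure unwinds the allocations made so far.
 */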
static int
mxge_add_msix_irqs(mxge_softc_t *sc)
{
	size_t bytes;
	int count, err, i, rid;

	rid = PCIR_BAR(2);
	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);

	if (sc->msix_table_res == NULL) {
		device_printf(sc->dev, "couldn't alloc MSIX table res\n");
		return ENXIO;
	}

	count = sc->num_slices;
	err = pci_alloc_msix(sc->dev, &count);
	if (err != 0) {
		device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d, "
		    "err = %d\n", sc->num_slices, err);
		goto abort_with_msix_table;
	}
	if (count < sc->num_slices) {
		device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
		    sc->num_slices, count);
		device_printf(sc->dev,
		    "Try setting hw.mxge.max_slices to %d\n",
		    count);
		err = ENOSPC;
		goto abort_with_msix;
	}
	bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
	sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_irq_res == NULL) {
		err = ENOMEM;
		goto abort_with_msix;
	}

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
		    SYS_RES_IRQ, &rid, RF_ACTIVE);
		if (sc->msix_irq_res[i] == NULL) {
			device_printf(sc->dev, "couldn't allocate IRQ res"
			    " for message %d\n", i);
			err = ENXIO;
			goto abort_with_res;
		}
	}

	bytes = sizeof (*sc->msix_ih) * sc->num_slices;
	sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);

	for (i = 0; i < sc->num_slices; i++) {
		err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
		    INTR_TYPE_NET | INTR_MPSAFE,
#if __FreeBSD_version > 700030
		    NULL,
#endif
		    mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
		if (err != 0) {
			device_printf(sc->dev, "couldn't setup intr for "
			    "message %d\n", i);
			goto abort_with_intr;
		}
		bus_describe_intr(sc->dev, sc->msix_irq_res[i],
		    sc->msix_ih[i], "s%d", i);
	}

	if (mxge_verbose) {
		device_printf(sc->dev, "using %d msix IRQs:",
		    sc->num_slices);
		for (i = 0; i < sc->num_slices; i++)
			printf(" %jd", rman_get_start(sc->msix_irq_res[i]));
		printf("\n");
	}
	return (0);

abort_with_intr:
	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
			    sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	free(sc->msix_ih, M_DEVBUF);

abort_with_res:
	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
			    sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	free(sc->msix_irq_res, M_DEVBUF);

abort_with_msix:
	pci_release_msi(sc->dev);

abort_with_msix_table:
	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
	    sc->msix_table_res);

	return err;
}
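/*
 * Single-interrupt fallback: use MSI when exactly one message is
 * available, otherwise fall back to a shared legacy INTx line.
 */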
static int
mxge_add_single_irq(mxge_softc_t *sc)
{
	int count, err, rid;

	count = pci_msi_count(sc->dev);
	if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
		rid = 1;
	} else {
		rid = 0;
		sc->legacy_irq = 1;
	}
	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
	    RF_SHAREABLE | RF_ACTIVE);
	if (sc->irq_res == NULL) {
		device_printf(sc->dev, "could not alloc interrupt\n");
		return ENXIO;
	}
	if (mxge_verbose)
		device_printf(sc->dev, "using %s irq %jd\n",
		    sc->legacy_irq ? "INTx" : "MSI",
		    rman_get_start(sc->irq_res));
	err = bus_setup_intr(sc->dev, sc->irq_res,
	    INTR_TYPE_NET | INTR_MPSAFE,
#if __FreeBSD_version > 700030
	    NULL,
#endif
	    mxge_intr, &sc->ss[0], &sc->ih);
	if (err != 0) {
		bus_release_resource(sc->dev, SYS_RES_IRQ,
		    sc->legacy_irq ? 0 : 1, sc->irq_res);
		if (!sc->legacy_irq)
			pci_release_msi(sc->dev);
	}
	return err;
}

static void
mxge_rem_msix_irqs(mxge_softc_t *sc)
{
	int i, rid;

	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
			    sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	free(sc->msix_ih, M_DEVBUF);

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
			    sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	free(sc->msix_irq_res, M_DEVBUF);

	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
	    sc->msix_table_res);

	pci_release_msi(sc->dev);
	return;
}

static void
mxge_rem_single_irq(mxge_softc_t *sc)
{
	bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
	bus_release_resource(sc->dev, SYS_RES_IRQ,
	    sc->legacy_irq ? 0 : 1, sc->irq_res);
	if (!sc->legacy_irq)
		pci_release_msi(sc->dev);
}

static void
mxge_rem_irq(mxge_softc_t *sc)
{
	if (sc->num_slices > 1)
		mxge_rem_msix_irqs(sc);
	else
		mxge_rem_single_irq(sc);
}

static int
mxge_add_irq(mxge_softc_t *sc)
{
	int err;

	if (sc->num_slices > 1)
		err = mxge_add_msix_irqs(sc);
	else
		err = mxge_add_single_irq(sc);

	if (0 && err == 0 && sc->num_slices > 1) {
		mxge_rem_msix_irqs(sc);
		err = mxge_add_msix_irqs(sc);
	}
	return err;
}
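/*
 * Attach the device: fetch tunables, create the taskqueue and parent
 * DMA tag, map the NIC's SRAM, load firmware, probe slices, allocate
 * rings and interrupts, and finally attach the ifnet.  The
 * abort_with_* labels unwind these steps in reverse order on failure.
 */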
"could not map memory\n"); 4808 err = ENXIO; 4809 goto abort_with_lock; 4810 } 4811 sc->sram = rman_get_virtual(sc->mem_res); 4812 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4813 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4814 device_printf(dev, "impossible memory region size %jd\n", 4815 rman_get_size(sc->mem_res)); 4816 err = ENXIO; 4817 goto abort_with_mem_res; 4818 } 4819 4820 /* make NULL terminated copy of the EEPROM strings section of 4821 lanai SRAM */ 4822 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4823 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4824 rman_get_bushandle(sc->mem_res), 4825 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4826 sc->eeprom_strings, 4827 MXGE_EEPROM_STRINGS_SIZE - 2); 4828 err = mxge_parse_strings(sc); 4829 if (err != 0) 4830 goto abort_with_mem_res; 4831 4832 /* Enable write combining for efficient use of PCIe bus */ 4833 mxge_enable_wc(sc); 4834 4835 /* Allocate the out of band dma memory */ 4836 err = mxge_dma_alloc(sc, &sc->cmd_dma, 4837 sizeof (mxge_cmd_t), 64); 4838 if (err != 0) 4839 goto abort_with_mem_res; 4840 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 4841 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4842 if (err != 0) 4843 goto abort_with_cmd_dma; 4844 4845 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4846 if (err != 0) 4847 goto abort_with_zeropad_dma; 4848 4849 /* select & load the firmware */ 4850 err = mxge_select_firmware(sc); 4851 if (err != 0) 4852 goto abort_with_dmabench; 4853 sc->intr_coal_delay = mxge_intr_coal_delay; 4854 4855 mxge_slice_probe(sc); 4856 err = mxge_alloc_slices(sc); 4857 if (err != 0) 4858 goto abort_with_dmabench; 4859 4860 err = mxge_reset(sc, 0); 4861 if (err != 0) 4862 goto abort_with_slices; 4863 4864 err = mxge_alloc_rings(sc); 4865 if (err != 0) { 4866 device_printf(sc->dev, "failed to allocate rings\n"); 4867 goto abort_with_slices; 4868 } 4869 4870 err = mxge_add_irq(sc); 4871 if (err != 0) { 4872 device_printf(sc->dev, "failed to add irq\n"); 4873 goto abort_with_rings; 4874 } 4875 4876 ifp->if_baudrate = IF_Gbps(10); 4877 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 4878 IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 | 4879 IFCAP_RXCSUM_IPV6; 4880 #if defined(INET) || defined(INET6) 4881 ifp->if_capabilities |= IFCAP_LRO; 4882 #endif 4883 4884 #ifdef MXGE_NEW_VLAN_API 4885 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; 4886 4887 /* Only FW 1.4.32 and newer can do TSO over vlans */ 4888 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 4889 sc->fw_ver_tiny >= 32) 4890 ifp->if_capabilities |= IFCAP_VLAN_HWTSO; 4891 #endif 4892 sc->max_mtu = mxge_max_mtu(sc); 4893 if (sc->max_mtu >= 9000) 4894 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 4895 else 4896 device_printf(dev, "MTU limited to %d. 
Install " 4897 "latest firmware for 9000 byte jumbo support\n", 4898 sc->max_mtu - ETHER_HDR_LEN); 4899 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4900 ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6; 4901 /* check to see if f/w supports TSO for IPv6 */ 4902 if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) { 4903 if (CSUM_TCP_IPV6) 4904 ifp->if_capabilities |= IFCAP_TSO6; 4905 sc->max_tso6_hlen = min(cmd.data0, 4906 sizeof (sc->ss[0].scratch)); 4907 } 4908 ifp->if_capenable = ifp->if_capabilities; 4909 if (sc->lro_cnt == 0) 4910 ifp->if_capenable &= ~IFCAP_LRO; 4911 ifp->if_init = mxge_init; 4912 ifp->if_softc = sc; 4913 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4914 ifp->if_ioctl = mxge_ioctl; 4915 ifp->if_start = mxge_start; 4916 ifp->if_get_counter = mxge_get_counter; 4917 ifp->if_hw_tsomax = IP_MAXPACKET - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); 4918 ifp->if_hw_tsomaxsegcount = sc->ss[0].tx.max_desc; 4919 ifp->if_hw_tsomaxsegsize = IP_MAXPACKET; 4920 /* Initialise the ifmedia structure */ 4921 ifmedia_init(&sc->media, 0, mxge_media_change, 4922 mxge_media_status); 4923 mxge_media_init(sc); 4924 mxge_media_probe(sc); 4925 sc->dying = 0; 4926 ether_ifattach(ifp, sc->mac_addr); 4927 /* ether_ifattach sets mtu to ETHERMTU */ 4928 if (mxge_initial_mtu != ETHERMTU) 4929 mxge_change_mtu(sc, mxge_initial_mtu); 4930 4931 mxge_add_sysctls(sc); 4932 #ifdef IFNET_BUF_RING 4933 ifp->if_transmit = mxge_transmit; 4934 ifp->if_qflush = mxge_qflush; 4935 #endif 4936 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq", 4937 device_get_nameunit(sc->dev)); 4938 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 4939 return 0; 4940 4941 abort_with_rings: 4942 mxge_free_rings(sc); 4943 abort_with_slices: 4944 mxge_free_slices(sc); 4945 abort_with_dmabench: 4946 mxge_dma_free(&sc->dmabench_dma); 4947 abort_with_zeropad_dma: 4948 mxge_dma_free(&sc->zeropad_dma); 4949 abort_with_cmd_dma: 4950 mxge_dma_free(&sc->cmd_dma); 4951 abort_with_mem_res: 4952 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4953 abort_with_lock: 4954 pci_disable_busmaster(dev); 4955 mtx_destroy(&sc->cmd_mtx); 4956 mtx_destroy(&sc->driver_mtx); 4957 if_free(ifp); 4958 abort_with_parent_dmat: 4959 bus_dma_tag_destroy(sc->parent_dmat); 4960 abort_with_tq: 4961 if (sc->tq != NULL) { 4962 taskqueue_drain(sc->tq, &sc->watchdog_task); 4963 taskqueue_free(sc->tq); 4964 sc->tq = NULL; 4965 } 4966 abort_with_nothing: 4967 return err; 4968 } 4969 4970 static int 4971 mxge_detach(device_t dev) 4972 { 4973 mxge_softc_t *sc = device_get_softc(dev); 4974 4975 if (mxge_vlans_active(sc)) { 4976 device_printf(sc->dev, 4977 "Detach vlans before removing module\n"); 4978 return EBUSY; 4979 } 4980 mtx_lock(&sc->driver_mtx); 4981 sc->dying = 1; 4982 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 4983 mxge_close(sc, 0); 4984 mtx_unlock(&sc->driver_mtx); 4985 ether_ifdetach(sc->ifp); 4986 if (sc->tq != NULL) { 4987 taskqueue_drain(sc->tq, &sc->watchdog_task); 4988 taskqueue_free(sc->tq); 4989 sc->tq = NULL; 4990 } 4991 callout_drain(&sc->co_hdl); 4992 ifmedia_removeall(&sc->media); 4993 mxge_dummy_rdma(sc, 0); 4994 mxge_rem_sysctls(sc); 4995 mxge_rem_irq(sc); 4996 mxge_free_rings(sc); 4997 mxge_free_slices(sc); 4998 mxge_dma_free(&sc->dmabench_dma); 4999 mxge_dma_free(&sc->zeropad_dma); 5000 mxge_dma_free(&sc->cmd_dma); 5001 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 5002 pci_disable_busmaster(dev); 5003 mtx_destroy(&sc->cmd_mtx); 5004 mtx_destroy(&sc->driver_mtx); 5005 
static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	if (mxge_vlans_active(sc)) {
		device_printf(sc->dev,
		    "Detach vlans before removing module\n");
		return EBUSY;
	}
	mtx_lock(&sc->driver_mtx);
	sc->dying = 1;
	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
		mxge_close(sc, 0);
	mtx_unlock(&sc->driver_mtx);
	ether_ifdetach(sc->ifp);
	if (sc->tq != NULL) {
		taskqueue_drain(sc->tq, &sc->watchdog_task);
		taskqueue_free(sc->tq);
		sc->tq = NULL;
	}
	callout_drain(&sc->co_hdl);
	ifmedia_removeall(&sc->media);
	mxge_dummy_rdma(sc, 0);
	mxge_rem_sysctls(sc);
	mxge_rem_irq(sc);
	mxge_free_rings(sc);
	mxge_free_slices(sc);
	mxge_dma_free(&sc->dmabench_dma);
	mxge_dma_free(&sc->zeropad_dma);
	mxge_dma_free(&sc->cmd_dma);
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(sc->ifp);
	bus_dma_tag_destroy(sc->parent_dmat);
	return 0;
}

static int
mxge_shutdown(device_t dev)
{
	return 0;
}

/*
  This file uses Myri10GE driver indentation.

  Local Variables:
  c-file-style:"linux"
  tab-width:8
  End:
*/