1 /****************************************************************************** 2 SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 4 Copyright (c) 2006-2013, Myricom Inc. 5 All rights reserved. 6 7 Redistribution and use in source and binary forms, with or without 8 modification, are permitted provided that the following conditions are met: 9 10 1. Redistributions of source code must retain the above copyright notice, 11 this list of conditions and the following disclaimer. 12 13 2. Neither the name of the Myricom Inc, nor the names of its 14 contributors may be used to endorse or promote products derived from 15 this software without specific prior written permission. 16 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 POSSIBILITY OF SUCH DAMAGE. 28 29 ***************************************************************************/ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/linker.h> 37 #include <sys/firmware.h> 38 #include <sys/endian.h> 39 #include <sys/sockio.h> 40 #include <sys/mbuf.h> 41 #include <sys/malloc.h> 42 #include <sys/kdb.h> 43 #include <sys/kernel.h> 44 #include <sys/lock.h> 45 #include <sys/module.h> 46 #include <sys/socket.h> 47 #include <sys/sysctl.h> 48 #include <sys/sx.h> 49 #include <sys/taskqueue.h> 50 #include <sys/zlib.h> 51 52 #include <net/if.h> 53 #include <net/if_var.h> 54 #include <net/if_arp.h> 55 #include <net/ethernet.h> 56 #include <net/if_dl.h> 57 #include <net/if_media.h> 58 59 #include <net/bpf.h> 60 61 #include <net/if_types.h> 62 #include <net/if_vlan_var.h> 63 64 #include <netinet/in_systm.h> 65 #include <netinet/in.h> 66 #include <netinet/ip.h> 67 #include <netinet/ip6.h> 68 #include <netinet/tcp.h> 69 #include <netinet/tcp_lro.h> 70 #include <netinet6/ip6_var.h> 71 72 #include <machine/bus.h> 73 #include <machine/in_cksum.h> 74 #include <machine/resource.h> 75 #include <sys/bus.h> 76 #include <sys/rman.h> 77 #include <sys/smp.h> 78 79 #include <dev/pci/pcireg.h> 80 #include <dev/pci/pcivar.h> 81 #include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */ 82 83 #include <vm/vm.h> /* for pmap_mapdev() */ 84 #include <vm/pmap.h> 85 86 #if defined(__i386) || defined(__amd64) 87 #include <machine/specialreg.h> 88 #endif 89 90 #include <dev/mxge/mxge_mcp.h> 91 #include <dev/mxge/mcp_gen_header.h> 92 /*#define MXGE_FAKE_IFP*/ 93 #include <dev/mxge/if_mxge_var.h> 94 #ifdef IFNET_BUF_RING 95 #include <sys/buf_ring.h> 96 #endif 97 98 #include "opt_inet.h" 99 #include "opt_inet6.h" 100 101 /* tunable params */ 102 static int mxge_nvidia_ecrc_enable = 1; 103 static int mxge_force_firmware = 0; 104 static int mxge_intr_coal_delay = 30; 105 static int mxge_deassert_wait = 1; 106 static int mxge_flow_control = 1; 107 static int mxge_verbose = 0; 108 static int mxge_ticks; 109 static int mxge_max_slices = 1; 110 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 111 static int mxge_always_promisc = 0; 112 static int mxge_initial_mtu = ETHERMTU_JUMBO; 113 static int mxge_throttle = 0; 114 static char *mxge_fw_unaligned = "mxge_ethp_z8e"; 115 static char *mxge_fw_aligned = "mxge_eth_z8e"; 116 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; 117 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; 118 119 static int mxge_probe(device_t dev); 120 static int mxge_attach(device_t dev); 121 static int mxge_detach(device_t dev); 122 static int mxge_shutdown(device_t dev); 123 static void mxge_intr(void *arg); 124 125 static device_method_t mxge_methods[] = 126 { 127 /* Device interface */ 128 DEVMETHOD(device_probe, mxge_probe), 129 DEVMETHOD(device_attach, mxge_attach), 130 DEVMETHOD(device_detach, mxge_detach), 131 DEVMETHOD(device_shutdown, mxge_shutdown), 132 133 DEVMETHOD_END 134 }; 135 136 static driver_t mxge_driver = 137 { 138 "mxge", 139 mxge_methods, 140 sizeof(mxge_softc_t), 141 }; 142 143 static devclass_t mxge_devclass; 144 145 /* Declare ourselves to be a child of the PCI bus.*/ 146 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0); 147 MODULE_DEPEND(mxge, firmware, 1, 1, 1); 148 MODULE_DEPEND(mxge, zlib, 1, 1, 1); 149 150 static int mxge_load_firmware(mxge_softc_t *sc, int adopt); 151 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 152 static int mxge_close(mxge_softc_t *sc, int down); 153 static int mxge_open(mxge_softc_t *sc); 154 static void mxge_tick(void *arg); 155 156 static int 157 mxge_probe(device_t dev) 158 { 159 int rev; 160 161 162 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && 163 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) || 164 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) { 165 rev = pci_get_revid(dev); 166 switch (rev) { 167 case MXGE_PCI_REV_Z8E: 168 device_set_desc(dev, "Myri10G-PCIE-8A"); 169 break; 170 case MXGE_PCI_REV_Z8ES: 171 device_set_desc(dev, "Myri10G-PCIE-8B"); 172 break; 173 default: 174 device_set_desc(dev, "Myri10G-PCIE-8??"); 175 device_printf(dev, "Unrecognized rev %d NIC\n", 176 rev); 177 break; 178 } 179 return 0; 180 } 181 return ENXIO; 182 } 183 184 static void 185 mxge_enable_wc(mxge_softc_t *sc) 186 { 187 #if defined(__i386) || defined(__amd64) 188 vm_offset_t len; 189 int err; 190 191 sc->wc = 1; 192 len = rman_get_size(sc->mem_res); 193 err = pmap_change_attr((vm_offset_t) sc->sram, 194 len, PAT_WRITE_COMBINING); 195 if (err != 0) { 196 device_printf(sc->dev, "pmap_change_attr failed, %d\n", 197 err); 198 sc->wc = 0; 199 } 200 #endif 201 } 202 203 204 /* callback to get our DMA address */ 205 static void 206 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, 207 int error) 208 { 209 if (error == 0) { 210 *(bus_addr_t *) arg = segs->ds_addr; 211 } 212 } 213 214 static int 215 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, 216 bus_size_t alignment) 217 { 218 int err; 219 device_t dev = sc->dev; 220 bus_size_t boundary, maxsegsize; 221 222 if (bytes > 4096 && alignment == 4096) { 223 boundary = 0; 224 maxsegsize = bytes; 225 } else { 226 boundary = 4096; 227 maxsegsize = 4096; 228 } 229 230 /* allocate DMAable memory tags */ 231 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 232 alignment, /* alignment */ 233 boundary, /* boundary */ 234 BUS_SPACE_MAXADDR, /* low */ 235 BUS_SPACE_MAXADDR, /* high */ 236 NULL, NULL, /* filter */ 237 bytes, /* maxsize */ 238 1, /* num segs */ 239 maxsegsize, /* maxsegsize */ 240 BUS_DMA_COHERENT, /* flags */ 241 NULL, NULL, /* lock */ 242 &dma->dmat); /* tag */ 243 if (err != 0) { 244 device_printf(dev, "couldn't alloc tag (err = %d)\n", err); 245 return err; 246 } 247 248 /* allocate DMAable memory & map */ 249 err = bus_dmamem_alloc(dma->dmat, &dma->addr, 250 (BUS_DMA_WAITOK | BUS_DMA_COHERENT 251 | BUS_DMA_ZERO), &dma->map); 252 if (err != 0) { 253 device_printf(dev, "couldn't alloc mem (err = %d)\n", err); 254 goto abort_with_dmat; 255 } 256 257 /* load the memory */ 258 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, 259 mxge_dmamap_callback, 260 (void *)&dma->bus_addr, 0); 261 if (err != 0) { 262 device_printf(dev, "couldn't load map (err = %d)\n", err); 263 goto abort_with_mem; 264 } 265 return 0; 266 267 abort_with_mem: 268 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 269 abort_with_dmat: 270 (void)bus_dma_tag_destroy(dma->dmat); 271 return err; 272 } 273 274 275 static void 276 mxge_dma_free(mxge_dma_t *dma) 277 { 278 bus_dmamap_unload(dma->dmat, dma->map); 279 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 280 (void)bus_dma_tag_destroy(dma->dmat); 281 } 282 283 /* 284 * The eeprom strings on the lanaiX have the format 285 * SN=x\0 286 * MAC=x:x:x:x:x:x\0 287 * PC=text\0 288 */ 289 290 static int 291 mxge_parse_strings(mxge_softc_t *sc) 292 { 293 char *ptr; 294 int i, found_mac, found_sn2; 295 char *endptr; 296 297 ptr = sc->eeprom_strings; 298 found_mac = 0; 299 found_sn2 = 0; 300 while (*ptr != '\0') { 301 if (strncmp(ptr, "MAC=", 4) == 0) { 302 ptr += 4; 303 for (i = 0;;) { 304 sc->mac_addr[i] = strtoul(ptr, &endptr, 16); 305 if (endptr - ptr != 2) 306 goto abort; 307 ptr = endptr; 308 if (++i == 6) 309 break; 310 if (*ptr++ != ':') 311 goto abort; 312 } 313 found_mac = 1; 314 } else if (strncmp(ptr, "PC=", 3) == 0) { 315 ptr += 3; 316 strlcpy(sc->product_code_string, ptr, 317 sizeof(sc->product_code_string)); 318 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) { 319 ptr += 3; 320 strlcpy(sc->serial_number_string, ptr, 321 sizeof(sc->serial_number_string)); 322 } else if (strncmp(ptr, "SN2=", 4) == 0) { 323 /* SN2 takes precedence over SN */ 324 ptr += 4; 325 found_sn2 = 1; 326 strlcpy(sc->serial_number_string, ptr, 327 sizeof(sc->serial_number_string)); 328 } 329 while (*ptr++ != '\0') {} 330 } 331 332 if (found_mac) 333 return 0; 334 335 abort: 336 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 337 338 return ENXIO; 339 } 340 341 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 342 static void 343 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 344 { 345 uint32_t val; 346 unsigned long base, off; 347 char *va, *cfgptr; 348 device_t pdev, mcp55; 349 uint16_t vendor_id, device_id, word; 350 uintptr_t bus, slot, func, ivend, idev; 351 uint32_t *ptr32; 352 353 354 if (!mxge_nvidia_ecrc_enable) 355 return; 356 357 pdev = device_get_parent(device_get_parent(sc->dev)); 358 if (pdev == NULL) { 359 device_printf(sc->dev, "could not find parent?\n"); 360 return; 361 } 362 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 363 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 364 365 if (vendor_id != 0x10de) 366 return; 367 368 base = 0; 369 370 if (device_id == 0x005d) { 371 /* ck804, base address is magic */ 372 base = 0xe0000000UL; 373 } else if (device_id >= 0x0374 && device_id <= 0x378) { 374 /* mcp55, base address stored in chipset */ 375 mcp55 = pci_find_bsf(0, 0, 0); 376 if (mcp55 && 377 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 378 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 379 word = pci_read_config(mcp55, 0x90, 2); 380 base = ((unsigned long)word & 0x7ffeU) << 25; 381 } 382 } 383 if (!base) 384 return; 385 386 /* XXXX 387 Test below is commented because it is believed that doing 388 config read/write beyond 0xff will access the config space 389 for the next larger function. Uncomment this and remove 390 the hacky pmap_mapdev() way of accessing config space when 391 FreeBSD grows support for extended pcie config space access 392 */ 393 #if 0 394 /* See if we can, by some miracle, access the extended 395 config space */ 396 val = pci_read_config(pdev, 0x178, 4); 397 if (val != 0xffffffff) { 398 val |= 0x40; 399 pci_write_config(pdev, 0x178, val, 4); 400 return; 401 } 402 #endif 403 /* Rather than using normal pci config space writes, we must 404 * map the Nvidia config space ourselves. This is because on 405 * opteron/nvidia class machine the 0xe000000 mapping is 406 * handled by the nvidia chipset, that means the internal PCI 407 * device (the on-chip northbridge), or the amd-8131 bridge 408 * and things behind them are not visible by this method. 409 */ 410 411 BUS_READ_IVAR(device_get_parent(pdev), pdev, 412 PCI_IVAR_BUS, &bus); 413 BUS_READ_IVAR(device_get_parent(pdev), pdev, 414 PCI_IVAR_SLOT, &slot); 415 BUS_READ_IVAR(device_get_parent(pdev), pdev, 416 PCI_IVAR_FUNCTION, &func); 417 BUS_READ_IVAR(device_get_parent(pdev), pdev, 418 PCI_IVAR_VENDOR, &ivend); 419 BUS_READ_IVAR(device_get_parent(pdev), pdev, 420 PCI_IVAR_DEVICE, &idev); 421 422 off = base 423 + 0x00100000UL * (unsigned long)bus 424 + 0x00001000UL * (unsigned long)(func 425 + 8 * slot); 426 427 /* map it into the kernel */ 428 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 429 430 431 if (va == NULL) { 432 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 433 return; 434 } 435 /* get a pointer to the config space mapped into the kernel */ 436 cfgptr = va + (off & PAGE_MASK); 437 438 /* make sure that we can really access it */ 439 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 440 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 441 if (! (vendor_id == ivend && device_id == idev)) { 442 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 443 vendor_id, device_id); 444 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 445 return; 446 } 447 448 ptr32 = (uint32_t*)(cfgptr + 0x178); 449 val = *ptr32; 450 451 if (val == 0xffffffff) { 452 device_printf(sc->dev, "extended mapping failed\n"); 453 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 454 return; 455 } 456 *ptr32 = val | 0x40; 457 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 458 if (mxge_verbose) 459 device_printf(sc->dev, 460 "Enabled ECRC on upstream Nvidia bridge " 461 "at %d:%d:%d\n", 462 (int)bus, (int)slot, (int)func); 463 return; 464 } 465 #else 466 static void 467 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 468 { 469 device_printf(sc->dev, 470 "Nforce 4 chipset on non-x86/amd64!?!?!\n"); 471 return; 472 } 473 #endif 474 475 476 static int 477 mxge_dma_test(mxge_softc_t *sc, int test_type) 478 { 479 mxge_cmd_t cmd; 480 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr; 481 int status; 482 uint32_t len; 483 char *test = " "; 484 485 486 /* Run a small DMA test. 487 * The magic multipliers to the length tell the firmware 488 * to do DMA read, write, or read+write tests. The 489 * results are returned in cmd.data0. The upper 16 490 * bits of the return is the number of transfers completed. 491 * The lower 16 bits is the time in 0.5us ticks that the 492 * transfers took to complete. 493 */ 494 495 len = sc->tx_boundary; 496 497 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 498 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 499 cmd.data2 = len * 0x10000; 500 status = mxge_send_cmd(sc, test_type, &cmd); 501 if (status != 0) { 502 test = "read"; 503 goto abort; 504 } 505 sc->read_dma = ((cmd.data0>>16) * len * 2) / 506 (cmd.data0 & 0xffff); 507 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 508 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 509 cmd.data2 = len * 0x1; 510 status = mxge_send_cmd(sc, test_type, &cmd); 511 if (status != 0) { 512 test = "write"; 513 goto abort; 514 } 515 sc->write_dma = ((cmd.data0>>16) * len * 2) / 516 (cmd.data0 & 0xffff); 517 518 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 519 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 520 cmd.data2 = len * 0x10001; 521 status = mxge_send_cmd(sc, test_type, &cmd); 522 if (status != 0) { 523 test = "read/write"; 524 goto abort; 525 } 526 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 527 (cmd.data0 & 0xffff); 528 529 abort: 530 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 531 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 532 test, status); 533 534 return status; 535 } 536 537 /* 538 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 539 * when the PCI-E Completion packets are aligned on an 8-byte 540 * boundary. Some PCI-E chip sets always align Completion packets; on 541 * the ones that do not, the alignment can be enforced by enabling 542 * ECRC generation (if supported). 543 * 544 * When PCI-E Completion packets are not aligned, it is actually more 545 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 546 * 547 * If the driver can neither enable ECRC nor verify that it has 548 * already been enabled, then it must use a firmware image which works 549 * around unaligned completion packets (ethp_z8e.dat), and it should 550 * also ensure that it never gives the device a Read-DMA which is 551 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 552 * enabled, then the driver should use the aligned (eth_z8e.dat) 553 * firmware image, and set tx_boundary to 4KB. 554 */ 555 556 static int 557 mxge_firmware_probe(mxge_softc_t *sc) 558 { 559 device_t dev = sc->dev; 560 int reg, status; 561 uint16_t pectl; 562 563 sc->tx_boundary = 4096; 564 /* 565 * Verify the max read request size was set to 4KB 566 * before trying the test with 4KB. 567 */ 568 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { 569 pectl = pci_read_config(dev, reg + 0x8, 2); 570 if ((pectl & (5 << 12)) != (5 << 12)) { 571 device_printf(dev, "Max Read Req. size != 4k (0x%x\n", 572 pectl); 573 sc->tx_boundary = 2048; 574 } 575 } 576 577 /* 578 * load the optimized firmware (which assumes aligned PCIe 579 * completions) in order to see if it works on this host. 580 */ 581 sc->fw_name = mxge_fw_aligned; 582 status = mxge_load_firmware(sc, 1); 583 if (status != 0) { 584 return status; 585 } 586 587 /* 588 * Enable ECRC if possible 589 */ 590 mxge_enable_nvidia_ecrc(sc); 591 592 /* 593 * Run a DMA test which watches for unaligned completions and 594 * aborts on the first one seen. Not required on Z8ES or newer. 595 */ 596 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES) 597 return 0; 598 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 599 if (status == 0) 600 return 0; /* keep the aligned firmware */ 601 602 if (status != E2BIG) 603 device_printf(dev, "DMA test failed: %d\n", status); 604 if (status == ENOSYS) 605 device_printf(dev, "Falling back to ethp! " 606 "Please install up to date fw\n"); 607 return status; 608 } 609 610 static int 611 mxge_select_firmware(mxge_softc_t *sc) 612 { 613 int aligned = 0; 614 int force_firmware = mxge_force_firmware; 615 616 if (sc->throttle) 617 force_firmware = sc->throttle; 618 619 if (force_firmware != 0) { 620 if (force_firmware == 1) 621 aligned = 1; 622 else 623 aligned = 0; 624 if (mxge_verbose) 625 device_printf(sc->dev, 626 "Assuming %s completions (forced)\n", 627 aligned ? "aligned" : "unaligned"); 628 goto abort; 629 } 630 631 /* if the PCIe link width is 4 or less, we can use the aligned 632 firmware and skip any checks */ 633 if (sc->link_width != 0 && sc->link_width <= 4) { 634 device_printf(sc->dev, 635 "PCIe x%d Link, expect reduced performance\n", 636 sc->link_width); 637 aligned = 1; 638 goto abort; 639 } 640 641 if (0 == mxge_firmware_probe(sc)) 642 return 0; 643 644 abort: 645 if (aligned) { 646 sc->fw_name = mxge_fw_aligned; 647 sc->tx_boundary = 4096; 648 } else { 649 sc->fw_name = mxge_fw_unaligned; 650 sc->tx_boundary = 2048; 651 } 652 return (mxge_load_firmware(sc, 0)); 653 } 654 655 static int 656 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 657 { 658 659 660 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 661 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 662 be32toh(hdr->mcp_type)); 663 return EIO; 664 } 665 666 /* save firmware version for sysctl */ 667 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version)); 668 if (mxge_verbose) 669 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 670 671 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 672 &sc->fw_ver_minor, &sc->fw_ver_tiny); 673 674 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 675 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 676 device_printf(sc->dev, "Found firmware version %s\n", 677 sc->fw_version); 678 device_printf(sc->dev, "Driver needs %d.%d\n", 679 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 680 return EINVAL; 681 } 682 return 0; 683 684 } 685 686 static void * 687 z_alloc(void *nil, u_int items, u_int size) 688 { 689 void *ptr; 690 691 ptr = malloc(items * size, M_TEMP, M_NOWAIT); 692 return ptr; 693 } 694 695 static void 696 z_free(void *nil, void *ptr) 697 { 698 free(ptr, M_TEMP); 699 } 700 701 702 static int 703 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 704 { 705 z_stream zs; 706 char *inflate_buffer; 707 const struct firmware *fw; 708 const mcp_gen_header_t *hdr; 709 unsigned hdr_offset; 710 int status; 711 unsigned int i; 712 char dummy; 713 size_t fw_len; 714 715 fw = firmware_get(sc->fw_name); 716 if (fw == NULL) { 717 device_printf(sc->dev, "Could not find firmware image %s\n", 718 sc->fw_name); 719 return ENOENT; 720 } 721 722 723 724 /* setup zlib and decompress f/w */ 725 bzero(&zs, sizeof (zs)); 726 zs.zalloc = z_alloc; 727 zs.zfree = z_free; 728 status = inflateInit(&zs); 729 if (status != Z_OK) { 730 status = EIO; 731 goto abort_with_fw; 732 } 733 734 /* the uncompressed size is stored as the firmware version, 735 which would otherwise go unused */ 736 fw_len = (size_t) fw->version; 737 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); 738 if (inflate_buffer == NULL) 739 goto abort_with_zs; 740 zs.avail_in = fw->datasize; 741 zs.next_in = __DECONST(char *, fw->data); 742 zs.avail_out = fw_len; 743 zs.next_out = inflate_buffer; 744 status = inflate(&zs, Z_FINISH); 745 if (status != Z_STREAM_END) { 746 device_printf(sc->dev, "zlib %d\n", status); 747 status = EIO; 748 goto abort_with_buffer; 749 } 750 751 /* check id */ 752 hdr_offset = htobe32(*(const uint32_t *) 753 (inflate_buffer + MCP_HEADER_PTR_OFFSET)); 754 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 755 device_printf(sc->dev, "Bad firmware file"); 756 status = EIO; 757 goto abort_with_buffer; 758 } 759 hdr = (const void*)(inflate_buffer + hdr_offset); 760 761 status = mxge_validate_firmware(sc, hdr); 762 if (status != 0) 763 goto abort_with_buffer; 764 765 /* Copy the inflated firmware to NIC SRAM. */ 766 for (i = 0; i < fw_len; i += 256) { 767 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 768 inflate_buffer + i, 769 min(256U, (unsigned)(fw_len - i))); 770 wmb(); 771 dummy = *sc->sram; 772 wmb(); 773 } 774 775 *limit = fw_len; 776 status = 0; 777 abort_with_buffer: 778 free(inflate_buffer, M_TEMP); 779 abort_with_zs: 780 inflateEnd(&zs); 781 abort_with_fw: 782 firmware_put(fw, FIRMWARE_UNLOAD); 783 return status; 784 } 785 786 /* 787 * Enable or disable periodic RDMAs from the host to make certain 788 * chipsets resend dropped PCIe messages 789 */ 790 791 static void 792 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 793 { 794 char buf_bytes[72]; 795 volatile uint32_t *confirm; 796 volatile char *submit; 797 uint32_t *buf, dma_low, dma_high; 798 int i; 799 800 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 801 802 /* clear confirmation addr */ 803 confirm = (volatile uint32_t *)sc->cmd; 804 *confirm = 0; 805 wmb(); 806 807 /* send an rdma command to the PCIe engine, and wait for the 808 response in the confirmation address. The firmware should 809 write a -1 there to indicate it is alive and well 810 */ 811 812 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 813 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 814 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 815 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 816 buf[2] = htobe32(0xffffffff); /* confirm data */ 817 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 818 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 819 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 820 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 821 buf[5] = htobe32(enable); /* enable? */ 822 823 824 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 825 826 mxge_pio_copy(submit, buf, 64); 827 wmb(); 828 DELAY(1000); 829 wmb(); 830 i = 0; 831 while (*confirm != 0xffffffff && i < 20) { 832 DELAY(1000); 833 i++; 834 } 835 if (*confirm != 0xffffffff) { 836 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 837 (enable ? "enable" : "disable"), confirm, 838 *confirm); 839 } 840 return; 841 } 842 843 static int 844 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 845 { 846 mcp_cmd_t *buf; 847 char buf_bytes[sizeof(*buf) + 8]; 848 volatile mcp_cmd_response_t *response = sc->cmd; 849 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 850 uint32_t dma_low, dma_high; 851 int err, sleep_total = 0; 852 853 /* ensure buf is aligned to 8 bytes */ 854 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 855 856 buf->data0 = htobe32(data->data0); 857 buf->data1 = htobe32(data->data1); 858 buf->data2 = htobe32(data->data2); 859 buf->cmd = htobe32(cmd); 860 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 861 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 862 863 buf->response_addr.low = htobe32(dma_low); 864 buf->response_addr.high = htobe32(dma_high); 865 mtx_lock(&sc->cmd_mtx); 866 response->result = 0xffffffff; 867 wmb(); 868 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 869 870 /* wait up to 20ms */ 871 err = EAGAIN; 872 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 873 bus_dmamap_sync(sc->cmd_dma.dmat, 874 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 875 wmb(); 876 switch (be32toh(response->result)) { 877 case 0: 878 data->data0 = be32toh(response->data); 879 err = 0; 880 break; 881 case 0xffffffff: 882 DELAY(1000); 883 break; 884 case MXGEFW_CMD_UNKNOWN: 885 err = ENOSYS; 886 break; 887 case MXGEFW_CMD_ERROR_UNALIGNED: 888 err = E2BIG; 889 break; 890 case MXGEFW_CMD_ERROR_BUSY: 891 err = EBUSY; 892 break; 893 case MXGEFW_CMD_ERROR_I2C_ABSENT: 894 err = ENXIO; 895 break; 896 default: 897 device_printf(sc->dev, 898 "mxge: command %d " 899 "failed, result = %d\n", 900 cmd, be32toh(response->result)); 901 err = ENXIO; 902 break; 903 } 904 if (err != EAGAIN) 905 break; 906 } 907 if (err == EAGAIN) 908 device_printf(sc->dev, "mxge: command %d timed out" 909 "result = %d\n", 910 cmd, be32toh(response->result)); 911 mtx_unlock(&sc->cmd_mtx); 912 return err; 913 } 914 915 static int 916 mxge_adopt_running_firmware(mxge_softc_t *sc) 917 { 918 struct mcp_gen_header *hdr; 919 const size_t bytes = sizeof (struct mcp_gen_header); 920 size_t hdr_offset; 921 int status; 922 923 /* find running firmware header */ 924 hdr_offset = htobe32(*(volatile uint32_t *) 925 (sc->sram + MCP_HEADER_PTR_OFFSET)); 926 927 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 928 device_printf(sc->dev, 929 "Running firmware has bad header offset (%d)\n", 930 (int)hdr_offset); 931 return EIO; 932 } 933 934 /* copy header of running firmware from SRAM to host memory to 935 * validate firmware */ 936 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 937 if (hdr == NULL) { 938 device_printf(sc->dev, "could not malloc firmware hdr\n"); 939 return ENOMEM; 940 } 941 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 942 rman_get_bushandle(sc->mem_res), 943 hdr_offset, (char *)hdr, bytes); 944 status = mxge_validate_firmware(sc, hdr); 945 free(hdr, M_DEVBUF); 946 947 /* 948 * check to see if adopted firmware has bug where adopting 949 * it will cause broadcasts to be filtered unless the NIC 950 * is kept in ALLMULTI mode 951 */ 952 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 953 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 954 sc->adopted_rx_filter_bug = 1; 955 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 956 "working around rx filter bug\n", 957 sc->fw_ver_major, sc->fw_ver_minor, 958 sc->fw_ver_tiny); 959 } 960 961 return status; 962 } 963 964 965 static int 966 mxge_load_firmware(mxge_softc_t *sc, int adopt) 967 { 968 volatile uint32_t *confirm; 969 volatile char *submit; 970 char buf_bytes[72]; 971 uint32_t *buf, size, dma_low, dma_high; 972 int status, i; 973 974 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 975 976 size = sc->sram_size; 977 status = mxge_load_firmware_helper(sc, &size); 978 if (status) { 979 if (!adopt) 980 return status; 981 /* Try to use the currently running firmware, if 982 it is new enough */ 983 status = mxge_adopt_running_firmware(sc); 984 if (status) { 985 device_printf(sc->dev, 986 "failed to adopt running firmware\n"); 987 return status; 988 } 989 device_printf(sc->dev, 990 "Successfully adopted running firmware\n"); 991 if (sc->tx_boundary == 4096) { 992 device_printf(sc->dev, 993 "Using firmware currently running on NIC" 994 ". For optimal\n"); 995 device_printf(sc->dev, 996 "performance consider loading optimized " 997 "firmware\n"); 998 } 999 sc->fw_name = mxge_fw_unaligned; 1000 sc->tx_boundary = 2048; 1001 return 0; 1002 } 1003 /* clear confirmation addr */ 1004 confirm = (volatile uint32_t *)sc->cmd; 1005 *confirm = 0; 1006 wmb(); 1007 /* send a reload command to the bootstrap MCP, and wait for the 1008 response in the confirmation address. The firmware should 1009 write a -1 there to indicate it is alive and well 1010 */ 1011 1012 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 1013 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 1014 1015 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 1016 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 1017 buf[2] = htobe32(0xffffffff); /* confirm data */ 1018 1019 /* FIX: All newest firmware should un-protect the bottom of 1020 the sram before handoff. However, the very first interfaces 1021 do not. Therefore the handoff copy must skip the first 8 bytes 1022 */ 1023 /* where the code starts*/ 1024 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 1025 buf[4] = htobe32(size - 8); /* length of code */ 1026 buf[5] = htobe32(8); /* where to copy to */ 1027 buf[6] = htobe32(0); /* where to jump to */ 1028 1029 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 1030 mxge_pio_copy(submit, buf, 64); 1031 wmb(); 1032 DELAY(1000); 1033 wmb(); 1034 i = 0; 1035 while (*confirm != 0xffffffff && i < 20) { 1036 DELAY(1000*10); 1037 i++; 1038 bus_dmamap_sync(sc->cmd_dma.dmat, 1039 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 1040 } 1041 if (*confirm != 0xffffffff) { 1042 device_printf(sc->dev,"handoff failed (%p = 0x%x)", 1043 confirm, *confirm); 1044 1045 return ENXIO; 1046 } 1047 return 0; 1048 } 1049 1050 static int 1051 mxge_update_mac_address(mxge_softc_t *sc) 1052 { 1053 mxge_cmd_t cmd; 1054 uint8_t *addr = sc->mac_addr; 1055 int status; 1056 1057 1058 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 1059 | (addr[2] << 8) | addr[3]); 1060 1061 cmd.data1 = ((addr[4] << 8) | (addr[5])); 1062 1063 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 1064 return status; 1065 } 1066 1067 static int 1068 mxge_change_pause(mxge_softc_t *sc, int pause) 1069 { 1070 mxge_cmd_t cmd; 1071 int status; 1072 1073 if (pause) 1074 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, 1075 &cmd); 1076 else 1077 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, 1078 &cmd); 1079 1080 if (status) { 1081 device_printf(sc->dev, "Failed to set flow control mode\n"); 1082 return ENXIO; 1083 } 1084 sc->pause = pause; 1085 return 0; 1086 } 1087 1088 static void 1089 mxge_change_promisc(mxge_softc_t *sc, int promisc) 1090 { 1091 mxge_cmd_t cmd; 1092 int status; 1093 1094 if (mxge_always_promisc) 1095 promisc = 1; 1096 1097 if (promisc) 1098 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, 1099 &cmd); 1100 else 1101 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, 1102 &cmd); 1103 1104 if (status) { 1105 device_printf(sc->dev, "Failed to set promisc mode\n"); 1106 } 1107 } 1108 1109 static void 1110 mxge_set_multicast_list(mxge_softc_t *sc) 1111 { 1112 mxge_cmd_t cmd; 1113 struct ifmultiaddr *ifma; 1114 struct ifnet *ifp = sc->ifp; 1115 int err; 1116 1117 /* This firmware is known to not support multicast */ 1118 if (!sc->fw_multicast_support) 1119 return; 1120 1121 /* Disable multicast filtering while we play with the lists*/ 1122 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1123 if (err != 0) { 1124 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," 1125 " error status: %d\n", err); 1126 return; 1127 } 1128 1129 if (sc->adopted_rx_filter_bug) 1130 return; 1131 1132 if (ifp->if_flags & IFF_ALLMULTI) 1133 /* request to disable multicast filtering, so quit here */ 1134 return; 1135 1136 /* Flush all the filters */ 1137 1138 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1139 if (err != 0) { 1140 device_printf(sc->dev, 1141 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" 1142 ", error status: %d\n", err); 1143 return; 1144 } 1145 1146 /* Walk the multicast list, and add each address */ 1147 1148 if_maddr_rlock(ifp); 1149 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1150 if (ifma->ifma_addr->sa_family != AF_LINK) 1151 continue; 1152 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1153 &cmd.data0, 4); 1154 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, 1155 &cmd.data1, 2); 1156 cmd.data0 = htonl(cmd.data0); 1157 cmd.data1 = htonl(cmd.data1); 1158 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1159 if (err != 0) { 1160 device_printf(sc->dev, "Failed " 1161 "MXGEFW_JOIN_MULTICAST_GROUP, error status:" 1162 "%d\t", err); 1163 /* abort, leaving multicast filtering off */ 1164 if_maddr_runlock(ifp); 1165 return; 1166 } 1167 } 1168 if_maddr_runlock(ifp); 1169 /* Enable multicast filtering */ 1170 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1171 if (err != 0) { 1172 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI" 1173 ", error status: %d\n", err); 1174 } 1175 } 1176 1177 static int 1178 mxge_max_mtu(mxge_softc_t *sc) 1179 { 1180 mxge_cmd_t cmd; 1181 int status; 1182 1183 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) 1184 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1185 1186 /* try to set nbufs to see if it we can 1187 use virtually contiguous jumbos */ 1188 cmd.data0 = 0; 1189 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 1190 &cmd); 1191 if (status == 0) 1192 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1193 1194 /* otherwise, we're limited to MJUMPAGESIZE */ 1195 return MJUMPAGESIZE - MXGEFW_PAD; 1196 } 1197 1198 static int 1199 mxge_reset(mxge_softc_t *sc, int interrupts_setup) 1200 { 1201 struct mxge_slice_state *ss; 1202 mxge_rx_done_t *rx_done; 1203 volatile uint32_t *irq_claim; 1204 mxge_cmd_t cmd; 1205 int slice, status; 1206 1207 /* try to send a reset command to the card to see if it 1208 is alive */ 1209 memset(&cmd, 0, sizeof (cmd)); 1210 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1211 if (status != 0) { 1212 device_printf(sc->dev, "failed reset\n"); 1213 return ENXIO; 1214 } 1215 1216 mxge_dummy_rdma(sc, 1); 1217 1218 1219 /* set the intrq size */ 1220 cmd.data0 = sc->rx_ring_size; 1221 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1222 1223 /* 1224 * Even though we already know how many slices are supported 1225 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES 1226 * has magic side effects, and must be called after a reset. 1227 * It must be called prior to calling any RSS related cmds, 1228 * including assigning an interrupt queue for anything but 1229 * slice 0. It must also be called *after* 1230 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 1231 * the firmware to compute offsets. 1232 */ 1233 1234 if (sc->num_slices > 1) { 1235 /* ask the maximum number of slices it supports */ 1236 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 1237 &cmd); 1238 if (status != 0) { 1239 device_printf(sc->dev, 1240 "failed to get number of slices\n"); 1241 return status; 1242 } 1243 /* 1244 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 1245 * to setting up the interrupt queue DMA 1246 */ 1247 cmd.data0 = sc->num_slices; 1248 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 1249 #ifdef IFNET_BUF_RING 1250 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 1251 #endif 1252 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, 1253 &cmd); 1254 if (status != 0) { 1255 device_printf(sc->dev, 1256 "failed to set number of slices\n"); 1257 return status; 1258 } 1259 } 1260 1261 1262 if (interrupts_setup) { 1263 /* Now exchange information about interrupts */ 1264 for (slice = 0; slice < sc->num_slices; slice++) { 1265 rx_done = &sc->ss[slice].rx_done; 1266 memset(rx_done->entry, 0, sc->rx_ring_size); 1267 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr); 1268 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr); 1269 cmd.data2 = slice; 1270 status |= mxge_send_cmd(sc, 1271 MXGEFW_CMD_SET_INTRQ_DMA, 1272 &cmd); 1273 } 1274 } 1275 1276 status |= mxge_send_cmd(sc, 1277 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); 1278 1279 1280 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1281 1282 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1283 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1284 1285 1286 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1287 &cmd); 1288 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1289 if (status != 0) { 1290 device_printf(sc->dev, "failed set interrupt parameters\n"); 1291 return status; 1292 } 1293 1294 1295 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1296 1297 1298 /* run a DMA benchmark */ 1299 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST); 1300 1301 for (slice = 0; slice < sc->num_slices; slice++) { 1302 ss = &sc->ss[slice]; 1303 1304 ss->irq_claim = irq_claim + (2 * slice); 1305 /* reset mcp/driver shared state back to 0 */ 1306 ss->rx_done.idx = 0; 1307 ss->rx_done.cnt = 0; 1308 ss->tx.req = 0; 1309 ss->tx.done = 0; 1310 ss->tx.pkt_done = 0; 1311 ss->tx.queue_active = 0; 1312 ss->tx.activate = 0; 1313 ss->tx.deactivate = 0; 1314 ss->tx.wake = 0; 1315 ss->tx.defrag = 0; 1316 ss->tx.stall = 0; 1317 ss->rx_big.cnt = 0; 1318 ss->rx_small.cnt = 0; 1319 ss->lc.lro_bad_csum = 0; 1320 ss->lc.lro_queued = 0; 1321 ss->lc.lro_flushed = 0; 1322 if (ss->fw_stats != NULL) { 1323 bzero(ss->fw_stats, sizeof *ss->fw_stats); 1324 } 1325 } 1326 sc->rdma_tags_available = 15; 1327 status = mxge_update_mac_address(sc); 1328 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC); 1329 mxge_change_pause(sc, sc->pause); 1330 mxge_set_multicast_list(sc); 1331 if (sc->throttle) { 1332 cmd.data0 = sc->throttle; 1333 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, 1334 &cmd)) { 1335 device_printf(sc->dev, 1336 "can't enable throttle\n"); 1337 } 1338 } 1339 return status; 1340 } 1341 1342 static int 1343 mxge_change_throttle(SYSCTL_HANDLER_ARGS) 1344 { 1345 mxge_cmd_t cmd; 1346 mxge_softc_t *sc; 1347 int err; 1348 unsigned int throttle; 1349 1350 sc = arg1; 1351 throttle = sc->throttle; 1352 err = sysctl_handle_int(oidp, &throttle, arg2, req); 1353 if (err != 0) { 1354 return err; 1355 } 1356 1357 if (throttle == sc->throttle) 1358 return 0; 1359 1360 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE) 1361 return EINVAL; 1362 1363 mtx_lock(&sc->driver_mtx); 1364 cmd.data0 = throttle; 1365 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd); 1366 if (err == 0) 1367 sc->throttle = throttle; 1368 mtx_unlock(&sc->driver_mtx); 1369 return err; 1370 } 1371 1372 static int 1373 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1374 { 1375 mxge_softc_t *sc; 1376 unsigned int intr_coal_delay; 1377 int err; 1378 1379 sc = arg1; 1380 intr_coal_delay = sc->intr_coal_delay; 1381 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1382 if (err != 0) { 1383 return err; 1384 } 1385 if (intr_coal_delay == sc->intr_coal_delay) 1386 return 0; 1387 1388 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1389 return EINVAL; 1390 1391 mtx_lock(&sc->driver_mtx); 1392 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1393 sc->intr_coal_delay = intr_coal_delay; 1394 1395 mtx_unlock(&sc->driver_mtx); 1396 return err; 1397 } 1398 1399 static int 1400 mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1401 { 1402 mxge_softc_t *sc; 1403 unsigned int enabled; 1404 int err; 1405 1406 sc = arg1; 1407 enabled = sc->pause; 1408 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1409 if (err != 0) { 1410 return err; 1411 } 1412 if (enabled == sc->pause) 1413 return 0; 1414 1415 mtx_lock(&sc->driver_mtx); 1416 err = mxge_change_pause(sc, enabled); 1417 mtx_unlock(&sc->driver_mtx); 1418 return err; 1419 } 1420 1421 static int 1422 mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1423 { 1424 int err; 1425 1426 if (arg1 == NULL) 1427 return EFAULT; 1428 arg2 = be32toh(*(int *)arg1); 1429 arg1 = NULL; 1430 err = sysctl_handle_int(oidp, arg1, arg2, req); 1431 1432 return err; 1433 } 1434 1435 static void 1436 mxge_rem_sysctls(mxge_softc_t *sc) 1437 { 1438 struct mxge_slice_state *ss; 1439 int slice; 1440 1441 if (sc->slice_sysctl_tree == NULL) 1442 return; 1443 1444 for (slice = 0; slice < sc->num_slices; slice++) { 1445 ss = &sc->ss[slice]; 1446 if (ss == NULL || ss->sysctl_tree == NULL) 1447 continue; 1448 sysctl_ctx_free(&ss->sysctl_ctx); 1449 ss->sysctl_tree = NULL; 1450 } 1451 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1452 sc->slice_sysctl_tree = NULL; 1453 } 1454 1455 static void 1456 mxge_add_sysctls(mxge_softc_t *sc) 1457 { 1458 struct sysctl_ctx_list *ctx; 1459 struct sysctl_oid_list *children; 1460 mcp_irq_data_t *fw; 1461 struct mxge_slice_state *ss; 1462 int slice; 1463 char slice_num[8]; 1464 1465 ctx = device_get_sysctl_ctx(sc->dev); 1466 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1467 fw = sc->ss[0].fw_stats; 1468 1469 /* random information */ 1470 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1471 "firmware_version", 1472 CTLFLAG_RD, sc->fw_version, 1473 0, "firmware version"); 1474 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1475 "serial_number", 1476 CTLFLAG_RD, sc->serial_number_string, 1477 0, "serial number"); 1478 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1479 "product_code", 1480 CTLFLAG_RD, sc->product_code_string, 1481 0, "product_code"); 1482 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1483 "pcie_link_width", 1484 CTLFLAG_RD, &sc->link_width, 1485 0, "tx_boundary"); 1486 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1487 "tx_boundary", 1488 CTLFLAG_RD, &sc->tx_boundary, 1489 0, "tx_boundary"); 1490 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1491 "write_combine", 1492 CTLFLAG_RD, &sc->wc, 1493 0, "write combining PIO?"); 1494 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1495 "read_dma_MBs", 1496 CTLFLAG_RD, &sc->read_dma, 1497 0, "DMA Read speed in MB/s"); 1498 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1499 "write_dma_MBs", 1500 CTLFLAG_RD, &sc->write_dma, 1501 0, "DMA Write speed in MB/s"); 1502 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1503 "read_write_dma_MBs", 1504 CTLFLAG_RD, &sc->read_write_dma, 1505 0, "DMA concurrent Read/Write speed in MB/s"); 1506 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1507 "watchdog_resets", 1508 CTLFLAG_RD, &sc->watchdog_resets, 1509 0, "Number of times NIC was reset"); 1510 1511 1512 /* performance related tunables */ 1513 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1514 "intr_coal_delay", 1515 CTLTYPE_INT|CTLFLAG_RW, sc, 1516 0, mxge_change_intr_coal, 1517 "I", "interrupt coalescing delay in usecs"); 1518 1519 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1520 "throttle", 1521 CTLTYPE_INT|CTLFLAG_RW, sc, 1522 0, mxge_change_throttle, 1523 "I", "transmit throttling"); 1524 1525 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1526 "flow_control_enabled", 1527 CTLTYPE_INT|CTLFLAG_RW, sc, 1528 0, mxge_change_flow_control, 1529 "I", "interrupt coalescing delay in usecs"); 1530 1531 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1532 "deassert_wait", 1533 CTLFLAG_RW, &mxge_deassert_wait, 1534 0, "Wait for IRQ line to go low in ihandler"); 1535 1536 /* stats block from firmware is in network byte order. 1537 Need to swap it */ 1538 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1539 "link_up", 1540 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 1541 0, mxge_handle_be32, 1542 "I", "link up"); 1543 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1544 "rdma_tags_available", 1545 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 1546 0, mxge_handle_be32, 1547 "I", "rdma_tags_available"); 1548 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1549 "dropped_bad_crc32", 1550 CTLTYPE_INT|CTLFLAG_RD, 1551 &fw->dropped_bad_crc32, 1552 0, mxge_handle_be32, 1553 "I", "dropped_bad_crc32"); 1554 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1555 "dropped_bad_phy", 1556 CTLTYPE_INT|CTLFLAG_RD, 1557 &fw->dropped_bad_phy, 1558 0, mxge_handle_be32, 1559 "I", "dropped_bad_phy"); 1560 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1561 "dropped_link_error_or_filtered", 1562 CTLTYPE_INT|CTLFLAG_RD, 1563 &fw->dropped_link_error_or_filtered, 1564 0, mxge_handle_be32, 1565 "I", "dropped_link_error_or_filtered"); 1566 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1567 "dropped_link_overflow", 1568 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 1569 0, mxge_handle_be32, 1570 "I", "dropped_link_overflow"); 1571 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1572 "dropped_multicast_filtered", 1573 CTLTYPE_INT|CTLFLAG_RD, 1574 &fw->dropped_multicast_filtered, 1575 0, mxge_handle_be32, 1576 "I", "dropped_multicast_filtered"); 1577 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1578 "dropped_no_big_buffer", 1579 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 1580 0, mxge_handle_be32, 1581 "I", "dropped_no_big_buffer"); 1582 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1583 "dropped_no_small_buffer", 1584 CTLTYPE_INT|CTLFLAG_RD, 1585 &fw->dropped_no_small_buffer, 1586 0, mxge_handle_be32, 1587 "I", "dropped_no_small_buffer"); 1588 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1589 "dropped_overrun", 1590 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 1591 0, mxge_handle_be32, 1592 "I", "dropped_overrun"); 1593 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1594 "dropped_pause", 1595 CTLTYPE_INT|CTLFLAG_RD, 1596 &fw->dropped_pause, 1597 0, mxge_handle_be32, 1598 "I", "dropped_pause"); 1599 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1600 "dropped_runt", 1601 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 1602 0, mxge_handle_be32, 1603 "I", "dropped_runt"); 1604 1605 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1606 "dropped_unicast_filtered", 1607 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 1608 0, mxge_handle_be32, 1609 "I", "dropped_unicast_filtered"); 1610 1611 /* verbose printing? */ 1612 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1613 "verbose", 1614 CTLFLAG_RW, &mxge_verbose, 1615 0, "verbose printing"); 1616 1617 /* add counters exported for debugging from all slices */ 1618 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1619 sc->slice_sysctl_tree = 1620 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO, 1621 "slice", CTLFLAG_RD, 0, ""); 1622 1623 for (slice = 0; slice < sc->num_slices; slice++) { 1624 ss = &sc->ss[slice]; 1625 sysctl_ctx_init(&ss->sysctl_ctx); 1626 ctx = &ss->sysctl_ctx; 1627 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1628 sprintf(slice_num, "%d", slice); 1629 ss->sysctl_tree = 1630 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num, 1631 CTLFLAG_RD, 0, ""); 1632 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1633 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1634 "rx_small_cnt", 1635 CTLFLAG_RD, &ss->rx_small.cnt, 1636 0, "rx_small_cnt"); 1637 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1638 "rx_big_cnt", 1639 CTLFLAG_RD, &ss->rx_big.cnt, 1640 0, "rx_small_cnt"); 1641 SYSCTL_ADD_U64(ctx, children, OID_AUTO, 1642 "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed, 1643 0, "number of lro merge queues flushed"); 1644 1645 SYSCTL_ADD_U64(ctx, children, OID_AUTO, 1646 "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum, 1647 0, "number of bad csums preventing LRO"); 1648 1649 SYSCTL_ADD_U64(ctx, children, OID_AUTO, 1650 "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued, 1651 0, "number of frames appended to lro merge" 1652 "queues"); 1653 1654 #ifndef IFNET_BUF_RING 1655 /* only transmit from slice 0 for now */ 1656 if (slice > 0) 1657 continue; 1658 #endif 1659 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1660 "tx_req", 1661 CTLFLAG_RD, &ss->tx.req, 1662 0, "tx_req"); 1663 1664 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1665 "tx_done", 1666 CTLFLAG_RD, &ss->tx.done, 1667 0, "tx_done"); 1668 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1669 "tx_pkt_done", 1670 CTLFLAG_RD, &ss->tx.pkt_done, 1671 0, "tx_done"); 1672 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1673 "tx_stall", 1674 CTLFLAG_RD, &ss->tx.stall, 1675 0, "tx_stall"); 1676 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1677 "tx_wake", 1678 CTLFLAG_RD, &ss->tx.wake, 1679 0, "tx_wake"); 1680 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1681 "tx_defrag", 1682 CTLFLAG_RD, &ss->tx.defrag, 1683 0, "tx_defrag"); 1684 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1685 "tx_queue_active", 1686 CTLFLAG_RD, &ss->tx.queue_active, 1687 0, "tx_queue_active"); 1688 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1689 "tx_activate", 1690 CTLFLAG_RD, &ss->tx.activate, 1691 0, "tx_activate"); 1692 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1693 "tx_deactivate", 1694 CTLFLAG_RD, &ss->tx.deactivate, 1695 0, "tx_deactivate"); 1696 } 1697 } 1698 1699 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1700 backwards one at a time and handle ring wraps */ 1701 1702 static inline void 1703 mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1704 mcp_kreq_ether_send_t *src, int cnt) 1705 { 1706 int idx, starting_slot; 1707 starting_slot = tx->req; 1708 while (cnt > 1) { 1709 cnt--; 1710 idx = (starting_slot + cnt) & tx->mask; 1711 mxge_pio_copy(&tx->lanai[idx], 1712 &src[cnt], sizeof(*src)); 1713 wmb(); 1714 } 1715 } 1716 1717 /* 1718 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1719 * at most 32 bytes at a time, so as to avoid involving the software 1720 * pio handler in the nic. We re-write the first segment's flags 1721 * to mark them valid only after writing the entire chain 1722 */ 1723 1724 static inline void 1725 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 1726 int cnt) 1727 { 1728 int idx, i; 1729 uint32_t *src_ints; 1730 volatile uint32_t *dst_ints; 1731 mcp_kreq_ether_send_t *srcp; 1732 volatile mcp_kreq_ether_send_t *dstp, *dst; 1733 uint8_t last_flags; 1734 1735 idx = tx->req & tx->mask; 1736 1737 last_flags = src->flags; 1738 src->flags = 0; 1739 wmb(); 1740 dst = dstp = &tx->lanai[idx]; 1741 srcp = src; 1742 1743 if ((idx + cnt) < tx->mask) { 1744 for (i = 0; i < (cnt - 1); i += 2) { 1745 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1746 wmb(); /* force write every 32 bytes */ 1747 srcp += 2; 1748 dstp += 2; 1749 } 1750 } else { 1751 /* submit all but the first request, and ensure 1752 that it is submitted below */ 1753 mxge_submit_req_backwards(tx, src, cnt); 1754 i = 0; 1755 } 1756 if (i < cnt) { 1757 /* submit the first request */ 1758 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1759 wmb(); /* barrier before setting valid flag */ 1760 } 1761 1762 /* re-write the last 32-bits with the valid flags */ 1763 src->flags = last_flags; 1764 src_ints = (uint32_t *)src; 1765 src_ints+=3; 1766 dst_ints = (volatile uint32_t *)dst; 1767 dst_ints+=3; 1768 *dst_ints = *src_ints; 1769 tx->req += cnt; 1770 wmb(); 1771 } 1772 1773 static int 1774 mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m, 1775 struct mxge_pkt_info *pi) 1776 { 1777 struct ether_vlan_header *eh; 1778 uint16_t etype; 1779 int tso = m->m_pkthdr.csum_flags & (CSUM_TSO); 1780 #if IFCAP_TSO6 && defined(INET6) 1781 int nxt; 1782 #endif 1783 1784 eh = mtod(m, struct ether_vlan_header *); 1785 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 1786 etype = ntohs(eh->evl_proto); 1787 pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 1788 } else { 1789 etype = ntohs(eh->evl_encap_proto); 1790 pi->ip_off = ETHER_HDR_LEN; 1791 } 1792 1793 switch (etype) { 1794 case ETHERTYPE_IP: 1795 /* 1796 * ensure ip header is in first mbuf, copy it to a 1797 * scratch buffer if not 1798 */ 1799 pi->ip = (struct ip *)(m->m_data + pi->ip_off); 1800 pi->ip6 = NULL; 1801 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) { 1802 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip), 1803 ss->scratch); 1804 pi->ip = (struct ip *)(ss->scratch + pi->ip_off); 1805 } 1806 pi->ip_hlen = pi->ip->ip_hl << 2; 1807 if (!tso) 1808 return 0; 1809 1810 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen + 1811 sizeof(struct tcphdr))) { 1812 m_copydata(m, 0, pi->ip_off + pi->ip_hlen + 1813 sizeof(struct tcphdr), ss->scratch); 1814 pi->ip = (struct ip *)(ss->scratch + pi->ip_off); 1815 } 1816 pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen); 1817 break; 1818 #if IFCAP_TSO6 && defined(INET6) 1819 case ETHERTYPE_IPV6: 1820 pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off); 1821 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) { 1822 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6), 1823 ss->scratch); 1824 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off); 1825 } 1826 nxt = 0; 1827 pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt); 1828 pi->ip_hlen -= pi->ip_off; 1829 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) 1830 return EINVAL; 1831 1832 if (!tso) 1833 return 0; 1834 1835 if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen) 1836 return EINVAL; 1837 1838 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen + 1839 sizeof(struct tcphdr))) { 1840 m_copydata(m, 0, pi->ip_off + pi->ip_hlen + 1841 sizeof(struct tcphdr), ss->scratch); 1842 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off); 1843 } 1844 pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen); 1845 break; 1846 #endif 1847 default: 1848 return EINVAL; 1849 } 1850 return 0; 1851 } 1852 1853 #if IFCAP_TSO4 1854 1855 static void 1856 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m, 1857 int busdma_seg_cnt, struct mxge_pkt_info *pi) 1858 { 1859 mxge_tx_ring_t *tx; 1860 mcp_kreq_ether_send_t *req; 1861 bus_dma_segment_t *seg; 1862 uint32_t low, high_swapped; 1863 int len, seglen, cum_len, cum_len_next; 1864 int next_is_first, chop, cnt, rdma_count, small; 1865 uint16_t pseudo_hdr_offset, cksum_offset, mss, sum; 1866 uint8_t flags, flags_next; 1867 static int once; 1868 1869 mss = m->m_pkthdr.tso_segsz; 1870 1871 /* negative cum_len signifies to the 1872 * send loop that we are still in the 1873 * header portion of the TSO packet. 1874 */ 1875 1876 cksum_offset = pi->ip_off + pi->ip_hlen; 1877 cum_len = -(cksum_offset + (pi->tcp->th_off << 2)); 1878 1879 /* TSO implies checksum offload on this hardware */ 1880 if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) { 1881 /* 1882 * If packet has full TCP csum, replace it with pseudo hdr 1883 * sum that the NIC expects, otherwise the NIC will emit 1884 * packets with bad TCP checksums. 1885 */ 1886 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); 1887 if (pi->ip6) { 1888 #if (CSUM_TCP_IPV6 != 0) && defined(INET6) 1889 m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6; 1890 sum = in6_cksum_pseudo(pi->ip6, 1891 m->m_pkthdr.len - cksum_offset, 1892 IPPROTO_TCP, 0); 1893 #endif 1894 } else { 1895 #ifdef INET 1896 m->m_pkthdr.csum_flags |= CSUM_TCP; 1897 sum = in_pseudo(pi->ip->ip_src.s_addr, 1898 pi->ip->ip_dst.s_addr, 1899 htons(IPPROTO_TCP + (m->m_pkthdr.len - 1900 cksum_offset))); 1901 #endif 1902 } 1903 m_copyback(m, offsetof(struct tcphdr, th_sum) + 1904 cksum_offset, sizeof(sum), (caddr_t)&sum); 1905 } 1906 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1907 1908 1909 /* for TSO, pseudo_hdr_offset holds mss. 1910 * The firmware figures out where to put 1911 * the checksum by parsing the header. */ 1912 pseudo_hdr_offset = htobe16(mss); 1913 1914 if (pi->ip6) { 1915 /* 1916 * for IPv6 TSO, the "checksum offset" is re-purposed 1917 * to store the TCP header len 1918 */ 1919 cksum_offset = (pi->tcp->th_off << 2); 1920 } 1921 1922 tx = &ss->tx; 1923 req = tx->req_list; 1924 seg = tx->seg_list; 1925 cnt = 0; 1926 rdma_count = 0; 1927 /* "rdma_count" is the number of RDMAs belonging to the 1928 * current packet BEFORE the current send request. For 1929 * non-TSO packets, this is equal to "count". 1930 * For TSO packets, rdma_count needs to be reset 1931 * to 0 after a segment cut. 1932 * 1933 * The rdma_count field of the send request is 1934 * the number of RDMAs of the packet starting at 1935 * that request. For TSO send requests with one ore more cuts 1936 * in the middle, this is the number of RDMAs starting 1937 * after the last cut in the request. All previous 1938 * segments before the last cut implicitly have 1 RDMA. 1939 * 1940 * Since the number of RDMAs is not known beforehand, 1941 * it must be filled-in retroactively - after each 1942 * segmentation cut or at the end of the entire packet. 1943 */ 1944 1945 while (busdma_seg_cnt) { 1946 /* Break the busdma segment up into pieces*/ 1947 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1948 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1949 len = seg->ds_len; 1950 1951 while (len) { 1952 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1953 seglen = len; 1954 cum_len_next = cum_len + seglen; 1955 (req-rdma_count)->rdma_count = rdma_count + 1; 1956 if (__predict_true(cum_len >= 0)) { 1957 /* payload */ 1958 chop = (cum_len_next > mss); 1959 cum_len_next = cum_len_next % mss; 1960 next_is_first = (cum_len_next == 0); 1961 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1962 flags_next |= next_is_first * 1963 MXGEFW_FLAGS_FIRST; 1964 rdma_count |= -(chop | next_is_first); 1965 rdma_count += chop & !next_is_first; 1966 } else if (cum_len_next >= 0) { 1967 /* header ends */ 1968 rdma_count = -1; 1969 cum_len_next = 0; 1970 seglen = -cum_len; 1971 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1972 flags_next = MXGEFW_FLAGS_TSO_PLD | 1973 MXGEFW_FLAGS_FIRST | 1974 (small * MXGEFW_FLAGS_SMALL); 1975 } 1976 1977 req->addr_high = high_swapped; 1978 req->addr_low = htobe32(low); 1979 req->pseudo_hdr_offset = pseudo_hdr_offset; 1980 req->pad = 0; 1981 req->rdma_count = 1; 1982 req->length = htobe16(seglen); 1983 req->cksum_offset = cksum_offset; 1984 req->flags = flags | ((cum_len & 1) * 1985 MXGEFW_FLAGS_ALIGN_ODD); 1986 low += seglen; 1987 len -= seglen; 1988 cum_len = cum_len_next; 1989 flags = flags_next; 1990 req++; 1991 cnt++; 1992 rdma_count++; 1993 if (cksum_offset != 0 && !pi->ip6) { 1994 if (__predict_false(cksum_offset > seglen)) 1995 cksum_offset -= seglen; 1996 else 1997 cksum_offset = 0; 1998 } 1999 if (__predict_false(cnt > tx->max_desc)) 2000 goto drop; 2001 } 2002 busdma_seg_cnt--; 2003 seg++; 2004 } 2005 (req-rdma_count)->rdma_count = rdma_count; 2006 2007 do { 2008 req--; 2009 req->flags |= MXGEFW_FLAGS_TSO_LAST; 2010 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 2011 2012 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2013 mxge_submit_req(tx, tx->req_list, cnt); 2014 #ifdef IFNET_BUF_RING 2015 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2016 /* tell the NIC to start polling this slice */ 2017 *tx->send_go = 1; 2018 tx->queue_active = 1; 2019 tx->activate++; 2020 wmb(); 2021 } 2022 #endif 2023 return; 2024 2025 drop: 2026 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 2027 m_freem(m); 2028 ss->oerrors++; 2029 if (!once) { 2030 printf("tx->max_desc exceeded via TSO!\n"); 2031 printf("mss = %d, %ld, %d!\n", mss, 2032 (long)seg - (long)tx->seg_list, tx->max_desc); 2033 once = 1; 2034 } 2035 return; 2036 2037 } 2038 2039 #endif /* IFCAP_TSO4 */ 2040 2041 #ifdef MXGE_NEW_VLAN_API 2042 /* 2043 * We reproduce the software vlan tag insertion from 2044 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" 2045 * vlan tag insertion. We need to advertise this in order to have the 2046 * vlan interface respect our csum offload flags. 2047 */ 2048 static struct mbuf * 2049 mxge_vlan_tag_insert(struct mbuf *m) 2050 { 2051 struct ether_vlan_header *evl; 2052 2053 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT); 2054 if (__predict_false(m == NULL)) 2055 return NULL; 2056 if (m->m_len < sizeof(*evl)) { 2057 m = m_pullup(m, sizeof(*evl)); 2058 if (__predict_false(m == NULL)) 2059 return NULL; 2060 } 2061 /* 2062 * Transform the Ethernet header into an Ethernet header 2063 * with 802.1Q encapsulation. 2064 */ 2065 evl = mtod(m, struct ether_vlan_header *); 2066 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 2067 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 2068 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 2069 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 2070 m->m_flags &= ~M_VLANTAG; 2071 return m; 2072 } 2073 #endif /* MXGE_NEW_VLAN_API */ 2074 2075 static void 2076 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 2077 { 2078 struct mxge_pkt_info pi = {0,0,0,0}; 2079 mxge_softc_t *sc; 2080 mcp_kreq_ether_send_t *req; 2081 bus_dma_segment_t *seg; 2082 struct mbuf *m_tmp; 2083 struct ifnet *ifp; 2084 mxge_tx_ring_t *tx; 2085 int cnt, cum_len, err, i, idx, odd_flag; 2086 uint16_t pseudo_hdr_offset; 2087 uint8_t flags, cksum_offset; 2088 2089 2090 sc = ss->sc; 2091 ifp = sc->ifp; 2092 tx = &ss->tx; 2093 2094 #ifdef MXGE_NEW_VLAN_API 2095 if (m->m_flags & M_VLANTAG) { 2096 m = mxge_vlan_tag_insert(m); 2097 if (__predict_false(m == NULL)) 2098 goto drop_without_m; 2099 } 2100 #endif 2101 if (m->m_pkthdr.csum_flags & 2102 (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { 2103 if (mxge_parse_tx(ss, m, &pi)) 2104 goto drop; 2105 } 2106 2107 /* (try to) map the frame for DMA */ 2108 idx = tx->req & tx->mask; 2109 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 2110 m, tx->seg_list, &cnt, 2111 BUS_DMA_NOWAIT); 2112 if (__predict_false(err == EFBIG)) { 2113 /* Too many segments in the chain. Try 2114 to defrag */ 2115 m_tmp = m_defrag(m, M_NOWAIT); 2116 if (m_tmp == NULL) { 2117 goto drop; 2118 } 2119 ss->tx.defrag++; 2120 m = m_tmp; 2121 err = bus_dmamap_load_mbuf_sg(tx->dmat, 2122 tx->info[idx].map, 2123 m, tx->seg_list, &cnt, 2124 BUS_DMA_NOWAIT); 2125 } 2126 if (__predict_false(err != 0)) { 2127 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 2128 " packet len = %d\n", err, m->m_pkthdr.len); 2129 goto drop; 2130 } 2131 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 2132 BUS_DMASYNC_PREWRITE); 2133 tx->info[idx].m = m; 2134 2135 #if IFCAP_TSO4 2136 /* TSO is different enough, we handle it in another routine */ 2137 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 2138 mxge_encap_tso(ss, m, cnt, &pi); 2139 return; 2140 } 2141 #endif 2142 2143 req = tx->req_list; 2144 cksum_offset = 0; 2145 pseudo_hdr_offset = 0; 2146 flags = MXGEFW_FLAGS_NO_TSO; 2147 2148 /* checksum offloading? */ 2149 if (m->m_pkthdr.csum_flags & 2150 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { 2151 /* ensure ip header is in first mbuf, copy 2152 it to a scratch buffer if not */ 2153 cksum_offset = pi.ip_off + pi.ip_hlen; 2154 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2155 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2156 req->cksum_offset = cksum_offset; 2157 flags |= MXGEFW_FLAGS_CKSUM; 2158 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2159 } else { 2160 odd_flag = 0; 2161 } 2162 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2163 flags |= MXGEFW_FLAGS_SMALL; 2164 2165 /* convert segments into a request list */ 2166 cum_len = 0; 2167 seg = tx->seg_list; 2168 req->flags = MXGEFW_FLAGS_FIRST; 2169 for (i = 0; i < cnt; i++) { 2170 req->addr_low = 2171 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2172 req->addr_high = 2173 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2174 req->length = htobe16(seg->ds_len); 2175 req->cksum_offset = cksum_offset; 2176 if (cksum_offset > seg->ds_len) 2177 cksum_offset -= seg->ds_len; 2178 else 2179 cksum_offset = 0; 2180 req->pseudo_hdr_offset = pseudo_hdr_offset; 2181 req->pad = 0; /* complete solid 16-byte block */ 2182 req->rdma_count = 1; 2183 req->flags |= flags | ((cum_len & 1) * odd_flag); 2184 cum_len += seg->ds_len; 2185 seg++; 2186 req++; 2187 req->flags = 0; 2188 } 2189 req--; 2190 /* pad runts to 60 bytes */ 2191 if (cum_len < 60) { 2192 req++; 2193 req->addr_low = 2194 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2195 req->addr_high = 2196 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2197 req->length = htobe16(60 - cum_len); 2198 req->cksum_offset = 0; 2199 req->pseudo_hdr_offset = pseudo_hdr_offset; 2200 req->pad = 0; /* complete solid 16-byte block */ 2201 req->rdma_count = 1; 2202 req->flags |= flags | ((cum_len & 1) * odd_flag); 2203 cnt++; 2204 } 2205 2206 tx->req_list[0].rdma_count = cnt; 2207 #if 0 2208 /* print what the firmware will see */ 2209 for (i = 0; i < cnt; i++) { 2210 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2211 "cso:%d, flags:0x%x, rdma:%d\n", 2212 i, (int)ntohl(tx->req_list[i].addr_high), 2213 (int)ntohl(tx->req_list[i].addr_low), 2214 (int)ntohs(tx->req_list[i].length), 2215 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2216 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2217 tx->req_list[i].rdma_count); 2218 } 2219 printf("--------------\n"); 2220 #endif 2221 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2222 mxge_submit_req(tx, tx->req_list, cnt); 2223 #ifdef IFNET_BUF_RING 2224 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2225 /* tell the NIC to start polling this slice */ 2226 *tx->send_go = 1; 2227 tx->queue_active = 1; 2228 tx->activate++; 2229 wmb(); 2230 } 2231 #endif 2232 return; 2233 2234 drop: 2235 m_freem(m); 2236 drop_without_m: 2237 ss->oerrors++; 2238 return; 2239 } 2240 2241 #ifdef IFNET_BUF_RING 2242 static void 2243 mxge_qflush(struct ifnet *ifp) 2244 { 2245 mxge_softc_t *sc = ifp->if_softc; 2246 mxge_tx_ring_t *tx; 2247 struct mbuf *m; 2248 int slice; 2249 2250 for (slice = 0; slice < sc->num_slices; slice++) { 2251 tx = &sc->ss[slice].tx; 2252 mtx_lock(&tx->mtx); 2253 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2254 m_freem(m); 2255 mtx_unlock(&tx->mtx); 2256 } 2257 if_qflush(ifp); 2258 } 2259 2260 static inline void 2261 mxge_start_locked(struct mxge_slice_state *ss) 2262 { 2263 mxge_softc_t *sc; 2264 struct mbuf *m; 2265 struct ifnet *ifp; 2266 mxge_tx_ring_t *tx; 2267 2268 sc = ss->sc; 2269 ifp = sc->ifp; 2270 tx = &ss->tx; 2271 2272 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2273 m = drbr_dequeue(ifp, tx->br); 2274 if (m == NULL) { 2275 return; 2276 } 2277 /* let BPF see it */ 2278 BPF_MTAP(ifp, m); 2279 2280 /* give it to the nic */ 2281 mxge_encap(ss, m); 2282 } 2283 /* ran out of transmit slots */ 2284 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) 2285 && (!drbr_empty(ifp, tx->br))) { 2286 ss->if_drv_flags |= IFF_DRV_OACTIVE; 2287 tx->stall++; 2288 } 2289 } 2290 2291 static int 2292 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2293 { 2294 mxge_softc_t *sc; 2295 struct ifnet *ifp; 2296 mxge_tx_ring_t *tx; 2297 int err; 2298 2299 sc = ss->sc; 2300 ifp = sc->ifp; 2301 tx = &ss->tx; 2302 2303 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 2304 IFF_DRV_RUNNING) { 2305 err = drbr_enqueue(ifp, tx->br, m); 2306 return (err); 2307 } 2308 2309 if (!drbr_needs_enqueue(ifp, tx->br) && 2310 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2311 /* let BPF see it */ 2312 BPF_MTAP(ifp, m); 2313 /* give it to the nic */ 2314 mxge_encap(ss, m); 2315 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2316 return (err); 2317 } 2318 if (!drbr_empty(ifp, tx->br)) 2319 mxge_start_locked(ss); 2320 return (0); 2321 } 2322 2323 static int 2324 mxge_transmit(struct ifnet *ifp, struct mbuf *m) 2325 { 2326 mxge_softc_t *sc = ifp->if_softc; 2327 struct mxge_slice_state *ss; 2328 mxge_tx_ring_t *tx; 2329 int err = 0; 2330 int slice; 2331 2332 slice = m->m_pkthdr.flowid; 2333 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ 2334 2335 ss = &sc->ss[slice]; 2336 tx = &ss->tx; 2337 2338 if (mtx_trylock(&tx->mtx)) { 2339 err = mxge_transmit_locked(ss, m); 2340 mtx_unlock(&tx->mtx); 2341 } else { 2342 err = drbr_enqueue(ifp, tx->br, m); 2343 } 2344 2345 return (err); 2346 } 2347 2348 #else 2349 2350 static inline void 2351 mxge_start_locked(struct mxge_slice_state *ss) 2352 { 2353 mxge_softc_t *sc; 2354 struct mbuf *m; 2355 struct ifnet *ifp; 2356 mxge_tx_ring_t *tx; 2357 2358 sc = ss->sc; 2359 ifp = sc->ifp; 2360 tx = &ss->tx; 2361 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2362 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 2363 if (m == NULL) { 2364 return; 2365 } 2366 /* let BPF see it */ 2367 BPF_MTAP(ifp, m); 2368 2369 /* give it to the nic */ 2370 mxge_encap(ss, m); 2371 } 2372 /* ran out of transmit slots */ 2373 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 2374 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2375 tx->stall++; 2376 } 2377 } 2378 #endif 2379 static void 2380 mxge_start(struct ifnet *ifp) 2381 { 2382 mxge_softc_t *sc = ifp->if_softc; 2383 struct mxge_slice_state *ss; 2384 2385 /* only use the first slice for now */ 2386 ss = &sc->ss[0]; 2387 mtx_lock(&ss->tx.mtx); 2388 mxge_start_locked(ss); 2389 mtx_unlock(&ss->tx.mtx); 2390 } 2391 2392 /* 2393 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2394 * at most 32 bytes at a time, so as to avoid involving the software 2395 * pio handler in the nic. We re-write the first segment's low 2396 * DMA address to mark it valid only after we write the entire chunk 2397 * in a burst 2398 */ 2399 static inline void 2400 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2401 mcp_kreq_ether_recv_t *src) 2402 { 2403 uint32_t low; 2404 2405 low = src->addr_low; 2406 src->addr_low = 0xffffffff; 2407 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2408 wmb(); 2409 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2410 wmb(); 2411 src->addr_low = low; 2412 dst->addr_low = low; 2413 wmb(); 2414 } 2415 2416 static int 2417 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2418 { 2419 bus_dma_segment_t seg; 2420 struct mbuf *m; 2421 mxge_rx_ring_t *rx = &ss->rx_small; 2422 int cnt, err; 2423 2424 m = m_gethdr(M_NOWAIT, MT_DATA); 2425 if (m == NULL) { 2426 rx->alloc_fail++; 2427 err = ENOBUFS; 2428 goto done; 2429 } 2430 m->m_len = MHLEN; 2431 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2432 &seg, &cnt, BUS_DMA_NOWAIT); 2433 if (err != 0) { 2434 m_free(m); 2435 goto done; 2436 } 2437 rx->info[idx].m = m; 2438 rx->shadow[idx].addr_low = 2439 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2440 rx->shadow[idx].addr_high = 2441 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2442 2443 done: 2444 if ((idx & 7) == 7) 2445 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2446 return err; 2447 } 2448 2449 static int 2450 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2451 { 2452 bus_dma_segment_t seg[3]; 2453 struct mbuf *m; 2454 mxge_rx_ring_t *rx = &ss->rx_big; 2455 int cnt, err, i; 2456 2457 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2458 if (m == NULL) { 2459 rx->alloc_fail++; 2460 err = ENOBUFS; 2461 goto done; 2462 } 2463 m->m_len = rx->mlen; 2464 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2465 seg, &cnt, BUS_DMA_NOWAIT); 2466 if (err != 0) { 2467 m_free(m); 2468 goto done; 2469 } 2470 rx->info[idx].m = m; 2471 rx->shadow[idx].addr_low = 2472 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2473 rx->shadow[idx].addr_high = 2474 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2475 2476 #if MXGE_VIRT_JUMBOS 2477 for (i = 1; i < cnt; i++) { 2478 rx->shadow[idx + i].addr_low = 2479 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2480 rx->shadow[idx + i].addr_high = 2481 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2482 } 2483 #endif 2484 2485 done: 2486 for (i = 0; i < rx->nbufs; i++) { 2487 if ((idx & 7) == 7) { 2488 mxge_submit_8rx(&rx->lanai[idx - 7], 2489 &rx->shadow[idx - 7]); 2490 } 2491 idx++; 2492 } 2493 return err; 2494 } 2495 2496 #ifdef INET6 2497 2498 static uint16_t 2499 mxge_csum_generic(uint16_t *raw, int len) 2500 { 2501 uint32_t csum; 2502 2503 2504 csum = 0; 2505 while (len > 0) { 2506 csum += *raw; 2507 raw++; 2508 len -= 2; 2509 } 2510 csum = (csum >> 16) + (csum & 0xffff); 2511 csum = (csum >> 16) + (csum & 0xffff); 2512 return (uint16_t)csum; 2513 } 2514 2515 static inline uint16_t 2516 mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum) 2517 { 2518 uint32_t partial; 2519 int nxt, cksum_offset; 2520 struct ip6_hdr *ip6 = p; 2521 uint16_t c; 2522 2523 nxt = ip6->ip6_nxt; 2524 cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN; 2525 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) { 2526 cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN, 2527 IPPROTO_IPV6, &nxt); 2528 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) 2529 return (1); 2530 } 2531 2532 /* 2533 * IPv6 headers do not contain a checksum, and hence 2534 * do not checksum to zero, so they don't "fall out" 2535 * of the partial checksum calculation like IPv4 2536 * headers do. We need to fix the partial checksum by 2537 * subtracting the checksum of the IPv6 header. 2538 */ 2539 2540 partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset - 2541 ETHER_HDR_LEN); 2542 csum += ~partial; 2543 csum += (csum < ~partial); 2544 csum = (csum >> 16) + (csum & 0xFFFF); 2545 csum = (csum >> 16) + (csum & 0xFFFF); 2546 c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt, 2547 csum); 2548 c ^= 0xffff; 2549 return (c); 2550 } 2551 #endif /* INET6 */ 2552 /* 2553 * Myri10GE hardware checksums are not valid if the sender 2554 * padded the frame with non-zero padding. This is because 2555 * the firmware just does a simple 16-bit 1s complement 2556 * checksum across the entire frame, excluding the first 14 2557 * bytes. It is best to simply to check the checksum and 2558 * tell the stack about it only if the checksum is good 2559 */ 2560 2561 static inline uint16_t 2562 mxge_rx_csum(struct mbuf *m, int csum) 2563 { 2564 struct ether_header *eh; 2565 #ifdef INET 2566 struct ip *ip; 2567 #endif 2568 #if defined(INET) || defined(INET6) 2569 int cap = m->m_pkthdr.rcvif->if_capenable; 2570 #endif 2571 uint16_t c, etype; 2572 2573 2574 eh = mtod(m, struct ether_header *); 2575 etype = ntohs(eh->ether_type); 2576 switch (etype) { 2577 #ifdef INET 2578 case ETHERTYPE_IP: 2579 if ((cap & IFCAP_RXCSUM) == 0) 2580 return (1); 2581 ip = (struct ip *)(eh + 1); 2582 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP) 2583 return (1); 2584 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2585 htonl(ntohs(csum) + ntohs(ip->ip_len) - 2586 (ip->ip_hl << 2) + ip->ip_p)); 2587 c ^= 0xffff; 2588 break; 2589 #endif 2590 #ifdef INET6 2591 case ETHERTYPE_IPV6: 2592 if ((cap & IFCAP_RXCSUM_IPV6) == 0) 2593 return (1); 2594 c = mxge_rx_csum6((eh + 1), m, csum); 2595 break; 2596 #endif 2597 default: 2598 c = 1; 2599 } 2600 return (c); 2601 } 2602 2603 static void 2604 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2605 { 2606 struct ether_vlan_header *evl; 2607 struct ether_header *eh; 2608 uint32_t partial; 2609 2610 evl = mtod(m, struct ether_vlan_header *); 2611 eh = mtod(m, struct ether_header *); 2612 2613 /* 2614 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes 2615 * after what the firmware thought was the end of the ethernet 2616 * header. 2617 */ 2618 2619 /* put checksum into host byte order */ 2620 *csum = ntohs(*csum); 2621 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2622 (*csum) += ~partial; 2623 (*csum) += ((*csum) < ~partial); 2624 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2625 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2626 2627 /* restore checksum to network byte order; 2628 later consumers expect this */ 2629 *csum = htons(*csum); 2630 2631 /* save the tag */ 2632 #ifdef MXGE_NEW_VLAN_API 2633 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); 2634 #else 2635 { 2636 struct m_tag *mtag; 2637 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), 2638 M_NOWAIT); 2639 if (mtag == NULL) 2640 return; 2641 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); 2642 m_tag_prepend(m, mtag); 2643 } 2644 2645 #endif 2646 m->m_flags |= M_VLANTAG; 2647 2648 /* 2649 * Remove the 802.1q header by copying the Ethernet 2650 * addresses over it and adjusting the beginning of 2651 * the data in the mbuf. The encapsulated Ethernet 2652 * type field is already in place. 2653 */ 2654 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2655 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2656 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2657 } 2658 2659 2660 static inline void 2661 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, 2662 uint32_t csum, int lro) 2663 { 2664 mxge_softc_t *sc; 2665 struct ifnet *ifp; 2666 struct mbuf *m; 2667 struct ether_header *eh; 2668 mxge_rx_ring_t *rx; 2669 bus_dmamap_t old_map; 2670 int idx; 2671 2672 sc = ss->sc; 2673 ifp = sc->ifp; 2674 rx = &ss->rx_big; 2675 idx = rx->cnt & rx->mask; 2676 rx->cnt += rx->nbufs; 2677 /* save a pointer to the received mbuf */ 2678 m = rx->info[idx].m; 2679 /* try to replace the received mbuf */ 2680 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2681 /* drop the frame -- the old mbuf is re-cycled */ 2682 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 2683 return; 2684 } 2685 2686 /* unmap the received buffer */ 2687 old_map = rx->info[idx].map; 2688 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2689 bus_dmamap_unload(rx->dmat, old_map); 2690 2691 /* swap the bus_dmamap_t's */ 2692 rx->info[idx].map = rx->extra_map; 2693 rx->extra_map = old_map; 2694 2695 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2696 * aligned */ 2697 m->m_data += MXGEFW_PAD; 2698 2699 m->m_pkthdr.rcvif = ifp; 2700 m->m_len = m->m_pkthdr.len = len; 2701 ss->ipackets++; 2702 eh = mtod(m, struct ether_header *); 2703 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2704 mxge_vlan_tag_remove(m, &csum); 2705 } 2706 /* flowid only valid if RSS hashing is enabled */ 2707 if (sc->num_slices > 1) { 2708 m->m_pkthdr.flowid = (ss - sc->ss); 2709 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 2710 } 2711 /* if the checksum is valid, mark it in the mbuf header */ 2712 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && 2713 (0 == mxge_rx_csum(m, csum))) { 2714 /* Tell the stack that the checksum is good */ 2715 m->m_pkthdr.csum_data = 0xffff; 2716 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2717 CSUM_DATA_VALID; 2718 2719 #if defined(INET) || defined (INET6) 2720 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0))) 2721 return; 2722 #endif 2723 } 2724 /* pass the frame up the stack */ 2725 (*ifp->if_input)(ifp, m); 2726 } 2727 2728 static inline void 2729 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, 2730 uint32_t csum, int lro) 2731 { 2732 mxge_softc_t *sc; 2733 struct ifnet *ifp; 2734 struct ether_header *eh; 2735 struct mbuf *m; 2736 mxge_rx_ring_t *rx; 2737 bus_dmamap_t old_map; 2738 int idx; 2739 2740 sc = ss->sc; 2741 ifp = sc->ifp; 2742 rx = &ss->rx_small; 2743 idx = rx->cnt & rx->mask; 2744 rx->cnt++; 2745 /* save a pointer to the received mbuf */ 2746 m = rx->info[idx].m; 2747 /* try to replace the received mbuf */ 2748 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2749 /* drop the frame -- the old mbuf is re-cycled */ 2750 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 2751 return; 2752 } 2753 2754 /* unmap the received buffer */ 2755 old_map = rx->info[idx].map; 2756 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2757 bus_dmamap_unload(rx->dmat, old_map); 2758 2759 /* swap the bus_dmamap_t's */ 2760 rx->info[idx].map = rx->extra_map; 2761 rx->extra_map = old_map; 2762 2763 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2764 * aligned */ 2765 m->m_data += MXGEFW_PAD; 2766 2767 m->m_pkthdr.rcvif = ifp; 2768 m->m_len = m->m_pkthdr.len = len; 2769 ss->ipackets++; 2770 eh = mtod(m, struct ether_header *); 2771 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2772 mxge_vlan_tag_remove(m, &csum); 2773 } 2774 /* flowid only valid if RSS hashing is enabled */ 2775 if (sc->num_slices > 1) { 2776 m->m_pkthdr.flowid = (ss - sc->ss); 2777 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 2778 } 2779 /* if the checksum is valid, mark it in the mbuf header */ 2780 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && 2781 (0 == mxge_rx_csum(m, csum))) { 2782 /* Tell the stack that the checksum is good */ 2783 m->m_pkthdr.csum_data = 0xffff; 2784 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2785 CSUM_DATA_VALID; 2786 2787 #if defined(INET) || defined (INET6) 2788 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum))) 2789 return; 2790 #endif 2791 } 2792 /* pass the frame up the stack */ 2793 (*ifp->if_input)(ifp, m); 2794 } 2795 2796 static inline void 2797 mxge_clean_rx_done(struct mxge_slice_state *ss) 2798 { 2799 mxge_rx_done_t *rx_done = &ss->rx_done; 2800 int limit = 0; 2801 uint16_t length; 2802 uint16_t checksum; 2803 int lro; 2804 2805 lro = ss->sc->ifp->if_capenable & IFCAP_LRO; 2806 while (rx_done->entry[rx_done->idx].length != 0) { 2807 length = ntohs(rx_done->entry[rx_done->idx].length); 2808 rx_done->entry[rx_done->idx].length = 0; 2809 checksum = rx_done->entry[rx_done->idx].checksum; 2810 if (length <= (MHLEN - MXGEFW_PAD)) 2811 mxge_rx_done_small(ss, length, checksum, lro); 2812 else 2813 mxge_rx_done_big(ss, length, checksum, lro); 2814 rx_done->cnt++; 2815 rx_done->idx = rx_done->cnt & rx_done->mask; 2816 2817 /* limit potential for livelock */ 2818 if (__predict_false(++limit > rx_done->mask / 2)) 2819 break; 2820 } 2821 #if defined(INET) || defined (INET6) 2822 tcp_lro_flush_all(&ss->lc); 2823 #endif 2824 } 2825 2826 2827 static inline void 2828 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2829 { 2830 struct ifnet *ifp; 2831 mxge_tx_ring_t *tx; 2832 struct mbuf *m; 2833 bus_dmamap_t map; 2834 int idx; 2835 int *flags; 2836 2837 tx = &ss->tx; 2838 ifp = ss->sc->ifp; 2839 while (tx->pkt_done != mcp_idx) { 2840 idx = tx->done & tx->mask; 2841 tx->done++; 2842 m = tx->info[idx].m; 2843 /* mbuf and DMA map only attached to the first 2844 segment per-mbuf */ 2845 if (m != NULL) { 2846 ss->obytes += m->m_pkthdr.len; 2847 if (m->m_flags & M_MCAST) 2848 ss->omcasts++; 2849 ss->opackets++; 2850 tx->info[idx].m = NULL; 2851 map = tx->info[idx].map; 2852 bus_dmamap_unload(tx->dmat, map); 2853 m_freem(m); 2854 } 2855 if (tx->info[idx].flag) { 2856 tx->info[idx].flag = 0; 2857 tx->pkt_done++; 2858 } 2859 } 2860 2861 /* If we have space, clear IFF_OACTIVE to tell the stack that 2862 its OK to send packets */ 2863 #ifdef IFNET_BUF_RING 2864 flags = &ss->if_drv_flags; 2865 #else 2866 flags = &ifp->if_drv_flags; 2867 #endif 2868 mtx_lock(&ss->tx.mtx); 2869 if ((*flags) & IFF_DRV_OACTIVE && 2870 tx->req - tx->done < (tx->mask + 1)/4) { 2871 *(flags) &= ~IFF_DRV_OACTIVE; 2872 ss->tx.wake++; 2873 mxge_start_locked(ss); 2874 } 2875 #ifdef IFNET_BUF_RING 2876 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { 2877 /* let the NIC stop polling this queue, since there 2878 * are no more transmits pending */ 2879 if (tx->req == tx->done) { 2880 *tx->send_stop = 1; 2881 tx->queue_active = 0; 2882 tx->deactivate++; 2883 wmb(); 2884 } 2885 } 2886 #endif 2887 mtx_unlock(&ss->tx.mtx); 2888 2889 } 2890 2891 static struct mxge_media_type mxge_xfp_media_types[] = 2892 { 2893 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2894 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2895 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2896 {0, (1 << 5), "10GBASE-ER"}, 2897 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2898 {0, (1 << 3), "10GBASE-SW"}, 2899 {0, (1 << 2), "10GBASE-LW"}, 2900 {0, (1 << 1), "10GBASE-EW"}, 2901 {0, (1 << 0), "Reserved"} 2902 }; 2903 static struct mxge_media_type mxge_sfp_media_types[] = 2904 { 2905 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"}, 2906 {0, (1 << 7), "Reserved"}, 2907 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2908 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2909 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}, 2910 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"} 2911 }; 2912 2913 static void 2914 mxge_media_set(mxge_softc_t *sc, int media_type) 2915 { 2916 2917 2918 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type, 2919 0, NULL); 2920 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type); 2921 sc->current_media = media_type; 2922 sc->media.ifm_media = sc->media.ifm_cur->ifm_media; 2923 } 2924 2925 static void 2926 mxge_media_init(mxge_softc_t *sc) 2927 { 2928 char *ptr; 2929 int i; 2930 2931 ifmedia_removeall(&sc->media); 2932 mxge_media_set(sc, IFM_AUTO); 2933 2934 /* 2935 * parse the product code to deterimine the interface type 2936 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2937 * after the 3rd dash in the driver's cached copy of the 2938 * EEPROM's product code string. 2939 */ 2940 ptr = sc->product_code_string; 2941 if (ptr == NULL) { 2942 device_printf(sc->dev, "Missing product code\n"); 2943 return; 2944 } 2945 2946 for (i = 0; i < 3; i++, ptr++) { 2947 ptr = strchr(ptr, '-'); 2948 if (ptr == NULL) { 2949 device_printf(sc->dev, 2950 "only %d dashes in PC?!?\n", i); 2951 return; 2952 } 2953 } 2954 if (*ptr == 'C' || *(ptr +1) == 'C') { 2955 /* -C is CX4 */ 2956 sc->connector = MXGE_CX4; 2957 mxge_media_set(sc, IFM_10G_CX4); 2958 } else if (*ptr == 'Q') { 2959 /* -Q is Quad Ribbon Fiber */ 2960 sc->connector = MXGE_QRF; 2961 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2962 /* FreeBSD has no media type for Quad ribbon fiber */ 2963 } else if (*ptr == 'R') { 2964 /* -R is XFP */ 2965 sc->connector = MXGE_XFP; 2966 } else if (*ptr == 'S' || *(ptr +1) == 'S') { 2967 /* -S or -2S is SFP+ */ 2968 sc->connector = MXGE_SFP; 2969 } else { 2970 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2971 } 2972 } 2973 2974 /* 2975 * Determine the media type for a NIC. Some XFPs will identify 2976 * themselves only when their link is up, so this is initiated via a 2977 * link up interrupt. However, this can potentially take up to 2978 * several milliseconds, so it is run via the watchdog routine, rather 2979 * than in the interrupt handler itself. 2980 */ 2981 static void 2982 mxge_media_probe(mxge_softc_t *sc) 2983 { 2984 mxge_cmd_t cmd; 2985 char *cage_type; 2986 2987 struct mxge_media_type *mxge_media_types = NULL; 2988 int i, err, ms, mxge_media_type_entries; 2989 uint32_t byte; 2990 2991 sc->need_media_probe = 0; 2992 2993 if (sc->connector == MXGE_XFP) { 2994 /* -R is XFP */ 2995 mxge_media_types = mxge_xfp_media_types; 2996 mxge_media_type_entries = 2997 nitems(mxge_xfp_media_types); 2998 byte = MXGE_XFP_COMPLIANCE_BYTE; 2999 cage_type = "XFP"; 3000 } else if (sc->connector == MXGE_SFP) { 3001 /* -S or -2S is SFP+ */ 3002 mxge_media_types = mxge_sfp_media_types; 3003 mxge_media_type_entries = 3004 nitems(mxge_sfp_media_types); 3005 cage_type = "SFP+"; 3006 byte = 3; 3007 } else { 3008 /* nothing to do; media type cannot change */ 3009 return; 3010 } 3011 3012 /* 3013 * At this point we know the NIC has an XFP cage, so now we 3014 * try to determine what is in the cage by using the 3015 * firmware's XFP I2C commands to read the XFP 10GbE compilance 3016 * register. We read just one byte, which may take over 3017 * a millisecond 3018 */ 3019 3020 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 3021 cmd.data1 = byte; 3022 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 3023 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 3024 device_printf(sc->dev, "failed to read XFP\n"); 3025 } 3026 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 3027 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 3028 } 3029 if (err != MXGEFW_CMD_OK) { 3030 return; 3031 } 3032 3033 /* now we wait for the data to be cached */ 3034 cmd.data0 = byte; 3035 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 3036 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 3037 DELAY(1000); 3038 cmd.data0 = byte; 3039 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 3040 } 3041 if (err != MXGEFW_CMD_OK) { 3042 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 3043 cage_type, err, ms); 3044 return; 3045 } 3046 3047 if (cmd.data0 == mxge_media_types[0].bitmask) { 3048 if (mxge_verbose) 3049 device_printf(sc->dev, "%s:%s\n", cage_type, 3050 mxge_media_types[0].name); 3051 if (sc->current_media != mxge_media_types[0].flag) { 3052 mxge_media_init(sc); 3053 mxge_media_set(sc, mxge_media_types[0].flag); 3054 } 3055 return; 3056 } 3057 for (i = 1; i < mxge_media_type_entries; i++) { 3058 if (cmd.data0 & mxge_media_types[i].bitmask) { 3059 if (mxge_verbose) 3060 device_printf(sc->dev, "%s:%s\n", 3061 cage_type, 3062 mxge_media_types[i].name); 3063 3064 if (sc->current_media != mxge_media_types[i].flag) { 3065 mxge_media_init(sc); 3066 mxge_media_set(sc, mxge_media_types[i].flag); 3067 } 3068 return; 3069 } 3070 } 3071 if (mxge_verbose) 3072 device_printf(sc->dev, "%s media 0x%x unknown\n", 3073 cage_type, cmd.data0); 3074 3075 return; 3076 } 3077 3078 static void 3079 mxge_intr(void *arg) 3080 { 3081 struct mxge_slice_state *ss = arg; 3082 mxge_softc_t *sc = ss->sc; 3083 mcp_irq_data_t *stats = ss->fw_stats; 3084 mxge_tx_ring_t *tx = &ss->tx; 3085 mxge_rx_done_t *rx_done = &ss->rx_done; 3086 uint32_t send_done_count; 3087 uint8_t valid; 3088 3089 3090 #ifndef IFNET_BUF_RING 3091 /* an interrupt on a non-zero slice is implicitly valid 3092 since MSI-X irqs are not shared */ 3093 if (ss != sc->ss) { 3094 mxge_clean_rx_done(ss); 3095 *ss->irq_claim = be32toh(3); 3096 return; 3097 } 3098 #endif 3099 3100 /* make sure the DMA has finished */ 3101 if (!stats->valid) { 3102 return; 3103 } 3104 valid = stats->valid; 3105 3106 if (sc->legacy_irq) { 3107 /* lower legacy IRQ */ 3108 *sc->irq_deassert = 0; 3109 if (!mxge_deassert_wait) 3110 /* don't wait for conf. that irq is low */ 3111 stats->valid = 0; 3112 } else { 3113 stats->valid = 0; 3114 } 3115 3116 /* loop while waiting for legacy irq deassertion */ 3117 do { 3118 /* check for transmit completes and receives */ 3119 send_done_count = be32toh(stats->send_done_count); 3120 while ((send_done_count != tx->pkt_done) || 3121 (rx_done->entry[rx_done->idx].length != 0)) { 3122 if (send_done_count != tx->pkt_done) 3123 mxge_tx_done(ss, (int)send_done_count); 3124 mxge_clean_rx_done(ss); 3125 send_done_count = be32toh(stats->send_done_count); 3126 } 3127 if (sc->legacy_irq && mxge_deassert_wait) 3128 wmb(); 3129 } while (*((volatile uint8_t *) &stats->valid)); 3130 3131 /* fw link & error stats meaningful only on the first slice */ 3132 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 3133 if (sc->link_state != stats->link_up) { 3134 sc->link_state = stats->link_up; 3135 if (sc->link_state) { 3136 if_link_state_change(sc->ifp, LINK_STATE_UP); 3137 if (mxge_verbose) 3138 device_printf(sc->dev, "link up\n"); 3139 } else { 3140 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3141 if (mxge_verbose) 3142 device_printf(sc->dev, "link down\n"); 3143 } 3144 sc->need_media_probe = 1; 3145 } 3146 if (sc->rdma_tags_available != 3147 be32toh(stats->rdma_tags_available)) { 3148 sc->rdma_tags_available = 3149 be32toh(stats->rdma_tags_available); 3150 device_printf(sc->dev, "RDMA timed out! %d tags " 3151 "left\n", sc->rdma_tags_available); 3152 } 3153 3154 if (stats->link_down) { 3155 sc->down_cnt += stats->link_down; 3156 sc->link_state = 0; 3157 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3158 } 3159 } 3160 3161 /* check to see if we have rx token to pass back */ 3162 if (valid & 0x1) 3163 *ss->irq_claim = be32toh(3); 3164 *(ss->irq_claim + 1) = be32toh(3); 3165 } 3166 3167 static void 3168 mxge_init(void *arg) 3169 { 3170 mxge_softc_t *sc = arg; 3171 struct ifnet *ifp = sc->ifp; 3172 3173 3174 mtx_lock(&sc->driver_mtx); 3175 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 3176 (void) mxge_open(sc); 3177 mtx_unlock(&sc->driver_mtx); 3178 } 3179 3180 3181 3182 static void 3183 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 3184 { 3185 int i; 3186 3187 #if defined(INET) || defined(INET6) 3188 tcp_lro_free(&ss->lc); 3189 #endif 3190 for (i = 0; i <= ss->rx_big.mask; i++) { 3191 if (ss->rx_big.info[i].m == NULL) 3192 continue; 3193 bus_dmamap_unload(ss->rx_big.dmat, 3194 ss->rx_big.info[i].map); 3195 m_freem(ss->rx_big.info[i].m); 3196 ss->rx_big.info[i].m = NULL; 3197 } 3198 3199 for (i = 0; i <= ss->rx_small.mask; i++) { 3200 if (ss->rx_small.info[i].m == NULL) 3201 continue; 3202 bus_dmamap_unload(ss->rx_small.dmat, 3203 ss->rx_small.info[i].map); 3204 m_freem(ss->rx_small.info[i].m); 3205 ss->rx_small.info[i].m = NULL; 3206 } 3207 3208 /* transmit ring used only on the first slice */ 3209 if (ss->tx.info == NULL) 3210 return; 3211 3212 for (i = 0; i <= ss->tx.mask; i++) { 3213 ss->tx.info[i].flag = 0; 3214 if (ss->tx.info[i].m == NULL) 3215 continue; 3216 bus_dmamap_unload(ss->tx.dmat, 3217 ss->tx.info[i].map); 3218 m_freem(ss->tx.info[i].m); 3219 ss->tx.info[i].m = NULL; 3220 } 3221 } 3222 3223 static void 3224 mxge_free_mbufs(mxge_softc_t *sc) 3225 { 3226 int slice; 3227 3228 for (slice = 0; slice < sc->num_slices; slice++) 3229 mxge_free_slice_mbufs(&sc->ss[slice]); 3230 } 3231 3232 static void 3233 mxge_free_slice_rings(struct mxge_slice_state *ss) 3234 { 3235 int i; 3236 3237 3238 if (ss->rx_done.entry != NULL) 3239 mxge_dma_free(&ss->rx_done.dma); 3240 ss->rx_done.entry = NULL; 3241 3242 if (ss->tx.req_bytes != NULL) 3243 free(ss->tx.req_bytes, M_DEVBUF); 3244 ss->tx.req_bytes = NULL; 3245 3246 if (ss->tx.seg_list != NULL) 3247 free(ss->tx.seg_list, M_DEVBUF); 3248 ss->tx.seg_list = NULL; 3249 3250 if (ss->rx_small.shadow != NULL) 3251 free(ss->rx_small.shadow, M_DEVBUF); 3252 ss->rx_small.shadow = NULL; 3253 3254 if (ss->rx_big.shadow != NULL) 3255 free(ss->rx_big.shadow, M_DEVBUF); 3256 ss->rx_big.shadow = NULL; 3257 3258 if (ss->tx.info != NULL) { 3259 if (ss->tx.dmat != NULL) { 3260 for (i = 0; i <= ss->tx.mask; i++) { 3261 bus_dmamap_destroy(ss->tx.dmat, 3262 ss->tx.info[i].map); 3263 } 3264 bus_dma_tag_destroy(ss->tx.dmat); 3265 } 3266 free(ss->tx.info, M_DEVBUF); 3267 } 3268 ss->tx.info = NULL; 3269 3270 if (ss->rx_small.info != NULL) { 3271 if (ss->rx_small.dmat != NULL) { 3272 for (i = 0; i <= ss->rx_small.mask; i++) { 3273 bus_dmamap_destroy(ss->rx_small.dmat, 3274 ss->rx_small.info[i].map); 3275 } 3276 bus_dmamap_destroy(ss->rx_small.dmat, 3277 ss->rx_small.extra_map); 3278 bus_dma_tag_destroy(ss->rx_small.dmat); 3279 } 3280 free(ss->rx_small.info, M_DEVBUF); 3281 } 3282 ss->rx_small.info = NULL; 3283 3284 if (ss->rx_big.info != NULL) { 3285 if (ss->rx_big.dmat != NULL) { 3286 for (i = 0; i <= ss->rx_big.mask; i++) { 3287 bus_dmamap_destroy(ss->rx_big.dmat, 3288 ss->rx_big.info[i].map); 3289 } 3290 bus_dmamap_destroy(ss->rx_big.dmat, 3291 ss->rx_big.extra_map); 3292 bus_dma_tag_destroy(ss->rx_big.dmat); 3293 } 3294 free(ss->rx_big.info, M_DEVBUF); 3295 } 3296 ss->rx_big.info = NULL; 3297 } 3298 3299 static void 3300 mxge_free_rings(mxge_softc_t *sc) 3301 { 3302 int slice; 3303 3304 for (slice = 0; slice < sc->num_slices; slice++) 3305 mxge_free_slice_rings(&sc->ss[slice]); 3306 } 3307 3308 static int 3309 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3310 int tx_ring_entries) 3311 { 3312 mxge_softc_t *sc = ss->sc; 3313 size_t bytes; 3314 int err, i; 3315 3316 /* allocate per-slice receive resources */ 3317 3318 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 3319 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 3320 3321 /* allocate the rx shadow rings */ 3322 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 3323 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3324 3325 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 3326 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3327 3328 /* allocate the rx host info rings */ 3329 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 3330 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3331 3332 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 3333 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3334 3335 /* allocate the rx busdma resources */ 3336 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3337 1, /* alignment */ 3338 4096, /* boundary */ 3339 BUS_SPACE_MAXADDR, /* low */ 3340 BUS_SPACE_MAXADDR, /* high */ 3341 NULL, NULL, /* filter */ 3342 MHLEN, /* maxsize */ 3343 1, /* num segs */ 3344 MHLEN, /* maxsegsize */ 3345 BUS_DMA_ALLOCNOW, /* flags */ 3346 NULL, NULL, /* lock */ 3347 &ss->rx_small.dmat); /* tag */ 3348 if (err != 0) { 3349 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3350 err); 3351 return err; 3352 } 3353 3354 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3355 1, /* alignment */ 3356 #if MXGE_VIRT_JUMBOS 3357 4096, /* boundary */ 3358 #else 3359 0, /* boundary */ 3360 #endif 3361 BUS_SPACE_MAXADDR, /* low */ 3362 BUS_SPACE_MAXADDR, /* high */ 3363 NULL, NULL, /* filter */ 3364 3*4096, /* maxsize */ 3365 #if MXGE_VIRT_JUMBOS 3366 3, /* num segs */ 3367 4096, /* maxsegsize*/ 3368 #else 3369 1, /* num segs */ 3370 MJUM9BYTES, /* maxsegsize*/ 3371 #endif 3372 BUS_DMA_ALLOCNOW, /* flags */ 3373 NULL, NULL, /* lock */ 3374 &ss->rx_big.dmat); /* tag */ 3375 if (err != 0) { 3376 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3377 err); 3378 return err; 3379 } 3380 for (i = 0; i <= ss->rx_small.mask; i++) { 3381 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3382 &ss->rx_small.info[i].map); 3383 if (err != 0) { 3384 device_printf(sc->dev, "Err %d rx_small dmamap\n", 3385 err); 3386 return err; 3387 } 3388 } 3389 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3390 &ss->rx_small.extra_map); 3391 if (err != 0) { 3392 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 3393 err); 3394 return err; 3395 } 3396 3397 for (i = 0; i <= ss->rx_big.mask; i++) { 3398 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3399 &ss->rx_big.info[i].map); 3400 if (err != 0) { 3401 device_printf(sc->dev, "Err %d rx_big dmamap\n", 3402 err); 3403 return err; 3404 } 3405 } 3406 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3407 &ss->rx_big.extra_map); 3408 if (err != 0) { 3409 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 3410 err); 3411 return err; 3412 } 3413 3414 /* now allocate TX resources */ 3415 3416 #ifndef IFNET_BUF_RING 3417 /* only use a single TX ring for now */ 3418 if (ss != ss->sc->ss) 3419 return 0; 3420 #endif 3421 3422 ss->tx.mask = tx_ring_entries - 1; 3423 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3424 3425 3426 /* allocate the tx request copy block */ 3427 bytes = 8 + 3428 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3429 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 3430 /* ensure req_list entries are aligned to 8 bytes */ 3431 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3432 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL); 3433 3434 /* allocate the tx busdma segment list */ 3435 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3436 ss->tx.seg_list = (bus_dma_segment_t *) 3437 malloc(bytes, M_DEVBUF, M_WAITOK); 3438 3439 /* allocate the tx host info ring */ 3440 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3441 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3442 3443 /* allocate the tx busdma resources */ 3444 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3445 1, /* alignment */ 3446 sc->tx_boundary, /* boundary */ 3447 BUS_SPACE_MAXADDR, /* low */ 3448 BUS_SPACE_MAXADDR, /* high */ 3449 NULL, NULL, /* filter */ 3450 65536 + 256, /* maxsize */ 3451 ss->tx.max_desc - 2, /* num segs */ 3452 sc->tx_boundary, /* maxsegsz */ 3453 BUS_DMA_ALLOCNOW, /* flags */ 3454 NULL, NULL, /* lock */ 3455 &ss->tx.dmat); /* tag */ 3456 3457 if (err != 0) { 3458 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3459 err); 3460 return err; 3461 } 3462 3463 /* now use these tags to setup dmamaps for each slot 3464 in the ring */ 3465 for (i = 0; i <= ss->tx.mask; i++) { 3466 err = bus_dmamap_create(ss->tx.dmat, 0, 3467 &ss->tx.info[i].map); 3468 if (err != 0) { 3469 device_printf(sc->dev, "Err %d tx dmamap\n", 3470 err); 3471 return err; 3472 } 3473 } 3474 return 0; 3475 3476 } 3477 3478 static int 3479 mxge_alloc_rings(mxge_softc_t *sc) 3480 { 3481 mxge_cmd_t cmd; 3482 int tx_ring_size; 3483 int tx_ring_entries, rx_ring_entries; 3484 int err, slice; 3485 3486 /* get ring sizes */ 3487 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3488 tx_ring_size = cmd.data0; 3489 if (err != 0) { 3490 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3491 goto abort; 3492 } 3493 3494 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3495 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3496 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 3497 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 3498 IFQ_SET_READY(&sc->ifp->if_snd); 3499 3500 for (slice = 0; slice < sc->num_slices; slice++) { 3501 err = mxge_alloc_slice_rings(&sc->ss[slice], 3502 rx_ring_entries, 3503 tx_ring_entries); 3504 if (err != 0) 3505 goto abort; 3506 } 3507 return 0; 3508 3509 abort: 3510 mxge_free_rings(sc); 3511 return err; 3512 3513 } 3514 3515 3516 static void 3517 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3518 { 3519 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3520 3521 if (bufsize < MCLBYTES) { 3522 /* easy, everything fits in a single buffer */ 3523 *big_buf_size = MCLBYTES; 3524 *cl_size = MCLBYTES; 3525 *nbufs = 1; 3526 return; 3527 } 3528 3529 if (bufsize < MJUMPAGESIZE) { 3530 /* still easy, everything still fits in a single buffer */ 3531 *big_buf_size = MJUMPAGESIZE; 3532 *cl_size = MJUMPAGESIZE; 3533 *nbufs = 1; 3534 return; 3535 } 3536 #if MXGE_VIRT_JUMBOS 3537 /* now we need to use virtually contiguous buffers */ 3538 *cl_size = MJUM9BYTES; 3539 *big_buf_size = 4096; 3540 *nbufs = mtu / 4096 + 1; 3541 /* needs to be a power of two, so round up */ 3542 if (*nbufs == 3) 3543 *nbufs = 4; 3544 #else 3545 *cl_size = MJUM9BYTES; 3546 *big_buf_size = MJUM9BYTES; 3547 *nbufs = 1; 3548 #endif 3549 } 3550 3551 static int 3552 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3553 { 3554 mxge_softc_t *sc; 3555 mxge_cmd_t cmd; 3556 bus_dmamap_t map; 3557 int err, i, slice; 3558 3559 3560 sc = ss->sc; 3561 slice = ss - sc->ss; 3562 3563 #if defined(INET) || defined(INET6) 3564 (void)tcp_lro_init(&ss->lc); 3565 #endif 3566 ss->lc.ifp = sc->ifp; 3567 3568 /* get the lanai pointers to the send and receive rings */ 3569 3570 err = 0; 3571 #ifndef IFNET_BUF_RING 3572 /* We currently only send from the first slice */ 3573 if (slice == 0) { 3574 #endif 3575 cmd.data0 = slice; 3576 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3577 ss->tx.lanai = 3578 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3579 ss->tx.send_go = (volatile uint32_t *) 3580 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3581 ss->tx.send_stop = (volatile uint32_t *) 3582 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3583 #ifndef IFNET_BUF_RING 3584 } 3585 #endif 3586 cmd.data0 = slice; 3587 err |= mxge_send_cmd(sc, 3588 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3589 ss->rx_small.lanai = 3590 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3591 cmd.data0 = slice; 3592 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3593 ss->rx_big.lanai = 3594 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3595 3596 if (err != 0) { 3597 device_printf(sc->dev, 3598 "failed to get ring sizes or locations\n"); 3599 return EIO; 3600 } 3601 3602 /* stock receive rings */ 3603 for (i = 0; i <= ss->rx_small.mask; i++) { 3604 map = ss->rx_small.info[i].map; 3605 err = mxge_get_buf_small(ss, map, i); 3606 if (err) { 3607 device_printf(sc->dev, "alloced %d/%d smalls\n", 3608 i, ss->rx_small.mask + 1); 3609 return ENOMEM; 3610 } 3611 } 3612 for (i = 0; i <= ss->rx_big.mask; i++) { 3613 ss->rx_big.shadow[i].addr_low = 0xffffffff; 3614 ss->rx_big.shadow[i].addr_high = 0xffffffff; 3615 } 3616 ss->rx_big.nbufs = nbufs; 3617 ss->rx_big.cl_size = cl_size; 3618 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN + 3619 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3620 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { 3621 map = ss->rx_big.info[i].map; 3622 err = mxge_get_buf_big(ss, map, i); 3623 if (err) { 3624 device_printf(sc->dev, "alloced %d/%d bigs\n", 3625 i, ss->rx_big.mask + 1); 3626 return ENOMEM; 3627 } 3628 } 3629 return 0; 3630 } 3631 3632 static int 3633 mxge_open(mxge_softc_t *sc) 3634 { 3635 mxge_cmd_t cmd; 3636 int err, big_bytes, nbufs, slice, cl_size, i; 3637 bus_addr_t bus; 3638 volatile uint8_t *itable; 3639 struct mxge_slice_state *ss; 3640 3641 /* Copy the MAC address in case it was overridden */ 3642 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 3643 3644 err = mxge_reset(sc, 1); 3645 if (err != 0) { 3646 device_printf(sc->dev, "failed to reset\n"); 3647 return EIO; 3648 } 3649 3650 if (sc->num_slices > 1) { 3651 /* setup the indirection table */ 3652 cmd.data0 = sc->num_slices; 3653 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 3654 &cmd); 3655 3656 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 3657 &cmd); 3658 if (err != 0) { 3659 device_printf(sc->dev, 3660 "failed to setup rss tables\n"); 3661 return err; 3662 } 3663 3664 /* just enable an identity mapping */ 3665 itable = sc->sram + cmd.data0; 3666 for (i = 0; i < sc->num_slices; i++) 3667 itable[i] = (uint8_t)i; 3668 3669 cmd.data0 = 1; 3670 cmd.data1 = mxge_rss_hash_type; 3671 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3672 if (err != 0) { 3673 device_printf(sc->dev, "failed to enable slices\n"); 3674 return err; 3675 } 3676 } 3677 3678 3679 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); 3680 3681 cmd.data0 = nbufs; 3682 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 3683 &cmd); 3684 /* error is only meaningful if we're trying to set 3685 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 3686 if (err && nbufs > 1) { 3687 device_printf(sc->dev, 3688 "Failed to set alway-use-n to %d\n", 3689 nbufs); 3690 return EIO; 3691 } 3692 /* Give the firmware the mtu and the big and small buffer 3693 sizes. The firmware wants the big buf size to be a power 3694 of two. Luckily, FreeBSD's clusters are powers of two */ 3695 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3696 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3697 cmd.data0 = MHLEN - MXGEFW_PAD; 3698 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3699 &cmd); 3700 cmd.data0 = big_bytes; 3701 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3702 3703 if (err != 0) { 3704 device_printf(sc->dev, "failed to setup params\n"); 3705 goto abort; 3706 } 3707 3708 /* Now give him the pointer to the stats block */ 3709 for (slice = 0; 3710 #ifdef IFNET_BUF_RING 3711 slice < sc->num_slices; 3712 #else 3713 slice < 1; 3714 #endif 3715 slice++) { 3716 ss = &sc->ss[slice]; 3717 cmd.data0 = 3718 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3719 cmd.data1 = 3720 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3721 cmd.data2 = sizeof(struct mcp_irq_data); 3722 cmd.data2 |= (slice << 16); 3723 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3724 } 3725 3726 if (err != 0) { 3727 bus = sc->ss->fw_stats_dma.bus_addr; 3728 bus += offsetof(struct mcp_irq_data, send_done_count); 3729 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3730 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3731 err = mxge_send_cmd(sc, 3732 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3733 &cmd); 3734 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3735 sc->fw_multicast_support = 0; 3736 } else { 3737 sc->fw_multicast_support = 1; 3738 } 3739 3740 if (err != 0) { 3741 device_printf(sc->dev, "failed to setup params\n"); 3742 goto abort; 3743 } 3744 3745 for (slice = 0; slice < sc->num_slices; slice++) { 3746 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3747 if (err != 0) { 3748 device_printf(sc->dev, "couldn't open slice %d\n", 3749 slice); 3750 goto abort; 3751 } 3752 } 3753 3754 /* Finally, start the firmware running */ 3755 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3756 if (err) { 3757 device_printf(sc->dev, "Couldn't bring up link\n"); 3758 goto abort; 3759 } 3760 #ifdef IFNET_BUF_RING 3761 for (slice = 0; slice < sc->num_slices; slice++) { 3762 ss = &sc->ss[slice]; 3763 ss->if_drv_flags |= IFF_DRV_RUNNING; 3764 ss->if_drv_flags &= ~IFF_DRV_OACTIVE; 3765 } 3766 #endif 3767 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3768 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3769 3770 return 0; 3771 3772 3773 abort: 3774 mxge_free_mbufs(sc); 3775 3776 return err; 3777 } 3778 3779 static int 3780 mxge_close(mxge_softc_t *sc, int down) 3781 { 3782 mxge_cmd_t cmd; 3783 int err, old_down_cnt; 3784 #ifdef IFNET_BUF_RING 3785 struct mxge_slice_state *ss; 3786 int slice; 3787 #endif 3788 3789 #ifdef IFNET_BUF_RING 3790 for (slice = 0; slice < sc->num_slices; slice++) { 3791 ss = &sc->ss[slice]; 3792 ss->if_drv_flags &= ~IFF_DRV_RUNNING; 3793 } 3794 #endif 3795 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3796 if (!down) { 3797 old_down_cnt = sc->down_cnt; 3798 wmb(); 3799 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3800 if (err) { 3801 device_printf(sc->dev, 3802 "Couldn't bring down link\n"); 3803 } 3804 if (old_down_cnt == sc->down_cnt) { 3805 /* wait for down irq */ 3806 DELAY(10 * sc->intr_coal_delay); 3807 } 3808 wmb(); 3809 if (old_down_cnt == sc->down_cnt) { 3810 device_printf(sc->dev, "never got down irq\n"); 3811 } 3812 } 3813 mxge_free_mbufs(sc); 3814 3815 return 0; 3816 } 3817 3818 static void 3819 mxge_setup_cfg_space(mxge_softc_t *sc) 3820 { 3821 device_t dev = sc->dev; 3822 int reg; 3823 uint16_t lnk, pectl; 3824 3825 /* find the PCIe link width and set max read request to 4KB*/ 3826 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { 3827 lnk = pci_read_config(dev, reg + 0x12, 2); 3828 sc->link_width = (lnk >> 4) & 0x3f; 3829 3830 if (sc->pectl == 0) { 3831 pectl = pci_read_config(dev, reg + 0x8, 2); 3832 pectl = (pectl & ~0x7000) | (5 << 12); 3833 pci_write_config(dev, reg + 0x8, pectl, 2); 3834 sc->pectl = pectl; 3835 } else { 3836 /* restore saved pectl after watchdog reset */ 3837 pci_write_config(dev, reg + 0x8, sc->pectl, 2); 3838 } 3839 } 3840 3841 /* Enable DMA and Memory space access */ 3842 pci_enable_busmaster(dev); 3843 } 3844 3845 static uint32_t 3846 mxge_read_reboot(mxge_softc_t *sc) 3847 { 3848 device_t dev = sc->dev; 3849 uint32_t vs; 3850 3851 /* find the vendor specific offset */ 3852 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) { 3853 device_printf(sc->dev, 3854 "could not find vendor specific offset\n"); 3855 return (uint32_t)-1; 3856 } 3857 /* enable read32 mode */ 3858 pci_write_config(dev, vs + 0x10, 0x3, 1); 3859 /* tell NIC which register to read */ 3860 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3861 return (pci_read_config(dev, vs + 0x14, 4)); 3862 } 3863 3864 static void 3865 mxge_watchdog_reset(mxge_softc_t *sc) 3866 { 3867 struct pci_devinfo *dinfo; 3868 struct mxge_slice_state *ss; 3869 int err, running, s, num_tx_slices = 1; 3870 uint32_t reboot; 3871 uint16_t cmd; 3872 3873 err = ENXIO; 3874 3875 device_printf(sc->dev, "Watchdog reset!\n"); 3876 3877 /* 3878 * check to see if the NIC rebooted. If it did, then all of 3879 * PCI config space has been reset, and things like the 3880 * busmaster bit will be zero. If this is the case, then we 3881 * must restore PCI config space before the NIC can be used 3882 * again 3883 */ 3884 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3885 if (cmd == 0xffff) { 3886 /* 3887 * maybe the watchdog caught the NIC rebooting; wait 3888 * up to 100ms for it to finish. If it does not come 3889 * back, then give up 3890 */ 3891 DELAY(1000*100); 3892 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3893 if (cmd == 0xffff) { 3894 device_printf(sc->dev, "NIC disappeared!\n"); 3895 } 3896 } 3897 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3898 /* print the reboot status */ 3899 reboot = mxge_read_reboot(sc); 3900 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3901 reboot); 3902 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 3903 if (running) { 3904 3905 /* 3906 * quiesce NIC so that TX routines will not try to 3907 * xmit after restoration of BAR 3908 */ 3909 3910 /* Mark the link as down */ 3911 if (sc->link_state) { 3912 sc->link_state = 0; 3913 if_link_state_change(sc->ifp, 3914 LINK_STATE_DOWN); 3915 } 3916 #ifdef IFNET_BUF_RING 3917 num_tx_slices = sc->num_slices; 3918 #endif 3919 /* grab all TX locks to ensure no tx */ 3920 for (s = 0; s < num_tx_slices; s++) { 3921 ss = &sc->ss[s]; 3922 mtx_lock(&ss->tx.mtx); 3923 } 3924 mxge_close(sc, 1); 3925 } 3926 /* restore PCI configuration space */ 3927 dinfo = device_get_ivars(sc->dev); 3928 pci_cfg_restore(sc->dev, dinfo); 3929 3930 /* and redo any changes we made to our config space */ 3931 mxge_setup_cfg_space(sc); 3932 3933 /* reload f/w */ 3934 err = mxge_load_firmware(sc, 0); 3935 if (err) { 3936 device_printf(sc->dev, 3937 "Unable to re-load f/w\n"); 3938 } 3939 if (running) { 3940 if (!err) 3941 err = mxge_open(sc); 3942 /* release all TX locks */ 3943 for (s = 0; s < num_tx_slices; s++) { 3944 ss = &sc->ss[s]; 3945 #ifdef IFNET_BUF_RING 3946 mxge_start_locked(ss); 3947 #endif 3948 mtx_unlock(&ss->tx.mtx); 3949 } 3950 } 3951 sc->watchdog_resets++; 3952 } else { 3953 device_printf(sc->dev, 3954 "NIC did not reboot, not resetting\n"); 3955 err = 0; 3956 } 3957 if (err) { 3958 device_printf(sc->dev, "watchdog reset failed\n"); 3959 } else { 3960 if (sc->dying == 2) 3961 sc->dying = 0; 3962 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3963 } 3964 } 3965 3966 static void 3967 mxge_watchdog_task(void *arg, int pending) 3968 { 3969 mxge_softc_t *sc = arg; 3970 3971 3972 mtx_lock(&sc->driver_mtx); 3973 mxge_watchdog_reset(sc); 3974 mtx_unlock(&sc->driver_mtx); 3975 } 3976 3977 static void 3978 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice) 3979 { 3980 tx = &sc->ss[slice].tx; 3981 device_printf(sc->dev, "slice %d struck? ring state:\n", slice); 3982 device_printf(sc->dev, 3983 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3984 tx->req, tx->done, tx->queue_active); 3985 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3986 tx->activate, tx->deactivate); 3987 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3988 tx->pkt_done, 3989 be32toh(sc->ss->fw_stats->send_done_count)); 3990 } 3991 3992 static int 3993 mxge_watchdog(mxge_softc_t *sc) 3994 { 3995 mxge_tx_ring_t *tx; 3996 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3997 int i, err = 0; 3998 3999 /* see if we have outstanding transmits, which 4000 have been pending for more than mxge_ticks */ 4001 for (i = 0; 4002 #ifdef IFNET_BUF_RING 4003 (i < sc->num_slices) && (err == 0); 4004 #else 4005 (i < 1) && (err == 0); 4006 #endif 4007 i++) { 4008 tx = &sc->ss[i].tx; 4009 if (tx->req != tx->done && 4010 tx->watchdog_req != tx->watchdog_done && 4011 tx->done == tx->watchdog_done) { 4012 /* check for pause blocking before resetting */ 4013 if (tx->watchdog_rx_pause == rx_pause) { 4014 mxge_warn_stuck(sc, tx, i); 4015 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 4016 return (ENXIO); 4017 } 4018 else 4019 device_printf(sc->dev, "Flow control blocking " 4020 "xmits, check link partner\n"); 4021 } 4022 4023 tx->watchdog_req = tx->req; 4024 tx->watchdog_done = tx->done; 4025 tx->watchdog_rx_pause = rx_pause; 4026 } 4027 4028 if (sc->need_media_probe) 4029 mxge_media_probe(sc); 4030 return (err); 4031 } 4032 4033 static uint64_t 4034 mxge_get_counter(struct ifnet *ifp, ift_counter cnt) 4035 { 4036 struct mxge_softc *sc; 4037 uint64_t rv; 4038 4039 sc = if_getsoftc(ifp); 4040 rv = 0; 4041 4042 switch (cnt) { 4043 case IFCOUNTER_IPACKETS: 4044 for (int s = 0; s < sc->num_slices; s++) 4045 rv += sc->ss[s].ipackets; 4046 return (rv); 4047 case IFCOUNTER_OPACKETS: 4048 for (int s = 0; s < sc->num_slices; s++) 4049 rv += sc->ss[s].opackets; 4050 return (rv); 4051 case IFCOUNTER_OERRORS: 4052 for (int s = 0; s < sc->num_slices; s++) 4053 rv += sc->ss[s].oerrors; 4054 return (rv); 4055 #ifdef IFNET_BUF_RING 4056 case IFCOUNTER_OBYTES: 4057 for (int s = 0; s < sc->num_slices; s++) 4058 rv += sc->ss[s].obytes; 4059 return (rv); 4060 case IFCOUNTER_OMCASTS: 4061 for (int s = 0; s < sc->num_slices; s++) 4062 rv += sc->ss[s].omcasts; 4063 return (rv); 4064 case IFCOUNTER_OQDROPS: 4065 for (int s = 0; s < sc->num_slices; s++) 4066 rv += sc->ss[s].tx.br->br_drops; 4067 return (rv); 4068 #endif 4069 default: 4070 return (if_get_counter_default(ifp, cnt)); 4071 } 4072 } 4073 4074 static void 4075 mxge_tick(void *arg) 4076 { 4077 mxge_softc_t *sc = arg; 4078 u_long pkts = 0; 4079 int err = 0; 4080 int running, ticks; 4081 uint16_t cmd; 4082 4083 ticks = mxge_ticks; 4084 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 4085 if (running) { 4086 if (!sc->watchdog_countdown) { 4087 err = mxge_watchdog(sc); 4088 sc->watchdog_countdown = 4; 4089 } 4090 sc->watchdog_countdown--; 4091 } 4092 if (pkts == 0) { 4093 /* ensure NIC did not suffer h/w fault while idle */ 4094 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 4095 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 4096 sc->dying = 2; 4097 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 4098 err = ENXIO; 4099 } 4100 /* look less often if NIC is idle */ 4101 ticks *= 4; 4102 } 4103 4104 if (err == 0) 4105 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 4106 4107 } 4108 4109 static int 4110 mxge_media_change(struct ifnet *ifp) 4111 { 4112 return EINVAL; 4113 } 4114 4115 static int 4116 mxge_change_mtu(mxge_softc_t *sc, int mtu) 4117 { 4118 struct ifnet *ifp = sc->ifp; 4119 int real_mtu, old_mtu; 4120 int err = 0; 4121 4122 4123 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 4124 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 4125 return EINVAL; 4126 mtx_lock(&sc->driver_mtx); 4127 old_mtu = ifp->if_mtu; 4128 ifp->if_mtu = mtu; 4129 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4130 mxge_close(sc, 0); 4131 err = mxge_open(sc); 4132 if (err != 0) { 4133 ifp->if_mtu = old_mtu; 4134 mxge_close(sc, 0); 4135 (void) mxge_open(sc); 4136 } 4137 } 4138 mtx_unlock(&sc->driver_mtx); 4139 return err; 4140 } 4141 4142 static void 4143 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 4144 { 4145 mxge_softc_t *sc = ifp->if_softc; 4146 4147 4148 if (sc == NULL) 4149 return; 4150 ifmr->ifm_status = IFM_AVALID; 4151 ifmr->ifm_active = IFM_ETHER | IFM_FDX; 4152 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 4153 ifmr->ifm_active |= sc->current_media; 4154 } 4155 4156 static int 4157 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 4158 { 4159 mxge_softc_t *sc = ifp->if_softc; 4160 struct ifreq *ifr = (struct ifreq *)data; 4161 int err, mask; 4162 4163 err = 0; 4164 switch (command) { 4165 case SIOCSIFADDR: 4166 case SIOCGIFADDR: 4167 err = ether_ioctl(ifp, command, data); 4168 break; 4169 4170 case SIOCSIFMTU: 4171 err = mxge_change_mtu(sc, ifr->ifr_mtu); 4172 break; 4173 4174 case SIOCSIFFLAGS: 4175 mtx_lock(&sc->driver_mtx); 4176 if (sc->dying) { 4177 mtx_unlock(&sc->driver_mtx); 4178 return EINVAL; 4179 } 4180 if (ifp->if_flags & IFF_UP) { 4181 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 4182 err = mxge_open(sc); 4183 } else { 4184 /* take care of promis can allmulti 4185 flag chages */ 4186 mxge_change_promisc(sc, 4187 ifp->if_flags & IFF_PROMISC); 4188 mxge_set_multicast_list(sc); 4189 } 4190 } else { 4191 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4192 mxge_close(sc, 0); 4193 } 4194 } 4195 mtx_unlock(&sc->driver_mtx); 4196 break; 4197 4198 case SIOCADDMULTI: 4199 case SIOCDELMULTI: 4200 mtx_lock(&sc->driver_mtx); 4201 mxge_set_multicast_list(sc); 4202 mtx_unlock(&sc->driver_mtx); 4203 break; 4204 4205 case SIOCSIFCAP: 4206 mtx_lock(&sc->driver_mtx); 4207 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 4208 if (mask & IFCAP_TXCSUM) { 4209 if (IFCAP_TXCSUM & ifp->if_capenable) { 4210 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 4211 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP); 4212 } else { 4213 ifp->if_capenable |= IFCAP_TXCSUM; 4214 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 4215 } 4216 } else if (mask & IFCAP_RXCSUM) { 4217 if (IFCAP_RXCSUM & ifp->if_capenable) { 4218 ifp->if_capenable &= ~IFCAP_RXCSUM; 4219 } else { 4220 ifp->if_capenable |= IFCAP_RXCSUM; 4221 } 4222 } 4223 if (mask & IFCAP_TSO4) { 4224 if (IFCAP_TSO4 & ifp->if_capenable) { 4225 ifp->if_capenable &= ~IFCAP_TSO4; 4226 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 4227 ifp->if_capenable |= IFCAP_TSO4; 4228 ifp->if_hwassist |= CSUM_TSO; 4229 } else { 4230 printf("mxge requires tx checksum offload" 4231 " be enabled to use TSO\n"); 4232 err = EINVAL; 4233 } 4234 } 4235 #if IFCAP_TSO6 4236 if (mask & IFCAP_TXCSUM_IPV6) { 4237 if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) { 4238 ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 4239 | IFCAP_TSO6); 4240 ifp->if_hwassist &= ~(CSUM_TCP_IPV6 4241 | CSUM_UDP); 4242 } else { 4243 ifp->if_capenable |= IFCAP_TXCSUM_IPV6; 4244 ifp->if_hwassist |= (CSUM_TCP_IPV6 4245 | CSUM_UDP_IPV6); 4246 } 4247 } else if (mask & IFCAP_RXCSUM_IPV6) { 4248 if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) { 4249 ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6; 4250 } else { 4251 ifp->if_capenable |= IFCAP_RXCSUM_IPV6; 4252 } 4253 } 4254 if (mask & IFCAP_TSO6) { 4255 if (IFCAP_TSO6 & ifp->if_capenable) { 4256 ifp->if_capenable &= ~IFCAP_TSO6; 4257 } else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) { 4258 ifp->if_capenable |= IFCAP_TSO6; 4259 ifp->if_hwassist |= CSUM_TSO; 4260 } else { 4261 printf("mxge requires tx checksum offload" 4262 " be enabled to use TSO\n"); 4263 err = EINVAL; 4264 } 4265 } 4266 #endif /*IFCAP_TSO6 */ 4267 4268 if (mask & IFCAP_LRO) 4269 ifp->if_capenable ^= IFCAP_LRO; 4270 if (mask & IFCAP_VLAN_HWTAGGING) 4271 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 4272 if (mask & IFCAP_VLAN_HWTSO) 4273 ifp->if_capenable ^= IFCAP_VLAN_HWTSO; 4274 4275 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) || 4276 !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING)) 4277 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO; 4278 4279 mtx_unlock(&sc->driver_mtx); 4280 VLAN_CAPABILITIES(ifp); 4281 4282 break; 4283 4284 case SIOCGIFMEDIA: 4285 mtx_lock(&sc->driver_mtx); 4286 mxge_media_probe(sc); 4287 mtx_unlock(&sc->driver_mtx); 4288 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 4289 &sc->media, command); 4290 break; 4291 4292 default: 4293 err = ENOTTY; 4294 } 4295 return err; 4296 } 4297 4298 static void 4299 mxge_fetch_tunables(mxge_softc_t *sc) 4300 { 4301 4302 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); 4303 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 4304 &mxge_flow_control); 4305 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 4306 &mxge_intr_coal_delay); 4307 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 4308 &mxge_nvidia_ecrc_enable); 4309 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 4310 &mxge_force_firmware); 4311 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 4312 &mxge_deassert_wait); 4313 TUNABLE_INT_FETCH("hw.mxge.verbose", 4314 &mxge_verbose); 4315 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 4316 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); 4317 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); 4318 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type); 4319 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu); 4320 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle); 4321 4322 if (bootverbose) 4323 mxge_verbose = 1; 4324 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 4325 mxge_intr_coal_delay = 30; 4326 if (mxge_ticks == 0) 4327 mxge_ticks = hz / 2; 4328 sc->pause = mxge_flow_control; 4329 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 4330 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) { 4331 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 4332 } 4333 if (mxge_initial_mtu > ETHERMTU_JUMBO || 4334 mxge_initial_mtu < ETHER_MIN_LEN) 4335 mxge_initial_mtu = ETHERMTU_JUMBO; 4336 4337 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE) 4338 mxge_throttle = MXGE_MAX_THROTTLE; 4339 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE) 4340 mxge_throttle = MXGE_MIN_THROTTLE; 4341 sc->throttle = mxge_throttle; 4342 } 4343 4344 4345 static void 4346 mxge_free_slices(mxge_softc_t *sc) 4347 { 4348 struct mxge_slice_state *ss; 4349 int i; 4350 4351 4352 if (sc->ss == NULL) 4353 return; 4354 4355 for (i = 0; i < sc->num_slices; i++) { 4356 ss = &sc->ss[i]; 4357 if (ss->fw_stats != NULL) { 4358 mxge_dma_free(&ss->fw_stats_dma); 4359 ss->fw_stats = NULL; 4360 #ifdef IFNET_BUF_RING 4361 if (ss->tx.br != NULL) { 4362 drbr_free(ss->tx.br, M_DEVBUF); 4363 ss->tx.br = NULL; 4364 } 4365 #endif 4366 mtx_destroy(&ss->tx.mtx); 4367 } 4368 if (ss->rx_done.entry != NULL) { 4369 mxge_dma_free(&ss->rx_done.dma); 4370 ss->rx_done.entry = NULL; 4371 } 4372 } 4373 free(sc->ss, M_DEVBUF); 4374 sc->ss = NULL; 4375 } 4376 4377 static int 4378 mxge_alloc_slices(mxge_softc_t *sc) 4379 { 4380 mxge_cmd_t cmd; 4381 struct mxge_slice_state *ss; 4382 size_t bytes; 4383 int err, i, max_intr_slots; 4384 4385 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4386 if (err != 0) { 4387 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4388 return err; 4389 } 4390 sc->rx_ring_size = cmd.data0; 4391 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); 4392 4393 bytes = sizeof (*sc->ss) * sc->num_slices; 4394 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO); 4395 if (sc->ss == NULL) 4396 return (ENOMEM); 4397 for (i = 0; i < sc->num_slices; i++) { 4398 ss = &sc->ss[i]; 4399 4400 ss->sc = sc; 4401 4402 /* allocate per-slice rx interrupt queues */ 4403 4404 bytes = max_intr_slots * sizeof (*ss->rx_done.entry); 4405 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096); 4406 if (err != 0) 4407 goto abort; 4408 ss->rx_done.entry = ss->rx_done.dma.addr; 4409 bzero(ss->rx_done.entry, bytes); 4410 4411 /* 4412 * allocate the per-slice firmware stats; stats 4413 * (including tx) are used used only on the first 4414 * slice for now 4415 */ 4416 #ifndef IFNET_BUF_RING 4417 if (i > 0) 4418 continue; 4419 #endif 4420 4421 bytes = sizeof (*ss->fw_stats); 4422 err = mxge_dma_alloc(sc, &ss->fw_stats_dma, 4423 sizeof (*ss->fw_stats), 64); 4424 if (err != 0) 4425 goto abort; 4426 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr; 4427 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name), 4428 "%s:tx(%d)", device_get_nameunit(sc->dev), i); 4429 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF); 4430 #ifdef IFNET_BUF_RING 4431 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK, 4432 &ss->tx.mtx); 4433 #endif 4434 } 4435 4436 return (0); 4437 4438 abort: 4439 mxge_free_slices(sc); 4440 return (ENOMEM); 4441 } 4442 4443 static void 4444 mxge_slice_probe(mxge_softc_t *sc) 4445 { 4446 mxge_cmd_t cmd; 4447 char *old_fw; 4448 int msix_cnt, status, max_intr_slots; 4449 4450 sc->num_slices = 1; 4451 /* 4452 * don't enable multiple slices if they are not enabled, 4453 * or if this is not an SMP system 4454 */ 4455 4456 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2) 4457 return; 4458 4459 /* see how many MSI-X interrupts are available */ 4460 msix_cnt = pci_msix_count(sc->dev); 4461 if (msix_cnt < 2) 4462 return; 4463 4464 /* now load the slice aware firmware see what it supports */ 4465 old_fw = sc->fw_name; 4466 if (old_fw == mxge_fw_aligned) 4467 sc->fw_name = mxge_fw_rss_aligned; 4468 else 4469 sc->fw_name = mxge_fw_rss_unaligned; 4470 status = mxge_load_firmware(sc, 0); 4471 if (status != 0) { 4472 device_printf(sc->dev, "Falling back to a single slice\n"); 4473 return; 4474 } 4475 4476 /* try to send a reset command to the card to see if it 4477 is alive */ 4478 memset(&cmd, 0, sizeof (cmd)); 4479 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 4480 if (status != 0) { 4481 device_printf(sc->dev, "failed reset\n"); 4482 goto abort_with_fw; 4483 } 4484 4485 /* get rx ring size */ 4486 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4487 if (status != 0) { 4488 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4489 goto abort_with_fw; 4490 } 4491 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t)); 4492 4493 /* tell it the size of the interrupt queues */ 4494 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot); 4495 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 4496 if (status != 0) { 4497 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 4498 goto abort_with_fw; 4499 } 4500 4501 /* ask the maximum number of slices it supports */ 4502 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 4503 if (status != 0) { 4504 device_printf(sc->dev, 4505 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); 4506 goto abort_with_fw; 4507 } 4508 sc->num_slices = cmd.data0; 4509 if (sc->num_slices > msix_cnt) 4510 sc->num_slices = msix_cnt; 4511 4512 if (mxge_max_slices == -1) { 4513 /* cap to number of CPUs in system */ 4514 if (sc->num_slices > mp_ncpus) 4515 sc->num_slices = mp_ncpus; 4516 } else { 4517 if (sc->num_slices > mxge_max_slices) 4518 sc->num_slices = mxge_max_slices; 4519 } 4520 /* make sure it is a power of two */ 4521 while (sc->num_slices & (sc->num_slices - 1)) 4522 sc->num_slices--; 4523 4524 if (mxge_verbose) 4525 device_printf(sc->dev, "using %d slices\n", 4526 sc->num_slices); 4527 4528 return; 4529 4530 abort_with_fw: 4531 sc->fw_name = old_fw; 4532 (void) mxge_load_firmware(sc, 0); 4533 } 4534 4535 static int 4536 mxge_add_msix_irqs(mxge_softc_t *sc) 4537 { 4538 size_t bytes; 4539 int count, err, i, rid; 4540 4541 rid = PCIR_BAR(2); 4542 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 4543 &rid, RF_ACTIVE); 4544 4545 if (sc->msix_table_res == NULL) { 4546 device_printf(sc->dev, "couldn't alloc MSIX table res\n"); 4547 return ENXIO; 4548 } 4549 4550 count = sc->num_slices; 4551 err = pci_alloc_msix(sc->dev, &count); 4552 if (err != 0) { 4553 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d" 4554 "err = %d \n", sc->num_slices, err); 4555 goto abort_with_msix_table; 4556 } 4557 if (count < sc->num_slices) { 4558 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n", 4559 count, sc->num_slices); 4560 device_printf(sc->dev, 4561 "Try setting hw.mxge.max_slices to %d\n", 4562 count); 4563 err = ENOSPC; 4564 goto abort_with_msix; 4565 } 4566 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices; 4567 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4568 if (sc->msix_irq_res == NULL) { 4569 err = ENOMEM; 4570 goto abort_with_msix; 4571 } 4572 4573 for (i = 0; i < sc->num_slices; i++) { 4574 rid = i + 1; 4575 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev, 4576 SYS_RES_IRQ, 4577 &rid, RF_ACTIVE); 4578 if (sc->msix_irq_res[i] == NULL) { 4579 device_printf(sc->dev, "couldn't allocate IRQ res" 4580 " for message %d\n", i); 4581 err = ENXIO; 4582 goto abort_with_res; 4583 } 4584 } 4585 4586 bytes = sizeof (*sc->msix_ih) * sc->num_slices; 4587 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4588 4589 for (i = 0; i < sc->num_slices; i++) { 4590 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i], 4591 INTR_TYPE_NET | INTR_MPSAFE, 4592 #if __FreeBSD_version > 700030 4593 NULL, 4594 #endif 4595 mxge_intr, &sc->ss[i], &sc->msix_ih[i]); 4596 if (err != 0) { 4597 device_printf(sc->dev, "couldn't setup intr for " 4598 "message %d\n", i); 4599 goto abort_with_intr; 4600 } 4601 bus_describe_intr(sc->dev, sc->msix_irq_res[i], 4602 sc->msix_ih[i], "s%d", i); 4603 } 4604 4605 if (mxge_verbose) { 4606 device_printf(sc->dev, "using %d msix IRQs:", 4607 sc->num_slices); 4608 for (i = 0; i < sc->num_slices; i++) 4609 printf(" %jd", rman_get_start(sc->msix_irq_res[i])); 4610 printf("\n"); 4611 } 4612 return (0); 4613 4614 abort_with_intr: 4615 for (i = 0; i < sc->num_slices; i++) { 4616 if (sc->msix_ih[i] != NULL) { 4617 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4618 sc->msix_ih[i]); 4619 sc->msix_ih[i] = NULL; 4620 } 4621 } 4622 free(sc->msix_ih, M_DEVBUF); 4623 4624 4625 abort_with_res: 4626 for (i = 0; i < sc->num_slices; i++) { 4627 rid = i + 1; 4628 if (sc->msix_irq_res[i] != NULL) 4629 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4630 sc->msix_irq_res[i]); 4631 sc->msix_irq_res[i] = NULL; 4632 } 4633 free(sc->msix_irq_res, M_DEVBUF); 4634 4635 4636 abort_with_msix: 4637 pci_release_msi(sc->dev); 4638 4639 abort_with_msix_table: 4640 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4641 sc->msix_table_res); 4642 4643 return err; 4644 } 4645 4646 static int 4647 mxge_add_single_irq(mxge_softc_t *sc) 4648 { 4649 int count, err, rid; 4650 4651 count = pci_msi_count(sc->dev); 4652 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) { 4653 rid = 1; 4654 } else { 4655 rid = 0; 4656 sc->legacy_irq = 1; 4657 } 4658 sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid, 4659 RF_SHAREABLE | RF_ACTIVE); 4660 if (sc->irq_res == NULL) { 4661 device_printf(sc->dev, "could not alloc interrupt\n"); 4662 return ENXIO; 4663 } 4664 if (mxge_verbose) 4665 device_printf(sc->dev, "using %s irq %jd\n", 4666 sc->legacy_irq ? "INTx" : "MSI", 4667 rman_get_start(sc->irq_res)); 4668 err = bus_setup_intr(sc->dev, sc->irq_res, 4669 INTR_TYPE_NET | INTR_MPSAFE, 4670 #if __FreeBSD_version > 700030 4671 NULL, 4672 #endif 4673 mxge_intr, &sc->ss[0], &sc->ih); 4674 if (err != 0) { 4675 bus_release_resource(sc->dev, SYS_RES_IRQ, 4676 sc->legacy_irq ? 0 : 1, sc->irq_res); 4677 if (!sc->legacy_irq) 4678 pci_release_msi(sc->dev); 4679 } 4680 return err; 4681 } 4682 4683 static void 4684 mxge_rem_msix_irqs(mxge_softc_t *sc) 4685 { 4686 int i, rid; 4687 4688 for (i = 0; i < sc->num_slices; i++) { 4689 if (sc->msix_ih[i] != NULL) { 4690 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4691 sc->msix_ih[i]); 4692 sc->msix_ih[i] = NULL; 4693 } 4694 } 4695 free(sc->msix_ih, M_DEVBUF); 4696 4697 for (i = 0; i < sc->num_slices; i++) { 4698 rid = i + 1; 4699 if (sc->msix_irq_res[i] != NULL) 4700 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4701 sc->msix_irq_res[i]); 4702 sc->msix_irq_res[i] = NULL; 4703 } 4704 free(sc->msix_irq_res, M_DEVBUF); 4705 4706 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4707 sc->msix_table_res); 4708 4709 pci_release_msi(sc->dev); 4710 return; 4711 } 4712 4713 static void 4714 mxge_rem_single_irq(mxge_softc_t *sc) 4715 { 4716 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 4717 bus_release_resource(sc->dev, SYS_RES_IRQ, 4718 sc->legacy_irq ? 0 : 1, sc->irq_res); 4719 if (!sc->legacy_irq) 4720 pci_release_msi(sc->dev); 4721 } 4722 4723 static void 4724 mxge_rem_irq(mxge_softc_t *sc) 4725 { 4726 if (sc->num_slices > 1) 4727 mxge_rem_msix_irqs(sc); 4728 else 4729 mxge_rem_single_irq(sc); 4730 } 4731 4732 static int 4733 mxge_add_irq(mxge_softc_t *sc) 4734 { 4735 int err; 4736 4737 if (sc->num_slices > 1) 4738 err = mxge_add_msix_irqs(sc); 4739 else 4740 err = mxge_add_single_irq(sc); 4741 4742 if (0 && err == 0 && sc->num_slices > 1) { 4743 mxge_rem_msix_irqs(sc); 4744 err = mxge_add_msix_irqs(sc); 4745 } 4746 return err; 4747 } 4748 4749 4750 static int 4751 mxge_attach(device_t dev) 4752 { 4753 mxge_cmd_t cmd; 4754 mxge_softc_t *sc = device_get_softc(dev); 4755 struct ifnet *ifp; 4756 int err, rid; 4757 4758 sc->dev = dev; 4759 mxge_fetch_tunables(sc); 4760 4761 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc); 4762 sc->tq = taskqueue_create("mxge_taskq", M_WAITOK, 4763 taskqueue_thread_enqueue, &sc->tq); 4764 if (sc->tq == NULL) { 4765 err = ENOMEM; 4766 goto abort_with_nothing; 4767 } 4768 4769 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 4770 1, /* alignment */ 4771 0, /* boundary */ 4772 BUS_SPACE_MAXADDR, /* low */ 4773 BUS_SPACE_MAXADDR, /* high */ 4774 NULL, NULL, /* filter */ 4775 65536 + 256, /* maxsize */ 4776 MXGE_MAX_SEND_DESC, /* num segs */ 4777 65536, /* maxsegsize */ 4778 0, /* flags */ 4779 NULL, NULL, /* lock */ 4780 &sc->parent_dmat); /* tag */ 4781 4782 if (err != 0) { 4783 device_printf(sc->dev, "Err %d allocating parent dmat\n", 4784 err); 4785 goto abort_with_tq; 4786 } 4787 4788 ifp = sc->ifp = if_alloc(IFT_ETHER); 4789 if (ifp == NULL) { 4790 device_printf(dev, "can not if_alloc()\n"); 4791 err = ENOSPC; 4792 goto abort_with_parent_dmat; 4793 } 4794 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4795 4796 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", 4797 device_get_nameunit(dev)); 4798 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF); 4799 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), 4800 "%s:drv", device_get_nameunit(dev)); 4801 mtx_init(&sc->driver_mtx, sc->driver_mtx_name, 4802 MTX_NETWORK_LOCK, MTX_DEF); 4803 4804 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); 4805 4806 mxge_setup_cfg_space(sc); 4807 4808 /* Map the board into the kernel */ 4809 rid = PCIR_BARS; 4810 sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, 4811 RF_ACTIVE); 4812 if (sc->mem_res == NULL) { 4813 device_printf(dev, "could not map memory\n"); 4814 err = ENXIO; 4815 goto abort_with_lock; 4816 } 4817 sc->sram = rman_get_virtual(sc->mem_res); 4818 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4819 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4820 device_printf(dev, "impossible memory region size %jd\n", 4821 rman_get_size(sc->mem_res)); 4822 err = ENXIO; 4823 goto abort_with_mem_res; 4824 } 4825 4826 /* make NULL terminated copy of the EEPROM strings section of 4827 lanai SRAM */ 4828 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4829 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4830 rman_get_bushandle(sc->mem_res), 4831 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4832 sc->eeprom_strings, 4833 MXGE_EEPROM_STRINGS_SIZE - 2); 4834 err = mxge_parse_strings(sc); 4835 if (err != 0) 4836 goto abort_with_mem_res; 4837 4838 /* Enable write combining for efficient use of PCIe bus */ 4839 mxge_enable_wc(sc); 4840 4841 /* Allocate the out of band dma memory */ 4842 err = mxge_dma_alloc(sc, &sc->cmd_dma, 4843 sizeof (mxge_cmd_t), 64); 4844 if (err != 0) 4845 goto abort_with_mem_res; 4846 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 4847 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4848 if (err != 0) 4849 goto abort_with_cmd_dma; 4850 4851 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4852 if (err != 0) 4853 goto abort_with_zeropad_dma; 4854 4855 /* select & load the firmware */ 4856 err = mxge_select_firmware(sc); 4857 if (err != 0) 4858 goto abort_with_dmabench; 4859 sc->intr_coal_delay = mxge_intr_coal_delay; 4860 4861 mxge_slice_probe(sc); 4862 err = mxge_alloc_slices(sc); 4863 if (err != 0) 4864 goto abort_with_dmabench; 4865 4866 err = mxge_reset(sc, 0); 4867 if (err != 0) 4868 goto abort_with_slices; 4869 4870 err = mxge_alloc_rings(sc); 4871 if (err != 0) { 4872 device_printf(sc->dev, "failed to allocate rings\n"); 4873 goto abort_with_slices; 4874 } 4875 4876 err = mxge_add_irq(sc); 4877 if (err != 0) { 4878 device_printf(sc->dev, "failed to add irq\n"); 4879 goto abort_with_rings; 4880 } 4881 4882 ifp->if_baudrate = IF_Gbps(10); 4883 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 4884 IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 | 4885 IFCAP_RXCSUM_IPV6; 4886 #if defined(INET) || defined(INET6) 4887 ifp->if_capabilities |= IFCAP_LRO; 4888 #endif 4889 4890 #ifdef MXGE_NEW_VLAN_API 4891 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; 4892 4893 /* Only FW 1.4.32 and newer can do TSO over vlans */ 4894 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 4895 sc->fw_ver_tiny >= 32) 4896 ifp->if_capabilities |= IFCAP_VLAN_HWTSO; 4897 #endif 4898 sc->max_mtu = mxge_max_mtu(sc); 4899 if (sc->max_mtu >= 9000) 4900 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 4901 else 4902 device_printf(dev, "MTU limited to %d. Install " 4903 "latest firmware for 9000 byte jumbo support\n", 4904 sc->max_mtu - ETHER_HDR_LEN); 4905 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4906 ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6; 4907 /* check to see if f/w supports TSO for IPv6 */ 4908 if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) { 4909 if (CSUM_TCP_IPV6) 4910 ifp->if_capabilities |= IFCAP_TSO6; 4911 sc->max_tso6_hlen = min(cmd.data0, 4912 sizeof (sc->ss[0].scratch)); 4913 } 4914 ifp->if_capenable = ifp->if_capabilities; 4915 if (sc->lro_cnt == 0) 4916 ifp->if_capenable &= ~IFCAP_LRO; 4917 ifp->if_init = mxge_init; 4918 ifp->if_softc = sc; 4919 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4920 ifp->if_ioctl = mxge_ioctl; 4921 ifp->if_start = mxge_start; 4922 ifp->if_get_counter = mxge_get_counter; 4923 /* Initialise the ifmedia structure */ 4924 ifmedia_init(&sc->media, 0, mxge_media_change, 4925 mxge_media_status); 4926 mxge_media_init(sc); 4927 mxge_media_probe(sc); 4928 sc->dying = 0; 4929 ether_ifattach(ifp, sc->mac_addr); 4930 /* ether_ifattach sets mtu to ETHERMTU */ 4931 if (mxge_initial_mtu != ETHERMTU) 4932 mxge_change_mtu(sc, mxge_initial_mtu); 4933 4934 mxge_add_sysctls(sc); 4935 #ifdef IFNET_BUF_RING 4936 ifp->if_transmit = mxge_transmit; 4937 ifp->if_qflush = mxge_qflush; 4938 #endif 4939 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq", 4940 device_get_nameunit(sc->dev)); 4941 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 4942 return 0; 4943 4944 abort_with_rings: 4945 mxge_free_rings(sc); 4946 abort_with_slices: 4947 mxge_free_slices(sc); 4948 abort_with_dmabench: 4949 mxge_dma_free(&sc->dmabench_dma); 4950 abort_with_zeropad_dma: 4951 mxge_dma_free(&sc->zeropad_dma); 4952 abort_with_cmd_dma: 4953 mxge_dma_free(&sc->cmd_dma); 4954 abort_with_mem_res: 4955 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4956 abort_with_lock: 4957 pci_disable_busmaster(dev); 4958 mtx_destroy(&sc->cmd_mtx); 4959 mtx_destroy(&sc->driver_mtx); 4960 if_free(ifp); 4961 abort_with_parent_dmat: 4962 bus_dma_tag_destroy(sc->parent_dmat); 4963 abort_with_tq: 4964 if (sc->tq != NULL) { 4965 taskqueue_drain(sc->tq, &sc->watchdog_task); 4966 taskqueue_free(sc->tq); 4967 sc->tq = NULL; 4968 } 4969 abort_with_nothing: 4970 return err; 4971 } 4972 4973 static int 4974 mxge_detach(device_t dev) 4975 { 4976 mxge_softc_t *sc = device_get_softc(dev); 4977 4978 if (mxge_vlans_active(sc)) { 4979 device_printf(sc->dev, 4980 "Detach vlans before removing module\n"); 4981 return EBUSY; 4982 } 4983 mtx_lock(&sc->driver_mtx); 4984 sc->dying = 1; 4985 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 4986 mxge_close(sc, 0); 4987 mtx_unlock(&sc->driver_mtx); 4988 ether_ifdetach(sc->ifp); 4989 if (sc->tq != NULL) { 4990 taskqueue_drain(sc->tq, &sc->watchdog_task); 4991 taskqueue_free(sc->tq); 4992 sc->tq = NULL; 4993 } 4994 callout_drain(&sc->co_hdl); 4995 ifmedia_removeall(&sc->media); 4996 mxge_dummy_rdma(sc, 0); 4997 mxge_rem_sysctls(sc); 4998 mxge_rem_irq(sc); 4999 mxge_free_rings(sc); 5000 mxge_free_slices(sc); 5001 mxge_dma_free(&sc->dmabench_dma); 5002 mxge_dma_free(&sc->zeropad_dma); 5003 mxge_dma_free(&sc->cmd_dma); 5004 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 5005 pci_disable_busmaster(dev); 5006 mtx_destroy(&sc->cmd_mtx); 5007 mtx_destroy(&sc->driver_mtx); 5008 if_free(sc->ifp); 5009 bus_dma_tag_destroy(sc->parent_dmat); 5010 return 0; 5011 } 5012 5013 static int 5014 mxge_shutdown(device_t dev) 5015 { 5016 return 0; 5017 } 5018 5019 /* 5020 This file uses Myri10GE driver indentation. 5021 5022 Local Variables: 5023 c-file-style:"linux" 5024 tab-width:8 5025 End: 5026 */ 5027