1 /****************************************************************************** 2 SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 4 Copyright (c) 2006-2013, Myricom Inc. 5 All rights reserved. 6 7 Redistribution and use in source and binary forms, with or without 8 modification, are permitted provided that the following conditions are met: 9 10 1. Redistributions of source code must retain the above copyright notice, 11 this list of conditions and the following disclaimer. 12 13 2. Neither the name of the Myricom Inc, nor the names of its 14 contributors may be used to endorse or promote products derived from 15 this software without specific prior written permission. 16 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 POSSIBILITY OF SUCH DAMAGE. 28 29 ***************************************************************************/ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/linker.h> 37 #include <sys/firmware.h> 38 #include <sys/endian.h> 39 #include <sys/sockio.h> 40 #include <sys/mbuf.h> 41 #include <sys/malloc.h> 42 #include <sys/kdb.h> 43 #include <sys/kernel.h> 44 #include <sys/lock.h> 45 #include <sys/module.h> 46 #include <sys/socket.h> 47 #include <sys/sysctl.h> 48 #include <sys/sx.h> 49 #include <sys/taskqueue.h> 50 #include <contrib/zlib/zlib.h> 51 #include <dev/zlib/zcalloc.h> 52 53 #include <net/if.h> 54 #include <net/if_var.h> 55 #include <net/if_arp.h> 56 #include <net/ethernet.h> 57 #include <net/if_dl.h> 58 #include <net/if_media.h> 59 60 #include <net/bpf.h> 61 62 #include <net/if_types.h> 63 #include <net/if_vlan_var.h> 64 65 #include <netinet/in_systm.h> 66 #include <netinet/in.h> 67 #include <netinet/ip.h> 68 #include <netinet/ip6.h> 69 #include <netinet/tcp.h> 70 #include <netinet/tcp_lro.h> 71 #include <netinet6/ip6_var.h> 72 73 #include <machine/bus.h> 74 #include <machine/in_cksum.h> 75 #include <machine/resource.h> 76 #include <sys/bus.h> 77 #include <sys/rman.h> 78 #include <sys/smp.h> 79 80 #include <dev/pci/pcireg.h> 81 #include <dev/pci/pcivar.h> 82 #include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */ 83 84 #include <vm/vm.h> /* for pmap_mapdev() */ 85 #include <vm/pmap.h> 86 87 #if defined(__i386) || defined(__amd64) 88 #include <machine/specialreg.h> 89 #endif 90 91 #include <dev/mxge/mxge_mcp.h> 92 #include <dev/mxge/mcp_gen_header.h> 93 /*#define MXGE_FAKE_IFP*/ 94 #include <dev/mxge/if_mxge_var.h> 95 #ifdef IFNET_BUF_RING 96 #include <sys/buf_ring.h> 97 #endif 98 99 #include "opt_inet.h" 100 #include "opt_inet6.h" 101 102 /* tunable params */ 103 static int mxge_nvidia_ecrc_enable = 1; 104 static int mxge_force_firmware = 0; 105 static int mxge_intr_coal_delay = 30; 106 static int mxge_deassert_wait = 1; 107 static int mxge_flow_control = 1; 108 static int mxge_verbose = 0; 109 static int mxge_ticks; 110 static int mxge_max_slices = 1; 111 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 112 static int mxge_always_promisc = 0; 113 static int mxge_initial_mtu = ETHERMTU_JUMBO; 114 static int mxge_throttle = 0; 115 static char *mxge_fw_unaligned = "mxge_ethp_z8e"; 116 static char *mxge_fw_aligned = "mxge_eth_z8e"; 117 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; 118 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; 119 120 static int mxge_probe(device_t dev); 121 static int mxge_attach(device_t dev); 122 static int mxge_detach(device_t dev); 123 static int mxge_shutdown(device_t dev); 124 static void mxge_intr(void *arg); 125 126 static device_method_t mxge_methods[] = 127 { 128 /* Device interface */ 129 DEVMETHOD(device_probe, mxge_probe), 130 DEVMETHOD(device_attach, mxge_attach), 131 DEVMETHOD(device_detach, mxge_detach), 132 DEVMETHOD(device_shutdown, mxge_shutdown), 133 134 DEVMETHOD_END 135 }; 136 137 static driver_t mxge_driver = 138 { 139 "mxge", 140 mxge_methods, 141 sizeof(mxge_softc_t), 142 }; 143 144 static devclass_t mxge_devclass; 145 146 /* Declare ourselves to be a child of the PCI bus.*/ 147 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0); 148 MODULE_DEPEND(mxge, firmware, 1, 1, 1); 149 MODULE_DEPEND(mxge, zlib, 1, 1, 1); 150 151 static int mxge_load_firmware(mxge_softc_t *sc, int adopt); 152 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 153 static int mxge_close(mxge_softc_t *sc, int down); 154 static int mxge_open(mxge_softc_t *sc); 155 static void mxge_tick(void *arg); 156 157 static int 158 mxge_probe(device_t dev) 159 { 160 int rev; 161 162 163 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && 164 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) || 165 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) { 166 rev = pci_get_revid(dev); 167 switch (rev) { 168 case MXGE_PCI_REV_Z8E: 169 device_set_desc(dev, "Myri10G-PCIE-8A"); 170 break; 171 case MXGE_PCI_REV_Z8ES: 172 device_set_desc(dev, "Myri10G-PCIE-8B"); 173 break; 174 default: 175 device_set_desc(dev, "Myri10G-PCIE-8??"); 176 device_printf(dev, "Unrecognized rev %d NIC\n", 177 rev); 178 break; 179 } 180 return 0; 181 } 182 return ENXIO; 183 } 184 185 static void 186 mxge_enable_wc(mxge_softc_t *sc) 187 { 188 #if defined(__i386) || defined(__amd64) 189 vm_offset_t len; 190 int err; 191 192 sc->wc = 1; 193 len = rman_get_size(sc->mem_res); 194 err = pmap_change_attr((vm_offset_t) sc->sram, 195 len, PAT_WRITE_COMBINING); 196 if (err != 0) { 197 device_printf(sc->dev, "pmap_change_attr failed, %d\n", 198 err); 199 sc->wc = 0; 200 } 201 #endif 202 } 203 204 205 /* callback to get our DMA address */ 206 static void 207 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, 208 int error) 209 { 210 if (error == 0) { 211 *(bus_addr_t *) arg = segs->ds_addr; 212 } 213 } 214 215 static int 216 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, 217 bus_size_t alignment) 218 { 219 int err; 220 device_t dev = sc->dev; 221 bus_size_t boundary, maxsegsize; 222 223 if (bytes > 4096 && alignment == 4096) { 224 boundary = 0; 225 maxsegsize = bytes; 226 } else { 227 boundary = 4096; 228 maxsegsize = 4096; 229 } 230 231 /* allocate DMAable memory tags */ 232 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 233 alignment, /* alignment */ 234 boundary, /* boundary */ 235 BUS_SPACE_MAXADDR, /* low */ 236 BUS_SPACE_MAXADDR, /* high */ 237 NULL, NULL, /* filter */ 238 bytes, /* maxsize */ 239 1, /* num segs */ 240 maxsegsize, /* maxsegsize */ 241 BUS_DMA_COHERENT, /* flags */ 242 NULL, NULL, /* lock */ 243 &dma->dmat); /* tag */ 244 if (err != 0) { 245 device_printf(dev, "couldn't alloc tag (err = %d)\n", err); 246 return err; 247 } 248 249 /* allocate DMAable memory & map */ 250 err = bus_dmamem_alloc(dma->dmat, &dma->addr, 251 (BUS_DMA_WAITOK | BUS_DMA_COHERENT 252 | BUS_DMA_ZERO), &dma->map); 253 if (err != 0) { 254 device_printf(dev, "couldn't alloc mem (err = %d)\n", err); 255 goto abort_with_dmat; 256 } 257 258 /* load the memory */ 259 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, 260 mxge_dmamap_callback, 261 (void *)&dma->bus_addr, 0); 262 if (err != 0) { 263 device_printf(dev, "couldn't load map (err = %d)\n", err); 264 goto abort_with_mem; 265 } 266 return 0; 267 268 abort_with_mem: 269 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 270 abort_with_dmat: 271 (void)bus_dma_tag_destroy(dma->dmat); 272 return err; 273 } 274 275 276 static void 277 mxge_dma_free(mxge_dma_t *dma) 278 { 279 bus_dmamap_unload(dma->dmat, dma->map); 280 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 281 (void)bus_dma_tag_destroy(dma->dmat); 282 } 283 284 /* 285 * The eeprom strings on the lanaiX have the format 286 * SN=x\0 287 * MAC=x:x:x:x:x:x\0 288 * PC=text\0 289 */ 290 291 static int 292 mxge_parse_strings(mxge_softc_t *sc) 293 { 294 char *ptr; 295 int i, found_mac, found_sn2; 296 char *endptr; 297 298 ptr = sc->eeprom_strings; 299 found_mac = 0; 300 found_sn2 = 0; 301 while (*ptr != '\0') { 302 if (strncmp(ptr, "MAC=", 4) == 0) { 303 ptr += 4; 304 for (i = 0;;) { 305 sc->mac_addr[i] = strtoul(ptr, &endptr, 16); 306 if (endptr - ptr != 2) 307 goto abort; 308 ptr = endptr; 309 if (++i == 6) 310 break; 311 if (*ptr++ != ':') 312 goto abort; 313 } 314 found_mac = 1; 315 } else if (strncmp(ptr, "PC=", 3) == 0) { 316 ptr += 3; 317 strlcpy(sc->product_code_string, ptr, 318 sizeof(sc->product_code_string)); 319 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) { 320 ptr += 3; 321 strlcpy(sc->serial_number_string, ptr, 322 sizeof(sc->serial_number_string)); 323 } else if (strncmp(ptr, "SN2=", 4) == 0) { 324 /* SN2 takes precedence over SN */ 325 ptr += 4; 326 found_sn2 = 1; 327 strlcpy(sc->serial_number_string, ptr, 328 sizeof(sc->serial_number_string)); 329 } 330 while (*ptr++ != '\0') {} 331 } 332 333 if (found_mac) 334 return 0; 335 336 abort: 337 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 338 339 return ENXIO; 340 } 341 342 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 343 static void 344 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 345 { 346 uint32_t val; 347 unsigned long base, off; 348 char *va, *cfgptr; 349 device_t pdev, mcp55; 350 uint16_t vendor_id, device_id, word; 351 uintptr_t bus, slot, func, ivend, idev; 352 uint32_t *ptr32; 353 354 355 if (!mxge_nvidia_ecrc_enable) 356 return; 357 358 pdev = device_get_parent(device_get_parent(sc->dev)); 359 if (pdev == NULL) { 360 device_printf(sc->dev, "could not find parent?\n"); 361 return; 362 } 363 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 364 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 365 366 if (vendor_id != 0x10de) 367 return; 368 369 base = 0; 370 371 if (device_id == 0x005d) { 372 /* ck804, base address is magic */ 373 base = 0xe0000000UL; 374 } else if (device_id >= 0x0374 && device_id <= 0x378) { 375 /* mcp55, base address stored in chipset */ 376 mcp55 = pci_find_bsf(0, 0, 0); 377 if (mcp55 && 378 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 379 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 380 word = pci_read_config(mcp55, 0x90, 2); 381 base = ((unsigned long)word & 0x7ffeU) << 25; 382 } 383 } 384 if (!base) 385 return; 386 387 /* XXXX 388 Test below is commented because it is believed that doing 389 config read/write beyond 0xff will access the config space 390 for the next larger function. Uncomment this and remove 391 the hacky pmap_mapdev() way of accessing config space when 392 FreeBSD grows support for extended pcie config space access 393 */ 394 #if 0 395 /* See if we can, by some miracle, access the extended 396 config space */ 397 val = pci_read_config(pdev, 0x178, 4); 398 if (val != 0xffffffff) { 399 val |= 0x40; 400 pci_write_config(pdev, 0x178, val, 4); 401 return; 402 } 403 #endif 404 /* Rather than using normal pci config space writes, we must 405 * map the Nvidia config space ourselves. This is because on 406 * opteron/nvidia class machine the 0xe000000 mapping is 407 * handled by the nvidia chipset, that means the internal PCI 408 * device (the on-chip northbridge), or the amd-8131 bridge 409 * and things behind them are not visible by this method. 410 */ 411 412 BUS_READ_IVAR(device_get_parent(pdev), pdev, 413 PCI_IVAR_BUS, &bus); 414 BUS_READ_IVAR(device_get_parent(pdev), pdev, 415 PCI_IVAR_SLOT, &slot); 416 BUS_READ_IVAR(device_get_parent(pdev), pdev, 417 PCI_IVAR_FUNCTION, &func); 418 BUS_READ_IVAR(device_get_parent(pdev), pdev, 419 PCI_IVAR_VENDOR, &ivend); 420 BUS_READ_IVAR(device_get_parent(pdev), pdev, 421 PCI_IVAR_DEVICE, &idev); 422 423 off = base 424 + 0x00100000UL * (unsigned long)bus 425 + 0x00001000UL * (unsigned long)(func 426 + 8 * slot); 427 428 /* map it into the kernel */ 429 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 430 431 432 if (va == NULL) { 433 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 434 return; 435 } 436 /* get a pointer to the config space mapped into the kernel */ 437 cfgptr = va + (off & PAGE_MASK); 438 439 /* make sure that we can really access it */ 440 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 441 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 442 if (! (vendor_id == ivend && device_id == idev)) { 443 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 444 vendor_id, device_id); 445 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 446 return; 447 } 448 449 ptr32 = (uint32_t*)(cfgptr + 0x178); 450 val = *ptr32; 451 452 if (val == 0xffffffff) { 453 device_printf(sc->dev, "extended mapping failed\n"); 454 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 455 return; 456 } 457 *ptr32 = val | 0x40; 458 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 459 if (mxge_verbose) 460 device_printf(sc->dev, 461 "Enabled ECRC on upstream Nvidia bridge " 462 "at %d:%d:%d\n", 463 (int)bus, (int)slot, (int)func); 464 return; 465 } 466 #else 467 static void 468 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 469 { 470 device_printf(sc->dev, 471 "Nforce 4 chipset on non-x86/amd64!?!?!\n"); 472 return; 473 } 474 #endif 475 476 477 static int 478 mxge_dma_test(mxge_softc_t *sc, int test_type) 479 { 480 mxge_cmd_t cmd; 481 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr; 482 int status; 483 uint32_t len; 484 char *test = " "; 485 486 487 /* Run a small DMA test. 488 * The magic multipliers to the length tell the firmware 489 * to do DMA read, write, or read+write tests. The 490 * results are returned in cmd.data0. The upper 16 491 * bits of the return is the number of transfers completed. 492 * The lower 16 bits is the time in 0.5us ticks that the 493 * transfers took to complete. 494 */ 495 496 len = sc->tx_boundary; 497 498 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 499 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 500 cmd.data2 = len * 0x10000; 501 status = mxge_send_cmd(sc, test_type, &cmd); 502 if (status != 0) { 503 test = "read"; 504 goto abort; 505 } 506 sc->read_dma = ((cmd.data0>>16) * len * 2) / 507 (cmd.data0 & 0xffff); 508 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 509 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 510 cmd.data2 = len * 0x1; 511 status = mxge_send_cmd(sc, test_type, &cmd); 512 if (status != 0) { 513 test = "write"; 514 goto abort; 515 } 516 sc->write_dma = ((cmd.data0>>16) * len * 2) / 517 (cmd.data0 & 0xffff); 518 519 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 520 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 521 cmd.data2 = len * 0x10001; 522 status = mxge_send_cmd(sc, test_type, &cmd); 523 if (status != 0) { 524 test = "read/write"; 525 goto abort; 526 } 527 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 528 (cmd.data0 & 0xffff); 529 530 abort: 531 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 532 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 533 test, status); 534 535 return status; 536 } 537 538 /* 539 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 540 * when the PCI-E Completion packets are aligned on an 8-byte 541 * boundary. Some PCI-E chip sets always align Completion packets; on 542 * the ones that do not, the alignment can be enforced by enabling 543 * ECRC generation (if supported). 544 * 545 * When PCI-E Completion packets are not aligned, it is actually more 546 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 547 * 548 * If the driver can neither enable ECRC nor verify that it has 549 * already been enabled, then it must use a firmware image which works 550 * around unaligned completion packets (ethp_z8e.dat), and it should 551 * also ensure that it never gives the device a Read-DMA which is 552 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 553 * enabled, then the driver should use the aligned (eth_z8e.dat) 554 * firmware image, and set tx_boundary to 4KB. 555 */ 556 557 static int 558 mxge_firmware_probe(mxge_softc_t *sc) 559 { 560 device_t dev = sc->dev; 561 int reg, status; 562 uint16_t pectl; 563 564 sc->tx_boundary = 4096; 565 /* 566 * Verify the max read request size was set to 4KB 567 * before trying the test with 4KB. 568 */ 569 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { 570 pectl = pci_read_config(dev, reg + 0x8, 2); 571 if ((pectl & (5 << 12)) != (5 << 12)) { 572 device_printf(dev, "Max Read Req. size != 4k (0x%x\n", 573 pectl); 574 sc->tx_boundary = 2048; 575 } 576 } 577 578 /* 579 * load the optimized firmware (which assumes aligned PCIe 580 * completions) in order to see if it works on this host. 581 */ 582 sc->fw_name = mxge_fw_aligned; 583 status = mxge_load_firmware(sc, 1); 584 if (status != 0) { 585 return status; 586 } 587 588 /* 589 * Enable ECRC if possible 590 */ 591 mxge_enable_nvidia_ecrc(sc); 592 593 /* 594 * Run a DMA test which watches for unaligned completions and 595 * aborts on the first one seen. Not required on Z8ES or newer. 596 */ 597 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES) 598 return 0; 599 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 600 if (status == 0) 601 return 0; /* keep the aligned firmware */ 602 603 if (status != E2BIG) 604 device_printf(dev, "DMA test failed: %d\n", status); 605 if (status == ENOSYS) 606 device_printf(dev, "Falling back to ethp! " 607 "Please install up to date fw\n"); 608 return status; 609 } 610 611 static int 612 mxge_select_firmware(mxge_softc_t *sc) 613 { 614 int aligned = 0; 615 int force_firmware = mxge_force_firmware; 616 617 if (sc->throttle) 618 force_firmware = sc->throttle; 619 620 if (force_firmware != 0) { 621 if (force_firmware == 1) 622 aligned = 1; 623 else 624 aligned = 0; 625 if (mxge_verbose) 626 device_printf(sc->dev, 627 "Assuming %s completions (forced)\n", 628 aligned ? "aligned" : "unaligned"); 629 goto abort; 630 } 631 632 /* if the PCIe link width is 4 or less, we can use the aligned 633 firmware and skip any checks */ 634 if (sc->link_width != 0 && sc->link_width <= 4) { 635 device_printf(sc->dev, 636 "PCIe x%d Link, expect reduced performance\n", 637 sc->link_width); 638 aligned = 1; 639 goto abort; 640 } 641 642 if (0 == mxge_firmware_probe(sc)) 643 return 0; 644 645 abort: 646 if (aligned) { 647 sc->fw_name = mxge_fw_aligned; 648 sc->tx_boundary = 4096; 649 } else { 650 sc->fw_name = mxge_fw_unaligned; 651 sc->tx_boundary = 2048; 652 } 653 return (mxge_load_firmware(sc, 0)); 654 } 655 656 static int 657 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 658 { 659 660 661 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 662 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 663 be32toh(hdr->mcp_type)); 664 return EIO; 665 } 666 667 /* save firmware version for sysctl */ 668 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version)); 669 if (mxge_verbose) 670 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 671 672 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 673 &sc->fw_ver_minor, &sc->fw_ver_tiny); 674 675 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 676 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 677 device_printf(sc->dev, "Found firmware version %s\n", 678 sc->fw_version); 679 device_printf(sc->dev, "Driver needs %d.%d\n", 680 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 681 return EINVAL; 682 } 683 return 0; 684 685 } 686 687 static int 688 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 689 { 690 z_stream zs; 691 char *inflate_buffer; 692 const struct firmware *fw; 693 const mcp_gen_header_t *hdr; 694 unsigned hdr_offset; 695 int status; 696 unsigned int i; 697 char dummy; 698 size_t fw_len; 699 700 fw = firmware_get(sc->fw_name); 701 if (fw == NULL) { 702 device_printf(sc->dev, "Could not find firmware image %s\n", 703 sc->fw_name); 704 return ENOENT; 705 } 706 707 708 709 /* setup zlib and decompress f/w */ 710 bzero(&zs, sizeof (zs)); 711 zs.zalloc = zcalloc_nowait; 712 zs.zfree = zcfree; 713 status = inflateInit(&zs); 714 if (status != Z_OK) { 715 status = EIO; 716 goto abort_with_fw; 717 } 718 719 /* the uncompressed size is stored as the firmware version, 720 which would otherwise go unused */ 721 fw_len = (size_t) fw->version; 722 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); 723 if (inflate_buffer == NULL) 724 goto abort_with_zs; 725 zs.avail_in = fw->datasize; 726 zs.next_in = __DECONST(char *, fw->data); 727 zs.avail_out = fw_len; 728 zs.next_out = inflate_buffer; 729 status = inflate(&zs, Z_FINISH); 730 if (status != Z_STREAM_END) { 731 device_printf(sc->dev, "zlib %d\n", status); 732 status = EIO; 733 goto abort_with_buffer; 734 } 735 736 /* check id */ 737 hdr_offset = htobe32(*(const uint32_t *) 738 (inflate_buffer + MCP_HEADER_PTR_OFFSET)); 739 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 740 device_printf(sc->dev, "Bad firmware file"); 741 status = EIO; 742 goto abort_with_buffer; 743 } 744 hdr = (const void*)(inflate_buffer + hdr_offset); 745 746 status = mxge_validate_firmware(sc, hdr); 747 if (status != 0) 748 goto abort_with_buffer; 749 750 /* Copy the inflated firmware to NIC SRAM. */ 751 for (i = 0; i < fw_len; i += 256) { 752 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 753 inflate_buffer + i, 754 min(256U, (unsigned)(fw_len - i))); 755 wmb(); 756 dummy = *sc->sram; 757 wmb(); 758 } 759 760 *limit = fw_len; 761 status = 0; 762 abort_with_buffer: 763 free(inflate_buffer, M_TEMP); 764 abort_with_zs: 765 inflateEnd(&zs); 766 abort_with_fw: 767 firmware_put(fw, FIRMWARE_UNLOAD); 768 return status; 769 } 770 771 /* 772 * Enable or disable periodic RDMAs from the host to make certain 773 * chipsets resend dropped PCIe messages 774 */ 775 776 static void 777 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 778 { 779 char buf_bytes[72]; 780 volatile uint32_t *confirm; 781 volatile char *submit; 782 uint32_t *buf, dma_low, dma_high; 783 int i; 784 785 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 786 787 /* clear confirmation addr */ 788 confirm = (volatile uint32_t *)sc->cmd; 789 *confirm = 0; 790 wmb(); 791 792 /* send an rdma command to the PCIe engine, and wait for the 793 response in the confirmation address. The firmware should 794 write a -1 there to indicate it is alive and well 795 */ 796 797 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 798 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 799 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 800 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 801 buf[2] = htobe32(0xffffffff); /* confirm data */ 802 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 803 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 804 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 805 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 806 buf[5] = htobe32(enable); /* enable? */ 807 808 809 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 810 811 mxge_pio_copy(submit, buf, 64); 812 wmb(); 813 DELAY(1000); 814 wmb(); 815 i = 0; 816 while (*confirm != 0xffffffff && i < 20) { 817 DELAY(1000); 818 i++; 819 } 820 if (*confirm != 0xffffffff) { 821 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 822 (enable ? "enable" : "disable"), confirm, 823 *confirm); 824 } 825 return; 826 } 827 828 static int 829 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 830 { 831 mcp_cmd_t *buf; 832 char buf_bytes[sizeof(*buf) + 8]; 833 volatile mcp_cmd_response_t *response = sc->cmd; 834 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 835 uint32_t dma_low, dma_high; 836 int err, sleep_total = 0; 837 838 /* ensure buf is aligned to 8 bytes */ 839 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 840 841 buf->data0 = htobe32(data->data0); 842 buf->data1 = htobe32(data->data1); 843 buf->data2 = htobe32(data->data2); 844 buf->cmd = htobe32(cmd); 845 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 846 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 847 848 buf->response_addr.low = htobe32(dma_low); 849 buf->response_addr.high = htobe32(dma_high); 850 mtx_lock(&sc->cmd_mtx); 851 response->result = 0xffffffff; 852 wmb(); 853 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 854 855 /* wait up to 20ms */ 856 err = EAGAIN; 857 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 858 bus_dmamap_sync(sc->cmd_dma.dmat, 859 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 860 wmb(); 861 switch (be32toh(response->result)) { 862 case 0: 863 data->data0 = be32toh(response->data); 864 err = 0; 865 break; 866 case 0xffffffff: 867 DELAY(1000); 868 break; 869 case MXGEFW_CMD_UNKNOWN: 870 err = ENOSYS; 871 break; 872 case MXGEFW_CMD_ERROR_UNALIGNED: 873 err = E2BIG; 874 break; 875 case MXGEFW_CMD_ERROR_BUSY: 876 err = EBUSY; 877 break; 878 case MXGEFW_CMD_ERROR_I2C_ABSENT: 879 err = ENXIO; 880 break; 881 default: 882 device_printf(sc->dev, 883 "mxge: command %d " 884 "failed, result = %d\n", 885 cmd, be32toh(response->result)); 886 err = ENXIO; 887 break; 888 } 889 if (err != EAGAIN) 890 break; 891 } 892 if (err == EAGAIN) 893 device_printf(sc->dev, "mxge: command %d timed out" 894 "result = %d\n", 895 cmd, be32toh(response->result)); 896 mtx_unlock(&sc->cmd_mtx); 897 return err; 898 } 899 900 static int 901 mxge_adopt_running_firmware(mxge_softc_t *sc) 902 { 903 struct mcp_gen_header *hdr; 904 const size_t bytes = sizeof (struct mcp_gen_header); 905 size_t hdr_offset; 906 int status; 907 908 /* find running firmware header */ 909 hdr_offset = htobe32(*(volatile uint32_t *) 910 (sc->sram + MCP_HEADER_PTR_OFFSET)); 911 912 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 913 device_printf(sc->dev, 914 "Running firmware has bad header offset (%d)\n", 915 (int)hdr_offset); 916 return EIO; 917 } 918 919 /* copy header of running firmware from SRAM to host memory to 920 * validate firmware */ 921 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 922 if (hdr == NULL) { 923 device_printf(sc->dev, "could not malloc firmware hdr\n"); 924 return ENOMEM; 925 } 926 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 927 rman_get_bushandle(sc->mem_res), 928 hdr_offset, (char *)hdr, bytes); 929 status = mxge_validate_firmware(sc, hdr); 930 free(hdr, M_DEVBUF); 931 932 /* 933 * check to see if adopted firmware has bug where adopting 934 * it will cause broadcasts to be filtered unless the NIC 935 * is kept in ALLMULTI mode 936 */ 937 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 938 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 939 sc->adopted_rx_filter_bug = 1; 940 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 941 "working around rx filter bug\n", 942 sc->fw_ver_major, sc->fw_ver_minor, 943 sc->fw_ver_tiny); 944 } 945 946 return status; 947 } 948 949 950 static int 951 mxge_load_firmware(mxge_softc_t *sc, int adopt) 952 { 953 volatile uint32_t *confirm; 954 volatile char *submit; 955 char buf_bytes[72]; 956 uint32_t *buf, size, dma_low, dma_high; 957 int status, i; 958 959 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 960 961 size = sc->sram_size; 962 status = mxge_load_firmware_helper(sc, &size); 963 if (status) { 964 if (!adopt) 965 return status; 966 /* Try to use the currently running firmware, if 967 it is new enough */ 968 status = mxge_adopt_running_firmware(sc); 969 if (status) { 970 device_printf(sc->dev, 971 "failed to adopt running firmware\n"); 972 return status; 973 } 974 device_printf(sc->dev, 975 "Successfully adopted running firmware\n"); 976 if (sc->tx_boundary == 4096) { 977 device_printf(sc->dev, 978 "Using firmware currently running on NIC" 979 ". For optimal\n"); 980 device_printf(sc->dev, 981 "performance consider loading optimized " 982 "firmware\n"); 983 } 984 sc->fw_name = mxge_fw_unaligned; 985 sc->tx_boundary = 2048; 986 return 0; 987 } 988 /* clear confirmation addr */ 989 confirm = (volatile uint32_t *)sc->cmd; 990 *confirm = 0; 991 wmb(); 992 /* send a reload command to the bootstrap MCP, and wait for the 993 response in the confirmation address. The firmware should 994 write a -1 there to indicate it is alive and well 995 */ 996 997 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 998 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 999 1000 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 1001 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 1002 buf[2] = htobe32(0xffffffff); /* confirm data */ 1003 1004 /* FIX: All newest firmware should un-protect the bottom of 1005 the sram before handoff. However, the very first interfaces 1006 do not. Therefore the handoff copy must skip the first 8 bytes 1007 */ 1008 /* where the code starts*/ 1009 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 1010 buf[4] = htobe32(size - 8); /* length of code */ 1011 buf[5] = htobe32(8); /* where to copy to */ 1012 buf[6] = htobe32(0); /* where to jump to */ 1013 1014 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 1015 mxge_pio_copy(submit, buf, 64); 1016 wmb(); 1017 DELAY(1000); 1018 wmb(); 1019 i = 0; 1020 while (*confirm != 0xffffffff && i < 20) { 1021 DELAY(1000*10); 1022 i++; 1023 bus_dmamap_sync(sc->cmd_dma.dmat, 1024 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 1025 } 1026 if (*confirm != 0xffffffff) { 1027 device_printf(sc->dev,"handoff failed (%p = 0x%x)", 1028 confirm, *confirm); 1029 1030 return ENXIO; 1031 } 1032 return 0; 1033 } 1034 1035 static int 1036 mxge_update_mac_address(mxge_softc_t *sc) 1037 { 1038 mxge_cmd_t cmd; 1039 uint8_t *addr = sc->mac_addr; 1040 int status; 1041 1042 1043 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 1044 | (addr[2] << 8) | addr[3]); 1045 1046 cmd.data1 = ((addr[4] << 8) | (addr[5])); 1047 1048 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 1049 return status; 1050 } 1051 1052 static int 1053 mxge_change_pause(mxge_softc_t *sc, int pause) 1054 { 1055 mxge_cmd_t cmd; 1056 int status; 1057 1058 if (pause) 1059 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, 1060 &cmd); 1061 else 1062 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, 1063 &cmd); 1064 1065 if (status) { 1066 device_printf(sc->dev, "Failed to set flow control mode\n"); 1067 return ENXIO; 1068 } 1069 sc->pause = pause; 1070 return 0; 1071 } 1072 1073 static void 1074 mxge_change_promisc(mxge_softc_t *sc, int promisc) 1075 { 1076 mxge_cmd_t cmd; 1077 int status; 1078 1079 if (mxge_always_promisc) 1080 promisc = 1; 1081 1082 if (promisc) 1083 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, 1084 &cmd); 1085 else 1086 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, 1087 &cmd); 1088 1089 if (status) { 1090 device_printf(sc->dev, "Failed to set promisc mode\n"); 1091 } 1092 } 1093 1094 struct mxge_add_maddr_ctx { 1095 mxge_softc_t *sc; 1096 int error; 1097 }; 1098 1099 static u_int 1100 mxge_add_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt) 1101 { 1102 struct mxge_add_maddr_ctx *ctx = arg; 1103 mxge_cmd_t cmd; 1104 1105 if (ctx->error != 0) 1106 return (0); 1107 bcopy(LLADDR(sdl), &cmd.data0, 4); 1108 bcopy(LLADDR(sdl) + 4, &cmd.data1, 2); 1109 cmd.data0 = htonl(cmd.data0); 1110 cmd.data1 = htonl(cmd.data1); 1111 1112 ctx->error = mxge_send_cmd(ctx->sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1113 1114 return (1); 1115 } 1116 1117 static void 1118 mxge_set_multicast_list(mxge_softc_t *sc) 1119 { 1120 struct mxge_add_maddr_ctx ctx; 1121 struct ifnet *ifp = sc->ifp; 1122 mxge_cmd_t cmd; 1123 int err; 1124 1125 /* This firmware is known to not support multicast */ 1126 if (!sc->fw_multicast_support) 1127 return; 1128 1129 /* Disable multicast filtering while we play with the lists*/ 1130 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1131 if (err != 0) { 1132 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," 1133 " error status: %d\n", err); 1134 return; 1135 } 1136 1137 if (sc->adopted_rx_filter_bug) 1138 return; 1139 1140 if (ifp->if_flags & IFF_ALLMULTI) 1141 /* request to disable multicast filtering, so quit here */ 1142 return; 1143 1144 /* Flush all the filters */ 1145 1146 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1147 if (err != 0) { 1148 device_printf(sc->dev, 1149 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" 1150 ", error status: %d\n", err); 1151 return; 1152 } 1153 1154 /* Walk the multicast list, and add each address */ 1155 ctx.sc = sc; 1156 ctx.error = 0; 1157 if_foreach_llmaddr(ifp, mxge_add_maddr, &ctx); 1158 if (ctx.error != 0) { 1159 device_printf(sc->dev, "Failed MXGEFW_JOIN_MULTICAST_GROUP, " 1160 "error status:" "%d\t", ctx.error); 1161 /* abort, leaving multicast filtering off */ 1162 return; 1163 } 1164 1165 /* Enable multicast filtering */ 1166 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1167 if (err != 0) { 1168 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI" 1169 ", error status: %d\n", err); 1170 } 1171 } 1172 1173 static int 1174 mxge_max_mtu(mxge_softc_t *sc) 1175 { 1176 mxge_cmd_t cmd; 1177 int status; 1178 1179 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) 1180 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1181 1182 /* try to set nbufs to see if it we can 1183 use virtually contiguous jumbos */ 1184 cmd.data0 = 0; 1185 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 1186 &cmd); 1187 if (status == 0) 1188 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1189 1190 /* otherwise, we're limited to MJUMPAGESIZE */ 1191 return MJUMPAGESIZE - MXGEFW_PAD; 1192 } 1193 1194 static int 1195 mxge_reset(mxge_softc_t *sc, int interrupts_setup) 1196 { 1197 struct mxge_slice_state *ss; 1198 mxge_rx_done_t *rx_done; 1199 volatile uint32_t *irq_claim; 1200 mxge_cmd_t cmd; 1201 int slice, status; 1202 1203 /* try to send a reset command to the card to see if it 1204 is alive */ 1205 memset(&cmd, 0, sizeof (cmd)); 1206 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1207 if (status != 0) { 1208 device_printf(sc->dev, "failed reset\n"); 1209 return ENXIO; 1210 } 1211 1212 mxge_dummy_rdma(sc, 1); 1213 1214 1215 /* set the intrq size */ 1216 cmd.data0 = sc->rx_ring_size; 1217 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1218 1219 /* 1220 * Even though we already know how many slices are supported 1221 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES 1222 * has magic side effects, and must be called after a reset. 1223 * It must be called prior to calling any RSS related cmds, 1224 * including assigning an interrupt queue for anything but 1225 * slice 0. It must also be called *after* 1226 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 1227 * the firmware to compute offsets. 1228 */ 1229 1230 if (sc->num_slices > 1) { 1231 /* ask the maximum number of slices it supports */ 1232 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 1233 &cmd); 1234 if (status != 0) { 1235 device_printf(sc->dev, 1236 "failed to get number of slices\n"); 1237 return status; 1238 } 1239 /* 1240 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 1241 * to setting up the interrupt queue DMA 1242 */ 1243 cmd.data0 = sc->num_slices; 1244 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 1245 #ifdef IFNET_BUF_RING 1246 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 1247 #endif 1248 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, 1249 &cmd); 1250 if (status != 0) { 1251 device_printf(sc->dev, 1252 "failed to set number of slices\n"); 1253 return status; 1254 } 1255 } 1256 1257 1258 if (interrupts_setup) { 1259 /* Now exchange information about interrupts */ 1260 for (slice = 0; slice < sc->num_slices; slice++) { 1261 rx_done = &sc->ss[slice].rx_done; 1262 memset(rx_done->entry, 0, sc->rx_ring_size); 1263 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr); 1264 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr); 1265 cmd.data2 = slice; 1266 status |= mxge_send_cmd(sc, 1267 MXGEFW_CMD_SET_INTRQ_DMA, 1268 &cmd); 1269 } 1270 } 1271 1272 status |= mxge_send_cmd(sc, 1273 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); 1274 1275 1276 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1277 1278 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1279 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1280 1281 1282 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1283 &cmd); 1284 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1285 if (status != 0) { 1286 device_printf(sc->dev, "failed set interrupt parameters\n"); 1287 return status; 1288 } 1289 1290 1291 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1292 1293 1294 /* run a DMA benchmark */ 1295 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST); 1296 1297 for (slice = 0; slice < sc->num_slices; slice++) { 1298 ss = &sc->ss[slice]; 1299 1300 ss->irq_claim = irq_claim + (2 * slice); 1301 /* reset mcp/driver shared state back to 0 */ 1302 ss->rx_done.idx = 0; 1303 ss->rx_done.cnt = 0; 1304 ss->tx.req = 0; 1305 ss->tx.done = 0; 1306 ss->tx.pkt_done = 0; 1307 ss->tx.queue_active = 0; 1308 ss->tx.activate = 0; 1309 ss->tx.deactivate = 0; 1310 ss->tx.wake = 0; 1311 ss->tx.defrag = 0; 1312 ss->tx.stall = 0; 1313 ss->rx_big.cnt = 0; 1314 ss->rx_small.cnt = 0; 1315 ss->lc.lro_bad_csum = 0; 1316 ss->lc.lro_queued = 0; 1317 ss->lc.lro_flushed = 0; 1318 if (ss->fw_stats != NULL) { 1319 bzero(ss->fw_stats, sizeof *ss->fw_stats); 1320 } 1321 } 1322 sc->rdma_tags_available = 15; 1323 status = mxge_update_mac_address(sc); 1324 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC); 1325 mxge_change_pause(sc, sc->pause); 1326 mxge_set_multicast_list(sc); 1327 if (sc->throttle) { 1328 cmd.data0 = sc->throttle; 1329 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, 1330 &cmd)) { 1331 device_printf(sc->dev, 1332 "can't enable throttle\n"); 1333 } 1334 } 1335 return status; 1336 } 1337 1338 static int 1339 mxge_change_throttle(SYSCTL_HANDLER_ARGS) 1340 { 1341 mxge_cmd_t cmd; 1342 mxge_softc_t *sc; 1343 int err; 1344 unsigned int throttle; 1345 1346 sc = arg1; 1347 throttle = sc->throttle; 1348 err = sysctl_handle_int(oidp, &throttle, arg2, req); 1349 if (err != 0) { 1350 return err; 1351 } 1352 1353 if (throttle == sc->throttle) 1354 return 0; 1355 1356 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE) 1357 return EINVAL; 1358 1359 mtx_lock(&sc->driver_mtx); 1360 cmd.data0 = throttle; 1361 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd); 1362 if (err == 0) 1363 sc->throttle = throttle; 1364 mtx_unlock(&sc->driver_mtx); 1365 return err; 1366 } 1367 1368 static int 1369 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1370 { 1371 mxge_softc_t *sc; 1372 unsigned int intr_coal_delay; 1373 int err; 1374 1375 sc = arg1; 1376 intr_coal_delay = sc->intr_coal_delay; 1377 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1378 if (err != 0) { 1379 return err; 1380 } 1381 if (intr_coal_delay == sc->intr_coal_delay) 1382 return 0; 1383 1384 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1385 return EINVAL; 1386 1387 mtx_lock(&sc->driver_mtx); 1388 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1389 sc->intr_coal_delay = intr_coal_delay; 1390 1391 mtx_unlock(&sc->driver_mtx); 1392 return err; 1393 } 1394 1395 static int 1396 mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1397 { 1398 mxge_softc_t *sc; 1399 unsigned int enabled; 1400 int err; 1401 1402 sc = arg1; 1403 enabled = sc->pause; 1404 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1405 if (err != 0) { 1406 return err; 1407 } 1408 if (enabled == sc->pause) 1409 return 0; 1410 1411 mtx_lock(&sc->driver_mtx); 1412 err = mxge_change_pause(sc, enabled); 1413 mtx_unlock(&sc->driver_mtx); 1414 return err; 1415 } 1416 1417 static int 1418 mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1419 { 1420 int err; 1421 1422 if (arg1 == NULL) 1423 return EFAULT; 1424 arg2 = be32toh(*(int *)arg1); 1425 arg1 = NULL; 1426 err = sysctl_handle_int(oidp, arg1, arg2, req); 1427 1428 return err; 1429 } 1430 1431 static void 1432 mxge_rem_sysctls(mxge_softc_t *sc) 1433 { 1434 struct mxge_slice_state *ss; 1435 int slice; 1436 1437 if (sc->slice_sysctl_tree == NULL) 1438 return; 1439 1440 for (slice = 0; slice < sc->num_slices; slice++) { 1441 ss = &sc->ss[slice]; 1442 if (ss == NULL || ss->sysctl_tree == NULL) 1443 continue; 1444 sysctl_ctx_free(&ss->sysctl_ctx); 1445 ss->sysctl_tree = NULL; 1446 } 1447 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1448 sc->slice_sysctl_tree = NULL; 1449 } 1450 1451 static void 1452 mxge_add_sysctls(mxge_softc_t *sc) 1453 { 1454 struct sysctl_ctx_list *ctx; 1455 struct sysctl_oid_list *children; 1456 mcp_irq_data_t *fw; 1457 struct mxge_slice_state *ss; 1458 int slice; 1459 char slice_num[8]; 1460 1461 ctx = device_get_sysctl_ctx(sc->dev); 1462 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1463 fw = sc->ss[0].fw_stats; 1464 1465 /* random information */ 1466 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1467 "firmware_version", 1468 CTLFLAG_RD, sc->fw_version, 1469 0, "firmware version"); 1470 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1471 "serial_number", 1472 CTLFLAG_RD, sc->serial_number_string, 1473 0, "serial number"); 1474 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1475 "product_code", 1476 CTLFLAG_RD, sc->product_code_string, 1477 0, "product_code"); 1478 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1479 "pcie_link_width", 1480 CTLFLAG_RD, &sc->link_width, 1481 0, "tx_boundary"); 1482 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1483 "tx_boundary", 1484 CTLFLAG_RD, &sc->tx_boundary, 1485 0, "tx_boundary"); 1486 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1487 "write_combine", 1488 CTLFLAG_RD, &sc->wc, 1489 0, "write combining PIO?"); 1490 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1491 "read_dma_MBs", 1492 CTLFLAG_RD, &sc->read_dma, 1493 0, "DMA Read speed in MB/s"); 1494 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1495 "write_dma_MBs", 1496 CTLFLAG_RD, &sc->write_dma, 1497 0, "DMA Write speed in MB/s"); 1498 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1499 "read_write_dma_MBs", 1500 CTLFLAG_RD, &sc->read_write_dma, 1501 0, "DMA concurrent Read/Write speed in MB/s"); 1502 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1503 "watchdog_resets", 1504 CTLFLAG_RD, &sc->watchdog_resets, 1505 0, "Number of times NIC was reset"); 1506 1507 1508 /* performance related tunables */ 1509 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1510 "intr_coal_delay", 1511 CTLTYPE_INT|CTLFLAG_RW, sc, 1512 0, mxge_change_intr_coal, 1513 "I", "interrupt coalescing delay in usecs"); 1514 1515 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1516 "throttle", 1517 CTLTYPE_INT|CTLFLAG_RW, sc, 1518 0, mxge_change_throttle, 1519 "I", "transmit throttling"); 1520 1521 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1522 "flow_control_enabled", 1523 CTLTYPE_INT|CTLFLAG_RW, sc, 1524 0, mxge_change_flow_control, 1525 "I", "interrupt coalescing delay in usecs"); 1526 1527 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1528 "deassert_wait", 1529 CTLFLAG_RW, &mxge_deassert_wait, 1530 0, "Wait for IRQ line to go low in ihandler"); 1531 1532 /* stats block from firmware is in network byte order. 1533 Need to swap it */ 1534 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1535 "link_up", 1536 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 1537 0, mxge_handle_be32, 1538 "I", "link up"); 1539 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1540 "rdma_tags_available", 1541 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 1542 0, mxge_handle_be32, 1543 "I", "rdma_tags_available"); 1544 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1545 "dropped_bad_crc32", 1546 CTLTYPE_INT|CTLFLAG_RD, 1547 &fw->dropped_bad_crc32, 1548 0, mxge_handle_be32, 1549 "I", "dropped_bad_crc32"); 1550 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1551 "dropped_bad_phy", 1552 CTLTYPE_INT|CTLFLAG_RD, 1553 &fw->dropped_bad_phy, 1554 0, mxge_handle_be32, 1555 "I", "dropped_bad_phy"); 1556 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1557 "dropped_link_error_or_filtered", 1558 CTLTYPE_INT|CTLFLAG_RD, 1559 &fw->dropped_link_error_or_filtered, 1560 0, mxge_handle_be32, 1561 "I", "dropped_link_error_or_filtered"); 1562 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1563 "dropped_link_overflow", 1564 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 1565 0, mxge_handle_be32, 1566 "I", "dropped_link_overflow"); 1567 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1568 "dropped_multicast_filtered", 1569 CTLTYPE_INT|CTLFLAG_RD, 1570 &fw->dropped_multicast_filtered, 1571 0, mxge_handle_be32, 1572 "I", "dropped_multicast_filtered"); 1573 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1574 "dropped_no_big_buffer", 1575 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 1576 0, mxge_handle_be32, 1577 "I", "dropped_no_big_buffer"); 1578 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1579 "dropped_no_small_buffer", 1580 CTLTYPE_INT|CTLFLAG_RD, 1581 &fw->dropped_no_small_buffer, 1582 0, mxge_handle_be32, 1583 "I", "dropped_no_small_buffer"); 1584 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1585 "dropped_overrun", 1586 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 1587 0, mxge_handle_be32, 1588 "I", "dropped_overrun"); 1589 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1590 "dropped_pause", 1591 CTLTYPE_INT|CTLFLAG_RD, 1592 &fw->dropped_pause, 1593 0, mxge_handle_be32, 1594 "I", "dropped_pause"); 1595 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1596 "dropped_runt", 1597 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 1598 0, mxge_handle_be32, 1599 "I", "dropped_runt"); 1600 1601 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1602 "dropped_unicast_filtered", 1603 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 1604 0, mxge_handle_be32, 1605 "I", "dropped_unicast_filtered"); 1606 1607 /* verbose printing? */ 1608 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1609 "verbose", 1610 CTLFLAG_RW, &mxge_verbose, 1611 0, "verbose printing"); 1612 1613 /* add counters exported for debugging from all slices */ 1614 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1615 sc->slice_sysctl_tree = 1616 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO, 1617 "slice", CTLFLAG_RD, 0, ""); 1618 1619 for (slice = 0; slice < sc->num_slices; slice++) { 1620 ss = &sc->ss[slice]; 1621 sysctl_ctx_init(&ss->sysctl_ctx); 1622 ctx = &ss->sysctl_ctx; 1623 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1624 sprintf(slice_num, "%d", slice); 1625 ss->sysctl_tree = 1626 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num, 1627 CTLFLAG_RD, 0, ""); 1628 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1629 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1630 "rx_small_cnt", 1631 CTLFLAG_RD, &ss->rx_small.cnt, 1632 0, "rx_small_cnt"); 1633 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1634 "rx_big_cnt", 1635 CTLFLAG_RD, &ss->rx_big.cnt, 1636 0, "rx_small_cnt"); 1637 SYSCTL_ADD_U64(ctx, children, OID_AUTO, 1638 "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed, 1639 0, "number of lro merge queues flushed"); 1640 1641 SYSCTL_ADD_U64(ctx, children, OID_AUTO, 1642 "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum, 1643 0, "number of bad csums preventing LRO"); 1644 1645 SYSCTL_ADD_U64(ctx, children, OID_AUTO, 1646 "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued, 1647 0, "number of frames appended to lro merge" 1648 "queues"); 1649 1650 #ifndef IFNET_BUF_RING 1651 /* only transmit from slice 0 for now */ 1652 if (slice > 0) 1653 continue; 1654 #endif 1655 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1656 "tx_req", 1657 CTLFLAG_RD, &ss->tx.req, 1658 0, "tx_req"); 1659 1660 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1661 "tx_done", 1662 CTLFLAG_RD, &ss->tx.done, 1663 0, "tx_done"); 1664 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1665 "tx_pkt_done", 1666 CTLFLAG_RD, &ss->tx.pkt_done, 1667 0, "tx_done"); 1668 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1669 "tx_stall", 1670 CTLFLAG_RD, &ss->tx.stall, 1671 0, "tx_stall"); 1672 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1673 "tx_wake", 1674 CTLFLAG_RD, &ss->tx.wake, 1675 0, "tx_wake"); 1676 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1677 "tx_defrag", 1678 CTLFLAG_RD, &ss->tx.defrag, 1679 0, "tx_defrag"); 1680 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1681 "tx_queue_active", 1682 CTLFLAG_RD, &ss->tx.queue_active, 1683 0, "tx_queue_active"); 1684 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1685 "tx_activate", 1686 CTLFLAG_RD, &ss->tx.activate, 1687 0, "tx_activate"); 1688 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1689 "tx_deactivate", 1690 CTLFLAG_RD, &ss->tx.deactivate, 1691 0, "tx_deactivate"); 1692 } 1693 } 1694 1695 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1696 backwards one at a time and handle ring wraps */ 1697 1698 static inline void 1699 mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1700 mcp_kreq_ether_send_t *src, int cnt) 1701 { 1702 int idx, starting_slot; 1703 starting_slot = tx->req; 1704 while (cnt > 1) { 1705 cnt--; 1706 idx = (starting_slot + cnt) & tx->mask; 1707 mxge_pio_copy(&tx->lanai[idx], 1708 &src[cnt], sizeof(*src)); 1709 wmb(); 1710 } 1711 } 1712 1713 /* 1714 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1715 * at most 32 bytes at a time, so as to avoid involving the software 1716 * pio handler in the nic. We re-write the first segment's flags 1717 * to mark them valid only after writing the entire chain 1718 */ 1719 1720 static inline void 1721 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 1722 int cnt) 1723 { 1724 int idx, i; 1725 uint32_t *src_ints; 1726 volatile uint32_t *dst_ints; 1727 mcp_kreq_ether_send_t *srcp; 1728 volatile mcp_kreq_ether_send_t *dstp, *dst; 1729 uint8_t last_flags; 1730 1731 idx = tx->req & tx->mask; 1732 1733 last_flags = src->flags; 1734 src->flags = 0; 1735 wmb(); 1736 dst = dstp = &tx->lanai[idx]; 1737 srcp = src; 1738 1739 if ((idx + cnt) < tx->mask) { 1740 for (i = 0; i < (cnt - 1); i += 2) { 1741 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1742 wmb(); /* force write every 32 bytes */ 1743 srcp += 2; 1744 dstp += 2; 1745 } 1746 } else { 1747 /* submit all but the first request, and ensure 1748 that it is submitted below */ 1749 mxge_submit_req_backwards(tx, src, cnt); 1750 i = 0; 1751 } 1752 if (i < cnt) { 1753 /* submit the first request */ 1754 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1755 wmb(); /* barrier before setting valid flag */ 1756 } 1757 1758 /* re-write the last 32-bits with the valid flags */ 1759 src->flags = last_flags; 1760 src_ints = (uint32_t *)src; 1761 src_ints+=3; 1762 dst_ints = (volatile uint32_t *)dst; 1763 dst_ints+=3; 1764 *dst_ints = *src_ints; 1765 tx->req += cnt; 1766 wmb(); 1767 } 1768 1769 static int 1770 mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m, 1771 struct mxge_pkt_info *pi) 1772 { 1773 struct ether_vlan_header *eh; 1774 uint16_t etype; 1775 int tso = m->m_pkthdr.csum_flags & (CSUM_TSO); 1776 #if IFCAP_TSO6 && defined(INET6) 1777 int nxt; 1778 #endif 1779 1780 eh = mtod(m, struct ether_vlan_header *); 1781 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 1782 etype = ntohs(eh->evl_proto); 1783 pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 1784 } else { 1785 etype = ntohs(eh->evl_encap_proto); 1786 pi->ip_off = ETHER_HDR_LEN; 1787 } 1788 1789 switch (etype) { 1790 case ETHERTYPE_IP: 1791 /* 1792 * ensure ip header is in first mbuf, copy it to a 1793 * scratch buffer if not 1794 */ 1795 pi->ip = (struct ip *)(m->m_data + pi->ip_off); 1796 pi->ip6 = NULL; 1797 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) { 1798 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip), 1799 ss->scratch); 1800 pi->ip = (struct ip *)(ss->scratch + pi->ip_off); 1801 } 1802 pi->ip_hlen = pi->ip->ip_hl << 2; 1803 if (!tso) 1804 return 0; 1805 1806 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen + 1807 sizeof(struct tcphdr))) { 1808 m_copydata(m, 0, pi->ip_off + pi->ip_hlen + 1809 sizeof(struct tcphdr), ss->scratch); 1810 pi->ip = (struct ip *)(ss->scratch + pi->ip_off); 1811 } 1812 pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen); 1813 break; 1814 #if IFCAP_TSO6 && defined(INET6) 1815 case ETHERTYPE_IPV6: 1816 pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off); 1817 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) { 1818 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6), 1819 ss->scratch); 1820 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off); 1821 } 1822 nxt = 0; 1823 pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt); 1824 pi->ip_hlen -= pi->ip_off; 1825 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) 1826 return EINVAL; 1827 1828 if (!tso) 1829 return 0; 1830 1831 if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen) 1832 return EINVAL; 1833 1834 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen + 1835 sizeof(struct tcphdr))) { 1836 m_copydata(m, 0, pi->ip_off + pi->ip_hlen + 1837 sizeof(struct tcphdr), ss->scratch); 1838 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off); 1839 } 1840 pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen); 1841 break; 1842 #endif 1843 default: 1844 return EINVAL; 1845 } 1846 return 0; 1847 } 1848 1849 #if IFCAP_TSO4 1850 1851 static void 1852 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m, 1853 int busdma_seg_cnt, struct mxge_pkt_info *pi) 1854 { 1855 mxge_tx_ring_t *tx; 1856 mcp_kreq_ether_send_t *req; 1857 bus_dma_segment_t *seg; 1858 uint32_t low, high_swapped; 1859 int len, seglen, cum_len, cum_len_next; 1860 int next_is_first, chop, cnt, rdma_count, small; 1861 uint16_t pseudo_hdr_offset, cksum_offset, mss, sum; 1862 uint8_t flags, flags_next; 1863 static int once; 1864 1865 mss = m->m_pkthdr.tso_segsz; 1866 1867 /* negative cum_len signifies to the 1868 * send loop that we are still in the 1869 * header portion of the TSO packet. 1870 */ 1871 1872 cksum_offset = pi->ip_off + pi->ip_hlen; 1873 cum_len = -(cksum_offset + (pi->tcp->th_off << 2)); 1874 1875 /* TSO implies checksum offload on this hardware */ 1876 if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) { 1877 /* 1878 * If packet has full TCP csum, replace it with pseudo hdr 1879 * sum that the NIC expects, otherwise the NIC will emit 1880 * packets with bad TCP checksums. 1881 */ 1882 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); 1883 if (pi->ip6) { 1884 #if (CSUM_TCP_IPV6 != 0) && defined(INET6) 1885 m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6; 1886 sum = in6_cksum_pseudo(pi->ip6, 1887 m->m_pkthdr.len - cksum_offset, 1888 IPPROTO_TCP, 0); 1889 #endif 1890 } else { 1891 #ifdef INET 1892 m->m_pkthdr.csum_flags |= CSUM_TCP; 1893 sum = in_pseudo(pi->ip->ip_src.s_addr, 1894 pi->ip->ip_dst.s_addr, 1895 htons(IPPROTO_TCP + (m->m_pkthdr.len - 1896 cksum_offset))); 1897 #endif 1898 } 1899 m_copyback(m, offsetof(struct tcphdr, th_sum) + 1900 cksum_offset, sizeof(sum), (caddr_t)&sum); 1901 } 1902 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1903 1904 1905 /* for TSO, pseudo_hdr_offset holds mss. 1906 * The firmware figures out where to put 1907 * the checksum by parsing the header. */ 1908 pseudo_hdr_offset = htobe16(mss); 1909 1910 if (pi->ip6) { 1911 /* 1912 * for IPv6 TSO, the "checksum offset" is re-purposed 1913 * to store the TCP header len 1914 */ 1915 cksum_offset = (pi->tcp->th_off << 2); 1916 } 1917 1918 tx = &ss->tx; 1919 req = tx->req_list; 1920 seg = tx->seg_list; 1921 cnt = 0; 1922 rdma_count = 0; 1923 /* "rdma_count" is the number of RDMAs belonging to the 1924 * current packet BEFORE the current send request. For 1925 * non-TSO packets, this is equal to "count". 1926 * For TSO packets, rdma_count needs to be reset 1927 * to 0 after a segment cut. 1928 * 1929 * The rdma_count field of the send request is 1930 * the number of RDMAs of the packet starting at 1931 * that request. For TSO send requests with one ore more cuts 1932 * in the middle, this is the number of RDMAs starting 1933 * after the last cut in the request. All previous 1934 * segments before the last cut implicitly have 1 RDMA. 1935 * 1936 * Since the number of RDMAs is not known beforehand, 1937 * it must be filled-in retroactively - after each 1938 * segmentation cut or at the end of the entire packet. 1939 */ 1940 1941 while (busdma_seg_cnt) { 1942 /* Break the busdma segment up into pieces*/ 1943 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1944 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1945 len = seg->ds_len; 1946 1947 while (len) { 1948 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1949 seglen = len; 1950 cum_len_next = cum_len + seglen; 1951 (req-rdma_count)->rdma_count = rdma_count + 1; 1952 if (__predict_true(cum_len >= 0)) { 1953 /* payload */ 1954 chop = (cum_len_next > mss); 1955 cum_len_next = cum_len_next % mss; 1956 next_is_first = (cum_len_next == 0); 1957 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1958 flags_next |= next_is_first * 1959 MXGEFW_FLAGS_FIRST; 1960 rdma_count |= -(chop | next_is_first); 1961 rdma_count += chop & !next_is_first; 1962 } else if (cum_len_next >= 0) { 1963 /* header ends */ 1964 rdma_count = -1; 1965 cum_len_next = 0; 1966 seglen = -cum_len; 1967 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1968 flags_next = MXGEFW_FLAGS_TSO_PLD | 1969 MXGEFW_FLAGS_FIRST | 1970 (small * MXGEFW_FLAGS_SMALL); 1971 } 1972 1973 req->addr_high = high_swapped; 1974 req->addr_low = htobe32(low); 1975 req->pseudo_hdr_offset = pseudo_hdr_offset; 1976 req->pad = 0; 1977 req->rdma_count = 1; 1978 req->length = htobe16(seglen); 1979 req->cksum_offset = cksum_offset; 1980 req->flags = flags | ((cum_len & 1) * 1981 MXGEFW_FLAGS_ALIGN_ODD); 1982 low += seglen; 1983 len -= seglen; 1984 cum_len = cum_len_next; 1985 flags = flags_next; 1986 req++; 1987 cnt++; 1988 rdma_count++; 1989 if (cksum_offset != 0 && !pi->ip6) { 1990 if (__predict_false(cksum_offset > seglen)) 1991 cksum_offset -= seglen; 1992 else 1993 cksum_offset = 0; 1994 } 1995 if (__predict_false(cnt > tx->max_desc)) 1996 goto drop; 1997 } 1998 busdma_seg_cnt--; 1999 seg++; 2000 } 2001 (req-rdma_count)->rdma_count = rdma_count; 2002 2003 do { 2004 req--; 2005 req->flags |= MXGEFW_FLAGS_TSO_LAST; 2006 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 2007 2008 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2009 mxge_submit_req(tx, tx->req_list, cnt); 2010 #ifdef IFNET_BUF_RING 2011 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2012 /* tell the NIC to start polling this slice */ 2013 *tx->send_go = 1; 2014 tx->queue_active = 1; 2015 tx->activate++; 2016 wmb(); 2017 } 2018 #endif 2019 return; 2020 2021 drop: 2022 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 2023 m_freem(m); 2024 ss->oerrors++; 2025 if (!once) { 2026 printf("tx->max_desc exceeded via TSO!\n"); 2027 printf("mss = %d, %ld, %d!\n", mss, 2028 (long)seg - (long)tx->seg_list, tx->max_desc); 2029 once = 1; 2030 } 2031 return; 2032 2033 } 2034 2035 #endif /* IFCAP_TSO4 */ 2036 2037 #ifdef MXGE_NEW_VLAN_API 2038 /* 2039 * We reproduce the software vlan tag insertion from 2040 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" 2041 * vlan tag insertion. We need to advertise this in order to have the 2042 * vlan interface respect our csum offload flags. 2043 */ 2044 static struct mbuf * 2045 mxge_vlan_tag_insert(struct mbuf *m) 2046 { 2047 struct ether_vlan_header *evl; 2048 2049 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT); 2050 if (__predict_false(m == NULL)) 2051 return NULL; 2052 if (m->m_len < sizeof(*evl)) { 2053 m = m_pullup(m, sizeof(*evl)); 2054 if (__predict_false(m == NULL)) 2055 return NULL; 2056 } 2057 /* 2058 * Transform the Ethernet header into an Ethernet header 2059 * with 802.1Q encapsulation. 2060 */ 2061 evl = mtod(m, struct ether_vlan_header *); 2062 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 2063 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 2064 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 2065 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 2066 m->m_flags &= ~M_VLANTAG; 2067 return m; 2068 } 2069 #endif /* MXGE_NEW_VLAN_API */ 2070 2071 static void 2072 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 2073 { 2074 struct mxge_pkt_info pi = {0,0,0,0}; 2075 mxge_softc_t *sc; 2076 mcp_kreq_ether_send_t *req; 2077 bus_dma_segment_t *seg; 2078 struct mbuf *m_tmp; 2079 struct ifnet *ifp; 2080 mxge_tx_ring_t *tx; 2081 int cnt, cum_len, err, i, idx, odd_flag; 2082 uint16_t pseudo_hdr_offset; 2083 uint8_t flags, cksum_offset; 2084 2085 2086 sc = ss->sc; 2087 ifp = sc->ifp; 2088 tx = &ss->tx; 2089 2090 #ifdef MXGE_NEW_VLAN_API 2091 if (m->m_flags & M_VLANTAG) { 2092 m = mxge_vlan_tag_insert(m); 2093 if (__predict_false(m == NULL)) 2094 goto drop_without_m; 2095 } 2096 #endif 2097 if (m->m_pkthdr.csum_flags & 2098 (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { 2099 if (mxge_parse_tx(ss, m, &pi)) 2100 goto drop; 2101 } 2102 2103 /* (try to) map the frame for DMA */ 2104 idx = tx->req & tx->mask; 2105 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 2106 m, tx->seg_list, &cnt, 2107 BUS_DMA_NOWAIT); 2108 if (__predict_false(err == EFBIG)) { 2109 /* Too many segments in the chain. Try 2110 to defrag */ 2111 m_tmp = m_defrag(m, M_NOWAIT); 2112 if (m_tmp == NULL) { 2113 goto drop; 2114 } 2115 ss->tx.defrag++; 2116 m = m_tmp; 2117 err = bus_dmamap_load_mbuf_sg(tx->dmat, 2118 tx->info[idx].map, 2119 m, tx->seg_list, &cnt, 2120 BUS_DMA_NOWAIT); 2121 } 2122 if (__predict_false(err != 0)) { 2123 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 2124 " packet len = %d\n", err, m->m_pkthdr.len); 2125 goto drop; 2126 } 2127 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 2128 BUS_DMASYNC_PREWRITE); 2129 tx->info[idx].m = m; 2130 2131 #if IFCAP_TSO4 2132 /* TSO is different enough, we handle it in another routine */ 2133 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 2134 mxge_encap_tso(ss, m, cnt, &pi); 2135 return; 2136 } 2137 #endif 2138 2139 req = tx->req_list; 2140 cksum_offset = 0; 2141 pseudo_hdr_offset = 0; 2142 flags = MXGEFW_FLAGS_NO_TSO; 2143 2144 /* checksum offloading? */ 2145 if (m->m_pkthdr.csum_flags & 2146 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { 2147 /* ensure ip header is in first mbuf, copy 2148 it to a scratch buffer if not */ 2149 cksum_offset = pi.ip_off + pi.ip_hlen; 2150 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2151 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2152 req->cksum_offset = cksum_offset; 2153 flags |= MXGEFW_FLAGS_CKSUM; 2154 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2155 } else { 2156 odd_flag = 0; 2157 } 2158 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2159 flags |= MXGEFW_FLAGS_SMALL; 2160 2161 /* convert segments into a request list */ 2162 cum_len = 0; 2163 seg = tx->seg_list; 2164 req->flags = MXGEFW_FLAGS_FIRST; 2165 for (i = 0; i < cnt; i++) { 2166 req->addr_low = 2167 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2168 req->addr_high = 2169 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2170 req->length = htobe16(seg->ds_len); 2171 req->cksum_offset = cksum_offset; 2172 if (cksum_offset > seg->ds_len) 2173 cksum_offset -= seg->ds_len; 2174 else 2175 cksum_offset = 0; 2176 req->pseudo_hdr_offset = pseudo_hdr_offset; 2177 req->pad = 0; /* complete solid 16-byte block */ 2178 req->rdma_count = 1; 2179 req->flags |= flags | ((cum_len & 1) * odd_flag); 2180 cum_len += seg->ds_len; 2181 seg++; 2182 req++; 2183 req->flags = 0; 2184 } 2185 req--; 2186 /* pad runts to 60 bytes */ 2187 if (cum_len < 60) { 2188 req++; 2189 req->addr_low = 2190 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2191 req->addr_high = 2192 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2193 req->length = htobe16(60 - cum_len); 2194 req->cksum_offset = 0; 2195 req->pseudo_hdr_offset = pseudo_hdr_offset; 2196 req->pad = 0; /* complete solid 16-byte block */ 2197 req->rdma_count = 1; 2198 req->flags |= flags | ((cum_len & 1) * odd_flag); 2199 cnt++; 2200 } 2201 2202 tx->req_list[0].rdma_count = cnt; 2203 #if 0 2204 /* print what the firmware will see */ 2205 for (i = 0; i < cnt; i++) { 2206 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2207 "cso:%d, flags:0x%x, rdma:%d\n", 2208 i, (int)ntohl(tx->req_list[i].addr_high), 2209 (int)ntohl(tx->req_list[i].addr_low), 2210 (int)ntohs(tx->req_list[i].length), 2211 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2212 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2213 tx->req_list[i].rdma_count); 2214 } 2215 printf("--------------\n"); 2216 #endif 2217 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2218 mxge_submit_req(tx, tx->req_list, cnt); 2219 #ifdef IFNET_BUF_RING 2220 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2221 /* tell the NIC to start polling this slice */ 2222 *tx->send_go = 1; 2223 tx->queue_active = 1; 2224 tx->activate++; 2225 wmb(); 2226 } 2227 #endif 2228 return; 2229 2230 drop: 2231 m_freem(m); 2232 drop_without_m: 2233 ss->oerrors++; 2234 return; 2235 } 2236 2237 #ifdef IFNET_BUF_RING 2238 static void 2239 mxge_qflush(struct ifnet *ifp) 2240 { 2241 mxge_softc_t *sc = ifp->if_softc; 2242 mxge_tx_ring_t *tx; 2243 struct mbuf *m; 2244 int slice; 2245 2246 for (slice = 0; slice < sc->num_slices; slice++) { 2247 tx = &sc->ss[slice].tx; 2248 mtx_lock(&tx->mtx); 2249 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2250 m_freem(m); 2251 mtx_unlock(&tx->mtx); 2252 } 2253 if_qflush(ifp); 2254 } 2255 2256 static inline void 2257 mxge_start_locked(struct mxge_slice_state *ss) 2258 { 2259 mxge_softc_t *sc; 2260 struct mbuf *m; 2261 struct ifnet *ifp; 2262 mxge_tx_ring_t *tx; 2263 2264 sc = ss->sc; 2265 ifp = sc->ifp; 2266 tx = &ss->tx; 2267 2268 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2269 m = drbr_dequeue(ifp, tx->br); 2270 if (m == NULL) { 2271 return; 2272 } 2273 /* let BPF see it */ 2274 BPF_MTAP(ifp, m); 2275 2276 /* give it to the nic */ 2277 mxge_encap(ss, m); 2278 } 2279 /* ran out of transmit slots */ 2280 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) 2281 && (!drbr_empty(ifp, tx->br))) { 2282 ss->if_drv_flags |= IFF_DRV_OACTIVE; 2283 tx->stall++; 2284 } 2285 } 2286 2287 static int 2288 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2289 { 2290 mxge_softc_t *sc; 2291 struct ifnet *ifp; 2292 mxge_tx_ring_t *tx; 2293 int err; 2294 2295 sc = ss->sc; 2296 ifp = sc->ifp; 2297 tx = &ss->tx; 2298 2299 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 2300 IFF_DRV_RUNNING) { 2301 err = drbr_enqueue(ifp, tx->br, m); 2302 return (err); 2303 } 2304 2305 if (!drbr_needs_enqueue(ifp, tx->br) && 2306 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2307 /* let BPF see it */ 2308 BPF_MTAP(ifp, m); 2309 /* give it to the nic */ 2310 mxge_encap(ss, m); 2311 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2312 return (err); 2313 } 2314 if (!drbr_empty(ifp, tx->br)) 2315 mxge_start_locked(ss); 2316 return (0); 2317 } 2318 2319 static int 2320 mxge_transmit(struct ifnet *ifp, struct mbuf *m) 2321 { 2322 mxge_softc_t *sc = ifp->if_softc; 2323 struct mxge_slice_state *ss; 2324 mxge_tx_ring_t *tx; 2325 int err = 0; 2326 int slice; 2327 2328 slice = m->m_pkthdr.flowid; 2329 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ 2330 2331 ss = &sc->ss[slice]; 2332 tx = &ss->tx; 2333 2334 if (mtx_trylock(&tx->mtx)) { 2335 err = mxge_transmit_locked(ss, m); 2336 mtx_unlock(&tx->mtx); 2337 } else { 2338 err = drbr_enqueue(ifp, tx->br, m); 2339 } 2340 2341 return (err); 2342 } 2343 2344 #else 2345 2346 static inline void 2347 mxge_start_locked(struct mxge_slice_state *ss) 2348 { 2349 mxge_softc_t *sc; 2350 struct mbuf *m; 2351 struct ifnet *ifp; 2352 mxge_tx_ring_t *tx; 2353 2354 sc = ss->sc; 2355 ifp = sc->ifp; 2356 tx = &ss->tx; 2357 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2358 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 2359 if (m == NULL) { 2360 return; 2361 } 2362 /* let BPF see it */ 2363 BPF_MTAP(ifp, m); 2364 2365 /* give it to the nic */ 2366 mxge_encap(ss, m); 2367 } 2368 /* ran out of transmit slots */ 2369 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 2370 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2371 tx->stall++; 2372 } 2373 } 2374 #endif 2375 static void 2376 mxge_start(struct ifnet *ifp) 2377 { 2378 mxge_softc_t *sc = ifp->if_softc; 2379 struct mxge_slice_state *ss; 2380 2381 /* only use the first slice for now */ 2382 ss = &sc->ss[0]; 2383 mtx_lock(&ss->tx.mtx); 2384 mxge_start_locked(ss); 2385 mtx_unlock(&ss->tx.mtx); 2386 } 2387 2388 /* 2389 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2390 * at most 32 bytes at a time, so as to avoid involving the software 2391 * pio handler in the nic. We re-write the first segment's low 2392 * DMA address to mark it valid only after we write the entire chunk 2393 * in a burst 2394 */ 2395 static inline void 2396 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2397 mcp_kreq_ether_recv_t *src) 2398 { 2399 uint32_t low; 2400 2401 low = src->addr_low; 2402 src->addr_low = 0xffffffff; 2403 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2404 wmb(); 2405 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2406 wmb(); 2407 src->addr_low = low; 2408 dst->addr_low = low; 2409 wmb(); 2410 } 2411 2412 static int 2413 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2414 { 2415 bus_dma_segment_t seg; 2416 struct mbuf *m; 2417 mxge_rx_ring_t *rx = &ss->rx_small; 2418 int cnt, err; 2419 2420 m = m_gethdr(M_NOWAIT, MT_DATA); 2421 if (m == NULL) { 2422 rx->alloc_fail++; 2423 err = ENOBUFS; 2424 goto done; 2425 } 2426 m->m_len = MHLEN; 2427 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2428 &seg, &cnt, BUS_DMA_NOWAIT); 2429 if (err != 0) { 2430 m_free(m); 2431 goto done; 2432 } 2433 rx->info[idx].m = m; 2434 rx->shadow[idx].addr_low = 2435 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2436 rx->shadow[idx].addr_high = 2437 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2438 2439 done: 2440 if ((idx & 7) == 7) 2441 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2442 return err; 2443 } 2444 2445 static int 2446 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2447 { 2448 bus_dma_segment_t seg[3]; 2449 struct mbuf *m; 2450 mxge_rx_ring_t *rx = &ss->rx_big; 2451 int cnt, err, i; 2452 2453 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2454 if (m == NULL) { 2455 rx->alloc_fail++; 2456 err = ENOBUFS; 2457 goto done; 2458 } 2459 m->m_len = rx->mlen; 2460 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2461 seg, &cnt, BUS_DMA_NOWAIT); 2462 if (err != 0) { 2463 m_free(m); 2464 goto done; 2465 } 2466 rx->info[idx].m = m; 2467 rx->shadow[idx].addr_low = 2468 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2469 rx->shadow[idx].addr_high = 2470 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2471 2472 #if MXGE_VIRT_JUMBOS 2473 for (i = 1; i < cnt; i++) { 2474 rx->shadow[idx + i].addr_low = 2475 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2476 rx->shadow[idx + i].addr_high = 2477 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2478 } 2479 #endif 2480 2481 done: 2482 for (i = 0; i < rx->nbufs; i++) { 2483 if ((idx & 7) == 7) { 2484 mxge_submit_8rx(&rx->lanai[idx - 7], 2485 &rx->shadow[idx - 7]); 2486 } 2487 idx++; 2488 } 2489 return err; 2490 } 2491 2492 #ifdef INET6 2493 2494 static uint16_t 2495 mxge_csum_generic(uint16_t *raw, int len) 2496 { 2497 uint32_t csum; 2498 2499 2500 csum = 0; 2501 while (len > 0) { 2502 csum += *raw; 2503 raw++; 2504 len -= 2; 2505 } 2506 csum = (csum >> 16) + (csum & 0xffff); 2507 csum = (csum >> 16) + (csum & 0xffff); 2508 return (uint16_t)csum; 2509 } 2510 2511 static inline uint16_t 2512 mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum) 2513 { 2514 uint32_t partial; 2515 int nxt, cksum_offset; 2516 struct ip6_hdr *ip6 = p; 2517 uint16_t c; 2518 2519 nxt = ip6->ip6_nxt; 2520 cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN; 2521 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) { 2522 cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN, 2523 IPPROTO_IPV6, &nxt); 2524 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) 2525 return (1); 2526 } 2527 2528 /* 2529 * IPv6 headers do not contain a checksum, and hence 2530 * do not checksum to zero, so they don't "fall out" 2531 * of the partial checksum calculation like IPv4 2532 * headers do. We need to fix the partial checksum by 2533 * subtracting the checksum of the IPv6 header. 2534 */ 2535 2536 partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset - 2537 ETHER_HDR_LEN); 2538 csum += ~partial; 2539 csum += (csum < ~partial); 2540 csum = (csum >> 16) + (csum & 0xFFFF); 2541 csum = (csum >> 16) + (csum & 0xFFFF); 2542 c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt, 2543 csum); 2544 c ^= 0xffff; 2545 return (c); 2546 } 2547 #endif /* INET6 */ 2548 /* 2549 * Myri10GE hardware checksums are not valid if the sender 2550 * padded the frame with non-zero padding. This is because 2551 * the firmware just does a simple 16-bit 1s complement 2552 * checksum across the entire frame, excluding the first 14 2553 * bytes. It is best to simply to check the checksum and 2554 * tell the stack about it only if the checksum is good 2555 */ 2556 2557 static inline uint16_t 2558 mxge_rx_csum(struct mbuf *m, int csum) 2559 { 2560 struct ether_header *eh; 2561 #ifdef INET 2562 struct ip *ip; 2563 #endif 2564 #if defined(INET) || defined(INET6) 2565 int cap = m->m_pkthdr.rcvif->if_capenable; 2566 #endif 2567 uint16_t c, etype; 2568 2569 2570 eh = mtod(m, struct ether_header *); 2571 etype = ntohs(eh->ether_type); 2572 switch (etype) { 2573 #ifdef INET 2574 case ETHERTYPE_IP: 2575 if ((cap & IFCAP_RXCSUM) == 0) 2576 return (1); 2577 ip = (struct ip *)(eh + 1); 2578 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP) 2579 return (1); 2580 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2581 htonl(ntohs(csum) + ntohs(ip->ip_len) - 2582 (ip->ip_hl << 2) + ip->ip_p)); 2583 c ^= 0xffff; 2584 break; 2585 #endif 2586 #ifdef INET6 2587 case ETHERTYPE_IPV6: 2588 if ((cap & IFCAP_RXCSUM_IPV6) == 0) 2589 return (1); 2590 c = mxge_rx_csum6((eh + 1), m, csum); 2591 break; 2592 #endif 2593 default: 2594 c = 1; 2595 } 2596 return (c); 2597 } 2598 2599 static void 2600 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2601 { 2602 struct ether_vlan_header *evl; 2603 struct ether_header *eh; 2604 uint32_t partial; 2605 2606 evl = mtod(m, struct ether_vlan_header *); 2607 eh = mtod(m, struct ether_header *); 2608 2609 /* 2610 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes 2611 * after what the firmware thought was the end of the ethernet 2612 * header. 2613 */ 2614 2615 /* put checksum into host byte order */ 2616 *csum = ntohs(*csum); 2617 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2618 (*csum) += ~partial; 2619 (*csum) += ((*csum) < ~partial); 2620 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2621 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2622 2623 /* restore checksum to network byte order; 2624 later consumers expect this */ 2625 *csum = htons(*csum); 2626 2627 /* save the tag */ 2628 #ifdef MXGE_NEW_VLAN_API 2629 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); 2630 #else 2631 { 2632 struct m_tag *mtag; 2633 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), 2634 M_NOWAIT); 2635 if (mtag == NULL) 2636 return; 2637 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); 2638 m_tag_prepend(m, mtag); 2639 } 2640 2641 #endif 2642 m->m_flags |= M_VLANTAG; 2643 2644 /* 2645 * Remove the 802.1q header by copying the Ethernet 2646 * addresses over it and adjusting the beginning of 2647 * the data in the mbuf. The encapsulated Ethernet 2648 * type field is already in place. 2649 */ 2650 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2651 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2652 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2653 } 2654 2655 2656 static inline void 2657 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, 2658 uint32_t csum, int lro) 2659 { 2660 mxge_softc_t *sc; 2661 struct ifnet *ifp; 2662 struct mbuf *m; 2663 struct ether_header *eh; 2664 mxge_rx_ring_t *rx; 2665 bus_dmamap_t old_map; 2666 int idx; 2667 2668 sc = ss->sc; 2669 ifp = sc->ifp; 2670 rx = &ss->rx_big; 2671 idx = rx->cnt & rx->mask; 2672 rx->cnt += rx->nbufs; 2673 /* save a pointer to the received mbuf */ 2674 m = rx->info[idx].m; 2675 /* try to replace the received mbuf */ 2676 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2677 /* drop the frame -- the old mbuf is re-cycled */ 2678 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 2679 return; 2680 } 2681 2682 /* unmap the received buffer */ 2683 old_map = rx->info[idx].map; 2684 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2685 bus_dmamap_unload(rx->dmat, old_map); 2686 2687 /* swap the bus_dmamap_t's */ 2688 rx->info[idx].map = rx->extra_map; 2689 rx->extra_map = old_map; 2690 2691 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2692 * aligned */ 2693 m->m_data += MXGEFW_PAD; 2694 2695 m->m_pkthdr.rcvif = ifp; 2696 m->m_len = m->m_pkthdr.len = len; 2697 ss->ipackets++; 2698 eh = mtod(m, struct ether_header *); 2699 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2700 mxge_vlan_tag_remove(m, &csum); 2701 } 2702 /* flowid only valid if RSS hashing is enabled */ 2703 if (sc->num_slices > 1) { 2704 m->m_pkthdr.flowid = (ss - sc->ss); 2705 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 2706 } 2707 /* if the checksum is valid, mark it in the mbuf header */ 2708 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && 2709 (0 == mxge_rx_csum(m, csum))) { 2710 /* Tell the stack that the checksum is good */ 2711 m->m_pkthdr.csum_data = 0xffff; 2712 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2713 CSUM_DATA_VALID; 2714 2715 #if defined(INET) || defined (INET6) 2716 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0))) 2717 return; 2718 #endif 2719 } 2720 /* pass the frame up the stack */ 2721 (*ifp->if_input)(ifp, m); 2722 } 2723 2724 static inline void 2725 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, 2726 uint32_t csum, int lro) 2727 { 2728 mxge_softc_t *sc; 2729 struct ifnet *ifp; 2730 struct ether_header *eh; 2731 struct mbuf *m; 2732 mxge_rx_ring_t *rx; 2733 bus_dmamap_t old_map; 2734 int idx; 2735 2736 sc = ss->sc; 2737 ifp = sc->ifp; 2738 rx = &ss->rx_small; 2739 idx = rx->cnt & rx->mask; 2740 rx->cnt++; 2741 /* save a pointer to the received mbuf */ 2742 m = rx->info[idx].m; 2743 /* try to replace the received mbuf */ 2744 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2745 /* drop the frame -- the old mbuf is re-cycled */ 2746 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 2747 return; 2748 } 2749 2750 /* unmap the received buffer */ 2751 old_map = rx->info[idx].map; 2752 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2753 bus_dmamap_unload(rx->dmat, old_map); 2754 2755 /* swap the bus_dmamap_t's */ 2756 rx->info[idx].map = rx->extra_map; 2757 rx->extra_map = old_map; 2758 2759 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2760 * aligned */ 2761 m->m_data += MXGEFW_PAD; 2762 2763 m->m_pkthdr.rcvif = ifp; 2764 m->m_len = m->m_pkthdr.len = len; 2765 ss->ipackets++; 2766 eh = mtod(m, struct ether_header *); 2767 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2768 mxge_vlan_tag_remove(m, &csum); 2769 } 2770 /* flowid only valid if RSS hashing is enabled */ 2771 if (sc->num_slices > 1) { 2772 m->m_pkthdr.flowid = (ss - sc->ss); 2773 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 2774 } 2775 /* if the checksum is valid, mark it in the mbuf header */ 2776 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && 2777 (0 == mxge_rx_csum(m, csum))) { 2778 /* Tell the stack that the checksum is good */ 2779 m->m_pkthdr.csum_data = 0xffff; 2780 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2781 CSUM_DATA_VALID; 2782 2783 #if defined(INET) || defined (INET6) 2784 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum))) 2785 return; 2786 #endif 2787 } 2788 /* pass the frame up the stack */ 2789 (*ifp->if_input)(ifp, m); 2790 } 2791 2792 static inline void 2793 mxge_clean_rx_done(struct mxge_slice_state *ss) 2794 { 2795 mxge_rx_done_t *rx_done = &ss->rx_done; 2796 int limit = 0; 2797 uint16_t length; 2798 uint16_t checksum; 2799 int lro; 2800 2801 lro = ss->sc->ifp->if_capenable & IFCAP_LRO; 2802 while (rx_done->entry[rx_done->idx].length != 0) { 2803 length = ntohs(rx_done->entry[rx_done->idx].length); 2804 rx_done->entry[rx_done->idx].length = 0; 2805 checksum = rx_done->entry[rx_done->idx].checksum; 2806 if (length <= (MHLEN - MXGEFW_PAD)) 2807 mxge_rx_done_small(ss, length, checksum, lro); 2808 else 2809 mxge_rx_done_big(ss, length, checksum, lro); 2810 rx_done->cnt++; 2811 rx_done->idx = rx_done->cnt & rx_done->mask; 2812 2813 /* limit potential for livelock */ 2814 if (__predict_false(++limit > rx_done->mask / 2)) 2815 break; 2816 } 2817 #if defined(INET) || defined (INET6) 2818 tcp_lro_flush_all(&ss->lc); 2819 #endif 2820 } 2821 2822 2823 static inline void 2824 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2825 { 2826 struct ifnet *ifp; 2827 mxge_tx_ring_t *tx; 2828 struct mbuf *m; 2829 bus_dmamap_t map; 2830 int idx; 2831 int *flags; 2832 2833 tx = &ss->tx; 2834 ifp = ss->sc->ifp; 2835 while (tx->pkt_done != mcp_idx) { 2836 idx = tx->done & tx->mask; 2837 tx->done++; 2838 m = tx->info[idx].m; 2839 /* mbuf and DMA map only attached to the first 2840 segment per-mbuf */ 2841 if (m != NULL) { 2842 ss->obytes += m->m_pkthdr.len; 2843 if (m->m_flags & M_MCAST) 2844 ss->omcasts++; 2845 ss->opackets++; 2846 tx->info[idx].m = NULL; 2847 map = tx->info[idx].map; 2848 bus_dmamap_unload(tx->dmat, map); 2849 m_freem(m); 2850 } 2851 if (tx->info[idx].flag) { 2852 tx->info[idx].flag = 0; 2853 tx->pkt_done++; 2854 } 2855 } 2856 2857 /* If we have space, clear IFF_OACTIVE to tell the stack that 2858 its OK to send packets */ 2859 #ifdef IFNET_BUF_RING 2860 flags = &ss->if_drv_flags; 2861 #else 2862 flags = &ifp->if_drv_flags; 2863 #endif 2864 mtx_lock(&ss->tx.mtx); 2865 if ((*flags) & IFF_DRV_OACTIVE && 2866 tx->req - tx->done < (tx->mask + 1)/4) { 2867 *(flags) &= ~IFF_DRV_OACTIVE; 2868 ss->tx.wake++; 2869 mxge_start_locked(ss); 2870 } 2871 #ifdef IFNET_BUF_RING 2872 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { 2873 /* let the NIC stop polling this queue, since there 2874 * are no more transmits pending */ 2875 if (tx->req == tx->done) { 2876 *tx->send_stop = 1; 2877 tx->queue_active = 0; 2878 tx->deactivate++; 2879 wmb(); 2880 } 2881 } 2882 #endif 2883 mtx_unlock(&ss->tx.mtx); 2884 2885 } 2886 2887 static struct mxge_media_type mxge_xfp_media_types[] = 2888 { 2889 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2890 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2891 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2892 {0, (1 << 5), "10GBASE-ER"}, 2893 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2894 {0, (1 << 3), "10GBASE-SW"}, 2895 {0, (1 << 2), "10GBASE-LW"}, 2896 {0, (1 << 1), "10GBASE-EW"}, 2897 {0, (1 << 0), "Reserved"} 2898 }; 2899 static struct mxge_media_type mxge_sfp_media_types[] = 2900 { 2901 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"}, 2902 {0, (1 << 7), "Reserved"}, 2903 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2904 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2905 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}, 2906 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"} 2907 }; 2908 2909 static void 2910 mxge_media_set(mxge_softc_t *sc, int media_type) 2911 { 2912 2913 2914 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type, 2915 0, NULL); 2916 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type); 2917 sc->current_media = media_type; 2918 sc->media.ifm_media = sc->media.ifm_cur->ifm_media; 2919 } 2920 2921 static void 2922 mxge_media_init(mxge_softc_t *sc) 2923 { 2924 char *ptr; 2925 int i; 2926 2927 ifmedia_removeall(&sc->media); 2928 mxge_media_set(sc, IFM_AUTO); 2929 2930 /* 2931 * parse the product code to deterimine the interface type 2932 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2933 * after the 3rd dash in the driver's cached copy of the 2934 * EEPROM's product code string. 2935 */ 2936 ptr = sc->product_code_string; 2937 if (ptr == NULL) { 2938 device_printf(sc->dev, "Missing product code\n"); 2939 return; 2940 } 2941 2942 for (i = 0; i < 3; i++, ptr++) { 2943 ptr = strchr(ptr, '-'); 2944 if (ptr == NULL) { 2945 device_printf(sc->dev, 2946 "only %d dashes in PC?!?\n", i); 2947 return; 2948 } 2949 } 2950 if (*ptr == 'C' || *(ptr +1) == 'C') { 2951 /* -C is CX4 */ 2952 sc->connector = MXGE_CX4; 2953 mxge_media_set(sc, IFM_10G_CX4); 2954 } else if (*ptr == 'Q') { 2955 /* -Q is Quad Ribbon Fiber */ 2956 sc->connector = MXGE_QRF; 2957 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2958 /* FreeBSD has no media type for Quad ribbon fiber */ 2959 } else if (*ptr == 'R') { 2960 /* -R is XFP */ 2961 sc->connector = MXGE_XFP; 2962 } else if (*ptr == 'S' || *(ptr +1) == 'S') { 2963 /* -S or -2S is SFP+ */ 2964 sc->connector = MXGE_SFP; 2965 } else { 2966 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2967 } 2968 } 2969 2970 /* 2971 * Determine the media type for a NIC. Some XFPs will identify 2972 * themselves only when their link is up, so this is initiated via a 2973 * link up interrupt. However, this can potentially take up to 2974 * several milliseconds, so it is run via the watchdog routine, rather 2975 * than in the interrupt handler itself. 2976 */ 2977 static void 2978 mxge_media_probe(mxge_softc_t *sc) 2979 { 2980 mxge_cmd_t cmd; 2981 char *cage_type; 2982 2983 struct mxge_media_type *mxge_media_types = NULL; 2984 int i, err, ms, mxge_media_type_entries; 2985 uint32_t byte; 2986 2987 sc->need_media_probe = 0; 2988 2989 if (sc->connector == MXGE_XFP) { 2990 /* -R is XFP */ 2991 mxge_media_types = mxge_xfp_media_types; 2992 mxge_media_type_entries = 2993 nitems(mxge_xfp_media_types); 2994 byte = MXGE_XFP_COMPLIANCE_BYTE; 2995 cage_type = "XFP"; 2996 } else if (sc->connector == MXGE_SFP) { 2997 /* -S or -2S is SFP+ */ 2998 mxge_media_types = mxge_sfp_media_types; 2999 mxge_media_type_entries = 3000 nitems(mxge_sfp_media_types); 3001 cage_type = "SFP+"; 3002 byte = 3; 3003 } else { 3004 /* nothing to do; media type cannot change */ 3005 return; 3006 } 3007 3008 /* 3009 * At this point we know the NIC has an XFP cage, so now we 3010 * try to determine what is in the cage by using the 3011 * firmware's XFP I2C commands to read the XFP 10GbE compilance 3012 * register. We read just one byte, which may take over 3013 * a millisecond 3014 */ 3015 3016 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 3017 cmd.data1 = byte; 3018 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 3019 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 3020 device_printf(sc->dev, "failed to read XFP\n"); 3021 } 3022 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 3023 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 3024 } 3025 if (err != MXGEFW_CMD_OK) { 3026 return; 3027 } 3028 3029 /* now we wait for the data to be cached */ 3030 cmd.data0 = byte; 3031 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 3032 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 3033 DELAY(1000); 3034 cmd.data0 = byte; 3035 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 3036 } 3037 if (err != MXGEFW_CMD_OK) { 3038 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 3039 cage_type, err, ms); 3040 return; 3041 } 3042 3043 if (cmd.data0 == mxge_media_types[0].bitmask) { 3044 if (mxge_verbose) 3045 device_printf(sc->dev, "%s:%s\n", cage_type, 3046 mxge_media_types[0].name); 3047 if (sc->current_media != mxge_media_types[0].flag) { 3048 mxge_media_init(sc); 3049 mxge_media_set(sc, mxge_media_types[0].flag); 3050 } 3051 return; 3052 } 3053 for (i = 1; i < mxge_media_type_entries; i++) { 3054 if (cmd.data0 & mxge_media_types[i].bitmask) { 3055 if (mxge_verbose) 3056 device_printf(sc->dev, "%s:%s\n", 3057 cage_type, 3058 mxge_media_types[i].name); 3059 3060 if (sc->current_media != mxge_media_types[i].flag) { 3061 mxge_media_init(sc); 3062 mxge_media_set(sc, mxge_media_types[i].flag); 3063 } 3064 return; 3065 } 3066 } 3067 if (mxge_verbose) 3068 device_printf(sc->dev, "%s media 0x%x unknown\n", 3069 cage_type, cmd.data0); 3070 3071 return; 3072 } 3073 3074 static void 3075 mxge_intr(void *arg) 3076 { 3077 struct mxge_slice_state *ss = arg; 3078 mxge_softc_t *sc = ss->sc; 3079 mcp_irq_data_t *stats = ss->fw_stats; 3080 mxge_tx_ring_t *tx = &ss->tx; 3081 mxge_rx_done_t *rx_done = &ss->rx_done; 3082 uint32_t send_done_count; 3083 uint8_t valid; 3084 3085 3086 #ifndef IFNET_BUF_RING 3087 /* an interrupt on a non-zero slice is implicitly valid 3088 since MSI-X irqs are not shared */ 3089 if (ss != sc->ss) { 3090 mxge_clean_rx_done(ss); 3091 *ss->irq_claim = be32toh(3); 3092 return; 3093 } 3094 #endif 3095 3096 /* make sure the DMA has finished */ 3097 if (!stats->valid) { 3098 return; 3099 } 3100 valid = stats->valid; 3101 3102 if (sc->legacy_irq) { 3103 /* lower legacy IRQ */ 3104 *sc->irq_deassert = 0; 3105 if (!mxge_deassert_wait) 3106 /* don't wait for conf. that irq is low */ 3107 stats->valid = 0; 3108 } else { 3109 stats->valid = 0; 3110 } 3111 3112 /* loop while waiting for legacy irq deassertion */ 3113 do { 3114 /* check for transmit completes and receives */ 3115 send_done_count = be32toh(stats->send_done_count); 3116 while ((send_done_count != tx->pkt_done) || 3117 (rx_done->entry[rx_done->idx].length != 0)) { 3118 if (send_done_count != tx->pkt_done) 3119 mxge_tx_done(ss, (int)send_done_count); 3120 mxge_clean_rx_done(ss); 3121 send_done_count = be32toh(stats->send_done_count); 3122 } 3123 if (sc->legacy_irq && mxge_deassert_wait) 3124 wmb(); 3125 } while (*((volatile uint8_t *) &stats->valid)); 3126 3127 /* fw link & error stats meaningful only on the first slice */ 3128 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 3129 if (sc->link_state != stats->link_up) { 3130 sc->link_state = stats->link_up; 3131 if (sc->link_state) { 3132 if_link_state_change(sc->ifp, LINK_STATE_UP); 3133 if (mxge_verbose) 3134 device_printf(sc->dev, "link up\n"); 3135 } else { 3136 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3137 if (mxge_verbose) 3138 device_printf(sc->dev, "link down\n"); 3139 } 3140 sc->need_media_probe = 1; 3141 } 3142 if (sc->rdma_tags_available != 3143 be32toh(stats->rdma_tags_available)) { 3144 sc->rdma_tags_available = 3145 be32toh(stats->rdma_tags_available); 3146 device_printf(sc->dev, "RDMA timed out! %d tags " 3147 "left\n", sc->rdma_tags_available); 3148 } 3149 3150 if (stats->link_down) { 3151 sc->down_cnt += stats->link_down; 3152 sc->link_state = 0; 3153 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3154 } 3155 } 3156 3157 /* check to see if we have rx token to pass back */ 3158 if (valid & 0x1) 3159 *ss->irq_claim = be32toh(3); 3160 *(ss->irq_claim + 1) = be32toh(3); 3161 } 3162 3163 static void 3164 mxge_init(void *arg) 3165 { 3166 mxge_softc_t *sc = arg; 3167 struct ifnet *ifp = sc->ifp; 3168 3169 3170 mtx_lock(&sc->driver_mtx); 3171 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 3172 (void) mxge_open(sc); 3173 mtx_unlock(&sc->driver_mtx); 3174 } 3175 3176 3177 3178 static void 3179 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 3180 { 3181 int i; 3182 3183 #if defined(INET) || defined(INET6) 3184 tcp_lro_free(&ss->lc); 3185 #endif 3186 for (i = 0; i <= ss->rx_big.mask; i++) { 3187 if (ss->rx_big.info[i].m == NULL) 3188 continue; 3189 bus_dmamap_unload(ss->rx_big.dmat, 3190 ss->rx_big.info[i].map); 3191 m_freem(ss->rx_big.info[i].m); 3192 ss->rx_big.info[i].m = NULL; 3193 } 3194 3195 for (i = 0; i <= ss->rx_small.mask; i++) { 3196 if (ss->rx_small.info[i].m == NULL) 3197 continue; 3198 bus_dmamap_unload(ss->rx_small.dmat, 3199 ss->rx_small.info[i].map); 3200 m_freem(ss->rx_small.info[i].m); 3201 ss->rx_small.info[i].m = NULL; 3202 } 3203 3204 /* transmit ring used only on the first slice */ 3205 if (ss->tx.info == NULL) 3206 return; 3207 3208 for (i = 0; i <= ss->tx.mask; i++) { 3209 ss->tx.info[i].flag = 0; 3210 if (ss->tx.info[i].m == NULL) 3211 continue; 3212 bus_dmamap_unload(ss->tx.dmat, 3213 ss->tx.info[i].map); 3214 m_freem(ss->tx.info[i].m); 3215 ss->tx.info[i].m = NULL; 3216 } 3217 } 3218 3219 static void 3220 mxge_free_mbufs(mxge_softc_t *sc) 3221 { 3222 int slice; 3223 3224 for (slice = 0; slice < sc->num_slices; slice++) 3225 mxge_free_slice_mbufs(&sc->ss[slice]); 3226 } 3227 3228 static void 3229 mxge_free_slice_rings(struct mxge_slice_state *ss) 3230 { 3231 int i; 3232 3233 3234 if (ss->rx_done.entry != NULL) 3235 mxge_dma_free(&ss->rx_done.dma); 3236 ss->rx_done.entry = NULL; 3237 3238 if (ss->tx.req_bytes != NULL) 3239 free(ss->tx.req_bytes, M_DEVBUF); 3240 ss->tx.req_bytes = NULL; 3241 3242 if (ss->tx.seg_list != NULL) 3243 free(ss->tx.seg_list, M_DEVBUF); 3244 ss->tx.seg_list = NULL; 3245 3246 if (ss->rx_small.shadow != NULL) 3247 free(ss->rx_small.shadow, M_DEVBUF); 3248 ss->rx_small.shadow = NULL; 3249 3250 if (ss->rx_big.shadow != NULL) 3251 free(ss->rx_big.shadow, M_DEVBUF); 3252 ss->rx_big.shadow = NULL; 3253 3254 if (ss->tx.info != NULL) { 3255 if (ss->tx.dmat != NULL) { 3256 for (i = 0; i <= ss->tx.mask; i++) { 3257 bus_dmamap_destroy(ss->tx.dmat, 3258 ss->tx.info[i].map); 3259 } 3260 bus_dma_tag_destroy(ss->tx.dmat); 3261 } 3262 free(ss->tx.info, M_DEVBUF); 3263 } 3264 ss->tx.info = NULL; 3265 3266 if (ss->rx_small.info != NULL) { 3267 if (ss->rx_small.dmat != NULL) { 3268 for (i = 0; i <= ss->rx_small.mask; i++) { 3269 bus_dmamap_destroy(ss->rx_small.dmat, 3270 ss->rx_small.info[i].map); 3271 } 3272 bus_dmamap_destroy(ss->rx_small.dmat, 3273 ss->rx_small.extra_map); 3274 bus_dma_tag_destroy(ss->rx_small.dmat); 3275 } 3276 free(ss->rx_small.info, M_DEVBUF); 3277 } 3278 ss->rx_small.info = NULL; 3279 3280 if (ss->rx_big.info != NULL) { 3281 if (ss->rx_big.dmat != NULL) { 3282 for (i = 0; i <= ss->rx_big.mask; i++) { 3283 bus_dmamap_destroy(ss->rx_big.dmat, 3284 ss->rx_big.info[i].map); 3285 } 3286 bus_dmamap_destroy(ss->rx_big.dmat, 3287 ss->rx_big.extra_map); 3288 bus_dma_tag_destroy(ss->rx_big.dmat); 3289 } 3290 free(ss->rx_big.info, M_DEVBUF); 3291 } 3292 ss->rx_big.info = NULL; 3293 } 3294 3295 static void 3296 mxge_free_rings(mxge_softc_t *sc) 3297 { 3298 int slice; 3299 3300 for (slice = 0; slice < sc->num_slices; slice++) 3301 mxge_free_slice_rings(&sc->ss[slice]); 3302 } 3303 3304 static int 3305 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3306 int tx_ring_entries) 3307 { 3308 mxge_softc_t *sc = ss->sc; 3309 size_t bytes; 3310 int err, i; 3311 3312 /* allocate per-slice receive resources */ 3313 3314 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 3315 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 3316 3317 /* allocate the rx shadow rings */ 3318 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 3319 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3320 3321 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 3322 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3323 3324 /* allocate the rx host info rings */ 3325 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 3326 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3327 3328 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 3329 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3330 3331 /* allocate the rx busdma resources */ 3332 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3333 1, /* alignment */ 3334 4096, /* boundary */ 3335 BUS_SPACE_MAXADDR, /* low */ 3336 BUS_SPACE_MAXADDR, /* high */ 3337 NULL, NULL, /* filter */ 3338 MHLEN, /* maxsize */ 3339 1, /* num segs */ 3340 MHLEN, /* maxsegsize */ 3341 BUS_DMA_ALLOCNOW, /* flags */ 3342 NULL, NULL, /* lock */ 3343 &ss->rx_small.dmat); /* tag */ 3344 if (err != 0) { 3345 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3346 err); 3347 return err; 3348 } 3349 3350 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3351 1, /* alignment */ 3352 #if MXGE_VIRT_JUMBOS 3353 4096, /* boundary */ 3354 #else 3355 0, /* boundary */ 3356 #endif 3357 BUS_SPACE_MAXADDR, /* low */ 3358 BUS_SPACE_MAXADDR, /* high */ 3359 NULL, NULL, /* filter */ 3360 3*4096, /* maxsize */ 3361 #if MXGE_VIRT_JUMBOS 3362 3, /* num segs */ 3363 4096, /* maxsegsize*/ 3364 #else 3365 1, /* num segs */ 3366 MJUM9BYTES, /* maxsegsize*/ 3367 #endif 3368 BUS_DMA_ALLOCNOW, /* flags */ 3369 NULL, NULL, /* lock */ 3370 &ss->rx_big.dmat); /* tag */ 3371 if (err != 0) { 3372 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3373 err); 3374 return err; 3375 } 3376 for (i = 0; i <= ss->rx_small.mask; i++) { 3377 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3378 &ss->rx_small.info[i].map); 3379 if (err != 0) { 3380 device_printf(sc->dev, "Err %d rx_small dmamap\n", 3381 err); 3382 return err; 3383 } 3384 } 3385 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3386 &ss->rx_small.extra_map); 3387 if (err != 0) { 3388 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 3389 err); 3390 return err; 3391 } 3392 3393 for (i = 0; i <= ss->rx_big.mask; i++) { 3394 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3395 &ss->rx_big.info[i].map); 3396 if (err != 0) { 3397 device_printf(sc->dev, "Err %d rx_big dmamap\n", 3398 err); 3399 return err; 3400 } 3401 } 3402 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3403 &ss->rx_big.extra_map); 3404 if (err != 0) { 3405 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 3406 err); 3407 return err; 3408 } 3409 3410 /* now allocate TX resources */ 3411 3412 #ifndef IFNET_BUF_RING 3413 /* only use a single TX ring for now */ 3414 if (ss != ss->sc->ss) 3415 return 0; 3416 #endif 3417 3418 ss->tx.mask = tx_ring_entries - 1; 3419 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3420 3421 3422 /* allocate the tx request copy block */ 3423 bytes = 8 + 3424 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3425 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 3426 /* ensure req_list entries are aligned to 8 bytes */ 3427 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3428 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL); 3429 3430 /* allocate the tx busdma segment list */ 3431 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3432 ss->tx.seg_list = (bus_dma_segment_t *) 3433 malloc(bytes, M_DEVBUF, M_WAITOK); 3434 3435 /* allocate the tx host info ring */ 3436 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3437 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3438 3439 /* allocate the tx busdma resources */ 3440 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3441 1, /* alignment */ 3442 sc->tx_boundary, /* boundary */ 3443 BUS_SPACE_MAXADDR, /* low */ 3444 BUS_SPACE_MAXADDR, /* high */ 3445 NULL, NULL, /* filter */ 3446 65536 + 256, /* maxsize */ 3447 ss->tx.max_desc - 2, /* num segs */ 3448 sc->tx_boundary, /* maxsegsz */ 3449 BUS_DMA_ALLOCNOW, /* flags */ 3450 NULL, NULL, /* lock */ 3451 &ss->tx.dmat); /* tag */ 3452 3453 if (err != 0) { 3454 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3455 err); 3456 return err; 3457 } 3458 3459 /* now use these tags to setup dmamaps for each slot 3460 in the ring */ 3461 for (i = 0; i <= ss->tx.mask; i++) { 3462 err = bus_dmamap_create(ss->tx.dmat, 0, 3463 &ss->tx.info[i].map); 3464 if (err != 0) { 3465 device_printf(sc->dev, "Err %d tx dmamap\n", 3466 err); 3467 return err; 3468 } 3469 } 3470 return 0; 3471 3472 } 3473 3474 static int 3475 mxge_alloc_rings(mxge_softc_t *sc) 3476 { 3477 mxge_cmd_t cmd; 3478 int tx_ring_size; 3479 int tx_ring_entries, rx_ring_entries; 3480 int err, slice; 3481 3482 /* get ring sizes */ 3483 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3484 tx_ring_size = cmd.data0; 3485 if (err != 0) { 3486 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3487 goto abort; 3488 } 3489 3490 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3491 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3492 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 3493 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 3494 IFQ_SET_READY(&sc->ifp->if_snd); 3495 3496 for (slice = 0; slice < sc->num_slices; slice++) { 3497 err = mxge_alloc_slice_rings(&sc->ss[slice], 3498 rx_ring_entries, 3499 tx_ring_entries); 3500 if (err != 0) 3501 goto abort; 3502 } 3503 return 0; 3504 3505 abort: 3506 mxge_free_rings(sc); 3507 return err; 3508 3509 } 3510 3511 3512 static void 3513 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3514 { 3515 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3516 3517 if (bufsize < MCLBYTES) { 3518 /* easy, everything fits in a single buffer */ 3519 *big_buf_size = MCLBYTES; 3520 *cl_size = MCLBYTES; 3521 *nbufs = 1; 3522 return; 3523 } 3524 3525 if (bufsize < MJUMPAGESIZE) { 3526 /* still easy, everything still fits in a single buffer */ 3527 *big_buf_size = MJUMPAGESIZE; 3528 *cl_size = MJUMPAGESIZE; 3529 *nbufs = 1; 3530 return; 3531 } 3532 #if MXGE_VIRT_JUMBOS 3533 /* now we need to use virtually contiguous buffers */ 3534 *cl_size = MJUM9BYTES; 3535 *big_buf_size = 4096; 3536 *nbufs = mtu / 4096 + 1; 3537 /* needs to be a power of two, so round up */ 3538 if (*nbufs == 3) 3539 *nbufs = 4; 3540 #else 3541 *cl_size = MJUM9BYTES; 3542 *big_buf_size = MJUM9BYTES; 3543 *nbufs = 1; 3544 #endif 3545 } 3546 3547 static int 3548 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3549 { 3550 mxge_softc_t *sc; 3551 mxge_cmd_t cmd; 3552 bus_dmamap_t map; 3553 int err, i, slice; 3554 3555 3556 sc = ss->sc; 3557 slice = ss - sc->ss; 3558 3559 #if defined(INET) || defined(INET6) 3560 (void)tcp_lro_init(&ss->lc); 3561 #endif 3562 ss->lc.ifp = sc->ifp; 3563 3564 /* get the lanai pointers to the send and receive rings */ 3565 3566 err = 0; 3567 #ifndef IFNET_BUF_RING 3568 /* We currently only send from the first slice */ 3569 if (slice == 0) { 3570 #endif 3571 cmd.data0 = slice; 3572 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3573 ss->tx.lanai = 3574 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3575 ss->tx.send_go = (volatile uint32_t *) 3576 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3577 ss->tx.send_stop = (volatile uint32_t *) 3578 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3579 #ifndef IFNET_BUF_RING 3580 } 3581 #endif 3582 cmd.data0 = slice; 3583 err |= mxge_send_cmd(sc, 3584 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3585 ss->rx_small.lanai = 3586 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3587 cmd.data0 = slice; 3588 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3589 ss->rx_big.lanai = 3590 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3591 3592 if (err != 0) { 3593 device_printf(sc->dev, 3594 "failed to get ring sizes or locations\n"); 3595 return EIO; 3596 } 3597 3598 /* stock receive rings */ 3599 for (i = 0; i <= ss->rx_small.mask; i++) { 3600 map = ss->rx_small.info[i].map; 3601 err = mxge_get_buf_small(ss, map, i); 3602 if (err) { 3603 device_printf(sc->dev, "alloced %d/%d smalls\n", 3604 i, ss->rx_small.mask + 1); 3605 return ENOMEM; 3606 } 3607 } 3608 for (i = 0; i <= ss->rx_big.mask; i++) { 3609 ss->rx_big.shadow[i].addr_low = 0xffffffff; 3610 ss->rx_big.shadow[i].addr_high = 0xffffffff; 3611 } 3612 ss->rx_big.nbufs = nbufs; 3613 ss->rx_big.cl_size = cl_size; 3614 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN + 3615 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3616 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { 3617 map = ss->rx_big.info[i].map; 3618 err = mxge_get_buf_big(ss, map, i); 3619 if (err) { 3620 device_printf(sc->dev, "alloced %d/%d bigs\n", 3621 i, ss->rx_big.mask + 1); 3622 return ENOMEM; 3623 } 3624 } 3625 return 0; 3626 } 3627 3628 static int 3629 mxge_open(mxge_softc_t *sc) 3630 { 3631 mxge_cmd_t cmd; 3632 int err, big_bytes, nbufs, slice, cl_size, i; 3633 bus_addr_t bus; 3634 volatile uint8_t *itable; 3635 struct mxge_slice_state *ss; 3636 3637 /* Copy the MAC address in case it was overridden */ 3638 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 3639 3640 err = mxge_reset(sc, 1); 3641 if (err != 0) { 3642 device_printf(sc->dev, "failed to reset\n"); 3643 return EIO; 3644 } 3645 3646 if (sc->num_slices > 1) { 3647 /* setup the indirection table */ 3648 cmd.data0 = sc->num_slices; 3649 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 3650 &cmd); 3651 3652 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 3653 &cmd); 3654 if (err != 0) { 3655 device_printf(sc->dev, 3656 "failed to setup rss tables\n"); 3657 return err; 3658 } 3659 3660 /* just enable an identity mapping */ 3661 itable = sc->sram + cmd.data0; 3662 for (i = 0; i < sc->num_slices; i++) 3663 itable[i] = (uint8_t)i; 3664 3665 cmd.data0 = 1; 3666 cmd.data1 = mxge_rss_hash_type; 3667 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3668 if (err != 0) { 3669 device_printf(sc->dev, "failed to enable slices\n"); 3670 return err; 3671 } 3672 } 3673 3674 3675 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); 3676 3677 cmd.data0 = nbufs; 3678 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 3679 &cmd); 3680 /* error is only meaningful if we're trying to set 3681 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 3682 if (err && nbufs > 1) { 3683 device_printf(sc->dev, 3684 "Failed to set alway-use-n to %d\n", 3685 nbufs); 3686 return EIO; 3687 } 3688 /* Give the firmware the mtu and the big and small buffer 3689 sizes. The firmware wants the big buf size to be a power 3690 of two. Luckily, FreeBSD's clusters are powers of two */ 3691 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3692 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3693 cmd.data0 = MHLEN - MXGEFW_PAD; 3694 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3695 &cmd); 3696 cmd.data0 = big_bytes; 3697 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3698 3699 if (err != 0) { 3700 device_printf(sc->dev, "failed to setup params\n"); 3701 goto abort; 3702 } 3703 3704 /* Now give him the pointer to the stats block */ 3705 for (slice = 0; 3706 #ifdef IFNET_BUF_RING 3707 slice < sc->num_slices; 3708 #else 3709 slice < 1; 3710 #endif 3711 slice++) { 3712 ss = &sc->ss[slice]; 3713 cmd.data0 = 3714 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3715 cmd.data1 = 3716 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3717 cmd.data2 = sizeof(struct mcp_irq_data); 3718 cmd.data2 |= (slice << 16); 3719 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3720 } 3721 3722 if (err != 0) { 3723 bus = sc->ss->fw_stats_dma.bus_addr; 3724 bus += offsetof(struct mcp_irq_data, send_done_count); 3725 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3726 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3727 err = mxge_send_cmd(sc, 3728 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3729 &cmd); 3730 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3731 sc->fw_multicast_support = 0; 3732 } else { 3733 sc->fw_multicast_support = 1; 3734 } 3735 3736 if (err != 0) { 3737 device_printf(sc->dev, "failed to setup params\n"); 3738 goto abort; 3739 } 3740 3741 for (slice = 0; slice < sc->num_slices; slice++) { 3742 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3743 if (err != 0) { 3744 device_printf(sc->dev, "couldn't open slice %d\n", 3745 slice); 3746 goto abort; 3747 } 3748 } 3749 3750 /* Finally, start the firmware running */ 3751 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3752 if (err) { 3753 device_printf(sc->dev, "Couldn't bring up link\n"); 3754 goto abort; 3755 } 3756 #ifdef IFNET_BUF_RING 3757 for (slice = 0; slice < sc->num_slices; slice++) { 3758 ss = &sc->ss[slice]; 3759 ss->if_drv_flags |= IFF_DRV_RUNNING; 3760 ss->if_drv_flags &= ~IFF_DRV_OACTIVE; 3761 } 3762 #endif 3763 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3764 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3765 3766 return 0; 3767 3768 3769 abort: 3770 mxge_free_mbufs(sc); 3771 3772 return err; 3773 } 3774 3775 static int 3776 mxge_close(mxge_softc_t *sc, int down) 3777 { 3778 mxge_cmd_t cmd; 3779 int err, old_down_cnt; 3780 #ifdef IFNET_BUF_RING 3781 struct mxge_slice_state *ss; 3782 int slice; 3783 #endif 3784 3785 #ifdef IFNET_BUF_RING 3786 for (slice = 0; slice < sc->num_slices; slice++) { 3787 ss = &sc->ss[slice]; 3788 ss->if_drv_flags &= ~IFF_DRV_RUNNING; 3789 } 3790 #endif 3791 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3792 if (!down) { 3793 old_down_cnt = sc->down_cnt; 3794 wmb(); 3795 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3796 if (err) { 3797 device_printf(sc->dev, 3798 "Couldn't bring down link\n"); 3799 } 3800 if (old_down_cnt == sc->down_cnt) { 3801 /* wait for down irq */ 3802 DELAY(10 * sc->intr_coal_delay); 3803 } 3804 wmb(); 3805 if (old_down_cnt == sc->down_cnt) { 3806 device_printf(sc->dev, "never got down irq\n"); 3807 } 3808 } 3809 mxge_free_mbufs(sc); 3810 3811 return 0; 3812 } 3813 3814 static void 3815 mxge_setup_cfg_space(mxge_softc_t *sc) 3816 { 3817 device_t dev = sc->dev; 3818 int reg; 3819 uint16_t lnk, pectl; 3820 3821 /* find the PCIe link width and set max read request to 4KB*/ 3822 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { 3823 lnk = pci_read_config(dev, reg + 0x12, 2); 3824 sc->link_width = (lnk >> 4) & 0x3f; 3825 3826 if (sc->pectl == 0) { 3827 pectl = pci_read_config(dev, reg + 0x8, 2); 3828 pectl = (pectl & ~0x7000) | (5 << 12); 3829 pci_write_config(dev, reg + 0x8, pectl, 2); 3830 sc->pectl = pectl; 3831 } else { 3832 /* restore saved pectl after watchdog reset */ 3833 pci_write_config(dev, reg + 0x8, sc->pectl, 2); 3834 } 3835 } 3836 3837 /* Enable DMA and Memory space access */ 3838 pci_enable_busmaster(dev); 3839 } 3840 3841 static uint32_t 3842 mxge_read_reboot(mxge_softc_t *sc) 3843 { 3844 device_t dev = sc->dev; 3845 uint32_t vs; 3846 3847 /* find the vendor specific offset */ 3848 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) { 3849 device_printf(sc->dev, 3850 "could not find vendor specific offset\n"); 3851 return (uint32_t)-1; 3852 } 3853 /* enable read32 mode */ 3854 pci_write_config(dev, vs + 0x10, 0x3, 1); 3855 /* tell NIC which register to read */ 3856 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3857 return (pci_read_config(dev, vs + 0x14, 4)); 3858 } 3859 3860 static void 3861 mxge_watchdog_reset(mxge_softc_t *sc) 3862 { 3863 struct pci_devinfo *dinfo; 3864 struct mxge_slice_state *ss; 3865 int err, running, s, num_tx_slices = 1; 3866 uint32_t reboot; 3867 uint16_t cmd; 3868 3869 err = ENXIO; 3870 3871 device_printf(sc->dev, "Watchdog reset!\n"); 3872 3873 /* 3874 * check to see if the NIC rebooted. If it did, then all of 3875 * PCI config space has been reset, and things like the 3876 * busmaster bit will be zero. If this is the case, then we 3877 * must restore PCI config space before the NIC can be used 3878 * again 3879 */ 3880 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3881 if (cmd == 0xffff) { 3882 /* 3883 * maybe the watchdog caught the NIC rebooting; wait 3884 * up to 100ms for it to finish. If it does not come 3885 * back, then give up 3886 */ 3887 DELAY(1000*100); 3888 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3889 if (cmd == 0xffff) { 3890 device_printf(sc->dev, "NIC disappeared!\n"); 3891 } 3892 } 3893 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3894 /* print the reboot status */ 3895 reboot = mxge_read_reboot(sc); 3896 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3897 reboot); 3898 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 3899 if (running) { 3900 3901 /* 3902 * quiesce NIC so that TX routines will not try to 3903 * xmit after restoration of BAR 3904 */ 3905 3906 /* Mark the link as down */ 3907 if (sc->link_state) { 3908 sc->link_state = 0; 3909 if_link_state_change(sc->ifp, 3910 LINK_STATE_DOWN); 3911 } 3912 #ifdef IFNET_BUF_RING 3913 num_tx_slices = sc->num_slices; 3914 #endif 3915 /* grab all TX locks to ensure no tx */ 3916 for (s = 0; s < num_tx_slices; s++) { 3917 ss = &sc->ss[s]; 3918 mtx_lock(&ss->tx.mtx); 3919 } 3920 mxge_close(sc, 1); 3921 } 3922 /* restore PCI configuration space */ 3923 dinfo = device_get_ivars(sc->dev); 3924 pci_cfg_restore(sc->dev, dinfo); 3925 3926 /* and redo any changes we made to our config space */ 3927 mxge_setup_cfg_space(sc); 3928 3929 /* reload f/w */ 3930 err = mxge_load_firmware(sc, 0); 3931 if (err) { 3932 device_printf(sc->dev, 3933 "Unable to re-load f/w\n"); 3934 } 3935 if (running) { 3936 if (!err) 3937 err = mxge_open(sc); 3938 /* release all TX locks */ 3939 for (s = 0; s < num_tx_slices; s++) { 3940 ss = &sc->ss[s]; 3941 #ifdef IFNET_BUF_RING 3942 mxge_start_locked(ss); 3943 #endif 3944 mtx_unlock(&ss->tx.mtx); 3945 } 3946 } 3947 sc->watchdog_resets++; 3948 } else { 3949 device_printf(sc->dev, 3950 "NIC did not reboot, not resetting\n"); 3951 err = 0; 3952 } 3953 if (err) { 3954 device_printf(sc->dev, "watchdog reset failed\n"); 3955 } else { 3956 if (sc->dying == 2) 3957 sc->dying = 0; 3958 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3959 } 3960 } 3961 3962 static void 3963 mxge_watchdog_task(void *arg, int pending) 3964 { 3965 mxge_softc_t *sc = arg; 3966 3967 3968 mtx_lock(&sc->driver_mtx); 3969 mxge_watchdog_reset(sc); 3970 mtx_unlock(&sc->driver_mtx); 3971 } 3972 3973 static void 3974 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice) 3975 { 3976 tx = &sc->ss[slice].tx; 3977 device_printf(sc->dev, "slice %d struck? ring state:\n", slice); 3978 device_printf(sc->dev, 3979 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3980 tx->req, tx->done, tx->queue_active); 3981 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3982 tx->activate, tx->deactivate); 3983 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3984 tx->pkt_done, 3985 be32toh(sc->ss->fw_stats->send_done_count)); 3986 } 3987 3988 static int 3989 mxge_watchdog(mxge_softc_t *sc) 3990 { 3991 mxge_tx_ring_t *tx; 3992 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3993 int i, err = 0; 3994 3995 /* see if we have outstanding transmits, which 3996 have been pending for more than mxge_ticks */ 3997 for (i = 0; 3998 #ifdef IFNET_BUF_RING 3999 (i < sc->num_slices) && (err == 0); 4000 #else 4001 (i < 1) && (err == 0); 4002 #endif 4003 i++) { 4004 tx = &sc->ss[i].tx; 4005 if (tx->req != tx->done && 4006 tx->watchdog_req != tx->watchdog_done && 4007 tx->done == tx->watchdog_done) { 4008 /* check for pause blocking before resetting */ 4009 if (tx->watchdog_rx_pause == rx_pause) { 4010 mxge_warn_stuck(sc, tx, i); 4011 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 4012 return (ENXIO); 4013 } 4014 else 4015 device_printf(sc->dev, "Flow control blocking " 4016 "xmits, check link partner\n"); 4017 } 4018 4019 tx->watchdog_req = tx->req; 4020 tx->watchdog_done = tx->done; 4021 tx->watchdog_rx_pause = rx_pause; 4022 } 4023 4024 if (sc->need_media_probe) 4025 mxge_media_probe(sc); 4026 return (err); 4027 } 4028 4029 static uint64_t 4030 mxge_get_counter(struct ifnet *ifp, ift_counter cnt) 4031 { 4032 struct mxge_softc *sc; 4033 uint64_t rv; 4034 4035 sc = if_getsoftc(ifp); 4036 rv = 0; 4037 4038 switch (cnt) { 4039 case IFCOUNTER_IPACKETS: 4040 for (int s = 0; s < sc->num_slices; s++) 4041 rv += sc->ss[s].ipackets; 4042 return (rv); 4043 case IFCOUNTER_OPACKETS: 4044 for (int s = 0; s < sc->num_slices; s++) 4045 rv += sc->ss[s].opackets; 4046 return (rv); 4047 case IFCOUNTER_OERRORS: 4048 for (int s = 0; s < sc->num_slices; s++) 4049 rv += sc->ss[s].oerrors; 4050 return (rv); 4051 #ifdef IFNET_BUF_RING 4052 case IFCOUNTER_OBYTES: 4053 for (int s = 0; s < sc->num_slices; s++) 4054 rv += sc->ss[s].obytes; 4055 return (rv); 4056 case IFCOUNTER_OMCASTS: 4057 for (int s = 0; s < sc->num_slices; s++) 4058 rv += sc->ss[s].omcasts; 4059 return (rv); 4060 case IFCOUNTER_OQDROPS: 4061 for (int s = 0; s < sc->num_slices; s++) 4062 rv += sc->ss[s].tx.br->br_drops; 4063 return (rv); 4064 #endif 4065 default: 4066 return (if_get_counter_default(ifp, cnt)); 4067 } 4068 } 4069 4070 static void 4071 mxge_tick(void *arg) 4072 { 4073 mxge_softc_t *sc = arg; 4074 u_long pkts = 0; 4075 int err = 0; 4076 int running, ticks; 4077 uint16_t cmd; 4078 4079 ticks = mxge_ticks; 4080 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 4081 if (running) { 4082 if (!sc->watchdog_countdown) { 4083 err = mxge_watchdog(sc); 4084 sc->watchdog_countdown = 4; 4085 } 4086 sc->watchdog_countdown--; 4087 } 4088 if (pkts == 0) { 4089 /* ensure NIC did not suffer h/w fault while idle */ 4090 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 4091 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 4092 sc->dying = 2; 4093 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 4094 err = ENXIO; 4095 } 4096 /* look less often if NIC is idle */ 4097 ticks *= 4; 4098 } 4099 4100 if (err == 0) 4101 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 4102 4103 } 4104 4105 static int 4106 mxge_media_change(struct ifnet *ifp) 4107 { 4108 return EINVAL; 4109 } 4110 4111 static int 4112 mxge_change_mtu(mxge_softc_t *sc, int mtu) 4113 { 4114 struct ifnet *ifp = sc->ifp; 4115 int real_mtu, old_mtu; 4116 int err = 0; 4117 4118 4119 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 4120 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 4121 return EINVAL; 4122 mtx_lock(&sc->driver_mtx); 4123 old_mtu = ifp->if_mtu; 4124 ifp->if_mtu = mtu; 4125 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4126 mxge_close(sc, 0); 4127 err = mxge_open(sc); 4128 if (err != 0) { 4129 ifp->if_mtu = old_mtu; 4130 mxge_close(sc, 0); 4131 (void) mxge_open(sc); 4132 } 4133 } 4134 mtx_unlock(&sc->driver_mtx); 4135 return err; 4136 } 4137 4138 static void 4139 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 4140 { 4141 mxge_softc_t *sc = ifp->if_softc; 4142 4143 4144 if (sc == NULL) 4145 return; 4146 ifmr->ifm_status = IFM_AVALID; 4147 ifmr->ifm_active = IFM_ETHER | IFM_FDX; 4148 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 4149 ifmr->ifm_active |= sc->current_media; 4150 } 4151 4152 static int 4153 mxge_fetch_i2c(mxge_softc_t *sc, struct ifi2creq *i2c) 4154 { 4155 mxge_cmd_t cmd; 4156 uint32_t i2c_args; 4157 int i, ms, err; 4158 4159 4160 if (i2c->dev_addr != 0xA0 && 4161 i2c->dev_addr != 0xA2) 4162 return (EINVAL); 4163 if (i2c->len > sizeof(i2c->data)) 4164 return (EINVAL); 4165 4166 for (i = 0; i < i2c->len; i++) { 4167 i2c_args = i2c->dev_addr << 0x8; 4168 i2c_args |= i2c->offset + i; 4169 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 4170 cmd.data1 = i2c_args; 4171 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 4172 4173 if (err != MXGEFW_CMD_OK) 4174 return (EIO); 4175 /* now we wait for the data to be cached */ 4176 cmd.data0 = i2c_args & 0xff; 4177 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 4178 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 4179 cmd.data0 = i2c_args & 0xff; 4180 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 4181 if (err == EBUSY) 4182 DELAY(1000); 4183 } 4184 if (err != MXGEFW_CMD_OK) 4185 return (EIO); 4186 i2c->data[i] = cmd.data0; 4187 } 4188 return (0); 4189 } 4190 4191 static int 4192 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 4193 { 4194 mxge_softc_t *sc = ifp->if_softc; 4195 struct ifreq *ifr = (struct ifreq *)data; 4196 struct ifi2creq i2c; 4197 int err, mask; 4198 4199 err = 0; 4200 switch (command) { 4201 case SIOCSIFMTU: 4202 err = mxge_change_mtu(sc, ifr->ifr_mtu); 4203 break; 4204 4205 case SIOCSIFFLAGS: 4206 mtx_lock(&sc->driver_mtx); 4207 if (sc->dying) { 4208 mtx_unlock(&sc->driver_mtx); 4209 return EINVAL; 4210 } 4211 if (ifp->if_flags & IFF_UP) { 4212 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 4213 err = mxge_open(sc); 4214 } else { 4215 /* take care of promis can allmulti 4216 flag chages */ 4217 mxge_change_promisc(sc, 4218 ifp->if_flags & IFF_PROMISC); 4219 mxge_set_multicast_list(sc); 4220 } 4221 } else { 4222 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4223 mxge_close(sc, 0); 4224 } 4225 } 4226 mtx_unlock(&sc->driver_mtx); 4227 break; 4228 4229 case SIOCADDMULTI: 4230 case SIOCDELMULTI: 4231 mtx_lock(&sc->driver_mtx); 4232 if (sc->dying) { 4233 mtx_unlock(&sc->driver_mtx); 4234 return (EINVAL); 4235 } 4236 mxge_set_multicast_list(sc); 4237 mtx_unlock(&sc->driver_mtx); 4238 break; 4239 4240 case SIOCSIFCAP: 4241 mtx_lock(&sc->driver_mtx); 4242 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 4243 if (mask & IFCAP_TXCSUM) { 4244 if (IFCAP_TXCSUM & ifp->if_capenable) { 4245 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 4246 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP); 4247 } else { 4248 ifp->if_capenable |= IFCAP_TXCSUM; 4249 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 4250 } 4251 } else if (mask & IFCAP_RXCSUM) { 4252 if (IFCAP_RXCSUM & ifp->if_capenable) { 4253 ifp->if_capenable &= ~IFCAP_RXCSUM; 4254 } else { 4255 ifp->if_capenable |= IFCAP_RXCSUM; 4256 } 4257 } 4258 if (mask & IFCAP_TSO4) { 4259 if (IFCAP_TSO4 & ifp->if_capenable) { 4260 ifp->if_capenable &= ~IFCAP_TSO4; 4261 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 4262 ifp->if_capenable |= IFCAP_TSO4; 4263 ifp->if_hwassist |= CSUM_TSO; 4264 } else { 4265 printf("mxge requires tx checksum offload" 4266 " be enabled to use TSO\n"); 4267 err = EINVAL; 4268 } 4269 } 4270 #if IFCAP_TSO6 4271 if (mask & IFCAP_TXCSUM_IPV6) { 4272 if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) { 4273 ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 4274 | IFCAP_TSO6); 4275 ifp->if_hwassist &= ~(CSUM_TCP_IPV6 4276 | CSUM_UDP); 4277 } else { 4278 ifp->if_capenable |= IFCAP_TXCSUM_IPV6; 4279 ifp->if_hwassist |= (CSUM_TCP_IPV6 4280 | CSUM_UDP_IPV6); 4281 } 4282 } else if (mask & IFCAP_RXCSUM_IPV6) { 4283 if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) { 4284 ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6; 4285 } else { 4286 ifp->if_capenable |= IFCAP_RXCSUM_IPV6; 4287 } 4288 } 4289 if (mask & IFCAP_TSO6) { 4290 if (IFCAP_TSO6 & ifp->if_capenable) { 4291 ifp->if_capenable &= ~IFCAP_TSO6; 4292 } else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) { 4293 ifp->if_capenable |= IFCAP_TSO6; 4294 ifp->if_hwassist |= CSUM_TSO; 4295 } else { 4296 printf("mxge requires tx checksum offload" 4297 " be enabled to use TSO\n"); 4298 err = EINVAL; 4299 } 4300 } 4301 #endif /*IFCAP_TSO6 */ 4302 4303 if (mask & IFCAP_LRO) 4304 ifp->if_capenable ^= IFCAP_LRO; 4305 if (mask & IFCAP_VLAN_HWTAGGING) 4306 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 4307 if (mask & IFCAP_VLAN_HWTSO) 4308 ifp->if_capenable ^= IFCAP_VLAN_HWTSO; 4309 4310 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) || 4311 !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING)) 4312 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO; 4313 4314 mtx_unlock(&sc->driver_mtx); 4315 VLAN_CAPABILITIES(ifp); 4316 4317 break; 4318 4319 case SIOCGIFMEDIA: 4320 mtx_lock(&sc->driver_mtx); 4321 if (sc->dying) { 4322 mtx_unlock(&sc->driver_mtx); 4323 return (EINVAL); 4324 } 4325 mxge_media_probe(sc); 4326 mtx_unlock(&sc->driver_mtx); 4327 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 4328 &sc->media, command); 4329 break; 4330 4331 case SIOCGI2C: 4332 if (sc->connector != MXGE_XFP && 4333 sc->connector != MXGE_SFP) { 4334 err = ENXIO; 4335 break; 4336 } 4337 err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); 4338 if (err != 0) 4339 break; 4340 mtx_lock(&sc->driver_mtx); 4341 if (sc->dying) { 4342 mtx_unlock(&sc->driver_mtx); 4343 return (EINVAL); 4344 } 4345 err = mxge_fetch_i2c(sc, &i2c); 4346 mtx_unlock(&sc->driver_mtx); 4347 if (err == 0) 4348 err = copyout(&i2c, ifr->ifr_ifru.ifru_data, 4349 sizeof(i2c)); 4350 break; 4351 default: 4352 err = ether_ioctl(ifp, command, data); 4353 break; 4354 } 4355 return err; 4356 } 4357 4358 static void 4359 mxge_fetch_tunables(mxge_softc_t *sc) 4360 { 4361 4362 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); 4363 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 4364 &mxge_flow_control); 4365 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 4366 &mxge_intr_coal_delay); 4367 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 4368 &mxge_nvidia_ecrc_enable); 4369 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 4370 &mxge_force_firmware); 4371 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 4372 &mxge_deassert_wait); 4373 TUNABLE_INT_FETCH("hw.mxge.verbose", 4374 &mxge_verbose); 4375 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 4376 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); 4377 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); 4378 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type); 4379 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu); 4380 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle); 4381 4382 if (bootverbose) 4383 mxge_verbose = 1; 4384 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 4385 mxge_intr_coal_delay = 30; 4386 if (mxge_ticks == 0) 4387 mxge_ticks = hz / 2; 4388 sc->pause = mxge_flow_control; 4389 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 4390 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) { 4391 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 4392 } 4393 if (mxge_initial_mtu > ETHERMTU_JUMBO || 4394 mxge_initial_mtu < ETHER_MIN_LEN) 4395 mxge_initial_mtu = ETHERMTU_JUMBO; 4396 4397 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE) 4398 mxge_throttle = MXGE_MAX_THROTTLE; 4399 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE) 4400 mxge_throttle = MXGE_MIN_THROTTLE; 4401 sc->throttle = mxge_throttle; 4402 } 4403 4404 4405 static void 4406 mxge_free_slices(mxge_softc_t *sc) 4407 { 4408 struct mxge_slice_state *ss; 4409 int i; 4410 4411 4412 if (sc->ss == NULL) 4413 return; 4414 4415 for (i = 0; i < sc->num_slices; i++) { 4416 ss = &sc->ss[i]; 4417 if (ss->fw_stats != NULL) { 4418 mxge_dma_free(&ss->fw_stats_dma); 4419 ss->fw_stats = NULL; 4420 #ifdef IFNET_BUF_RING 4421 if (ss->tx.br != NULL) { 4422 drbr_free(ss->tx.br, M_DEVBUF); 4423 ss->tx.br = NULL; 4424 } 4425 #endif 4426 mtx_destroy(&ss->tx.mtx); 4427 } 4428 if (ss->rx_done.entry != NULL) { 4429 mxge_dma_free(&ss->rx_done.dma); 4430 ss->rx_done.entry = NULL; 4431 } 4432 } 4433 free(sc->ss, M_DEVBUF); 4434 sc->ss = NULL; 4435 } 4436 4437 static int 4438 mxge_alloc_slices(mxge_softc_t *sc) 4439 { 4440 mxge_cmd_t cmd; 4441 struct mxge_slice_state *ss; 4442 size_t bytes; 4443 int err, i, max_intr_slots; 4444 4445 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4446 if (err != 0) { 4447 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4448 return err; 4449 } 4450 sc->rx_ring_size = cmd.data0; 4451 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); 4452 4453 bytes = sizeof (*sc->ss) * sc->num_slices; 4454 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO); 4455 if (sc->ss == NULL) 4456 return (ENOMEM); 4457 for (i = 0; i < sc->num_slices; i++) { 4458 ss = &sc->ss[i]; 4459 4460 ss->sc = sc; 4461 4462 /* allocate per-slice rx interrupt queues */ 4463 4464 bytes = max_intr_slots * sizeof (*ss->rx_done.entry); 4465 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096); 4466 if (err != 0) 4467 goto abort; 4468 ss->rx_done.entry = ss->rx_done.dma.addr; 4469 bzero(ss->rx_done.entry, bytes); 4470 4471 /* 4472 * allocate the per-slice firmware stats; stats 4473 * (including tx) are used used only on the first 4474 * slice for now 4475 */ 4476 #ifndef IFNET_BUF_RING 4477 if (i > 0) 4478 continue; 4479 #endif 4480 4481 bytes = sizeof (*ss->fw_stats); 4482 err = mxge_dma_alloc(sc, &ss->fw_stats_dma, 4483 sizeof (*ss->fw_stats), 64); 4484 if (err != 0) 4485 goto abort; 4486 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr; 4487 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name), 4488 "%s:tx(%d)", device_get_nameunit(sc->dev), i); 4489 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF); 4490 #ifdef IFNET_BUF_RING 4491 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK, 4492 &ss->tx.mtx); 4493 #endif 4494 } 4495 4496 return (0); 4497 4498 abort: 4499 mxge_free_slices(sc); 4500 return (ENOMEM); 4501 } 4502 4503 static void 4504 mxge_slice_probe(mxge_softc_t *sc) 4505 { 4506 mxge_cmd_t cmd; 4507 char *old_fw; 4508 int msix_cnt, status, max_intr_slots; 4509 4510 sc->num_slices = 1; 4511 /* 4512 * don't enable multiple slices if they are not enabled, 4513 * or if this is not an SMP system 4514 */ 4515 4516 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2) 4517 return; 4518 4519 /* see how many MSI-X interrupts are available */ 4520 msix_cnt = pci_msix_count(sc->dev); 4521 if (msix_cnt < 2) 4522 return; 4523 4524 /* now load the slice aware firmware see what it supports */ 4525 old_fw = sc->fw_name; 4526 if (old_fw == mxge_fw_aligned) 4527 sc->fw_name = mxge_fw_rss_aligned; 4528 else 4529 sc->fw_name = mxge_fw_rss_unaligned; 4530 status = mxge_load_firmware(sc, 0); 4531 if (status != 0) { 4532 device_printf(sc->dev, "Falling back to a single slice\n"); 4533 return; 4534 } 4535 4536 /* try to send a reset command to the card to see if it 4537 is alive */ 4538 memset(&cmd, 0, sizeof (cmd)); 4539 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 4540 if (status != 0) { 4541 device_printf(sc->dev, "failed reset\n"); 4542 goto abort_with_fw; 4543 } 4544 4545 /* get rx ring size */ 4546 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4547 if (status != 0) { 4548 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4549 goto abort_with_fw; 4550 } 4551 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t)); 4552 4553 /* tell it the size of the interrupt queues */ 4554 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot); 4555 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 4556 if (status != 0) { 4557 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 4558 goto abort_with_fw; 4559 } 4560 4561 /* ask the maximum number of slices it supports */ 4562 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 4563 if (status != 0) { 4564 device_printf(sc->dev, 4565 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); 4566 goto abort_with_fw; 4567 } 4568 sc->num_slices = cmd.data0; 4569 if (sc->num_slices > msix_cnt) 4570 sc->num_slices = msix_cnt; 4571 4572 if (mxge_max_slices == -1) { 4573 /* cap to number of CPUs in system */ 4574 if (sc->num_slices > mp_ncpus) 4575 sc->num_slices = mp_ncpus; 4576 } else { 4577 if (sc->num_slices > mxge_max_slices) 4578 sc->num_slices = mxge_max_slices; 4579 } 4580 /* make sure it is a power of two */ 4581 while (sc->num_slices & (sc->num_slices - 1)) 4582 sc->num_slices--; 4583 4584 if (mxge_verbose) 4585 device_printf(sc->dev, "using %d slices\n", 4586 sc->num_slices); 4587 4588 return; 4589 4590 abort_with_fw: 4591 sc->fw_name = old_fw; 4592 (void) mxge_load_firmware(sc, 0); 4593 } 4594 4595 static int 4596 mxge_add_msix_irqs(mxge_softc_t *sc) 4597 { 4598 size_t bytes; 4599 int count, err, i, rid; 4600 4601 rid = PCIR_BAR(2); 4602 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 4603 &rid, RF_ACTIVE); 4604 4605 if (sc->msix_table_res == NULL) { 4606 device_printf(sc->dev, "couldn't alloc MSIX table res\n"); 4607 return ENXIO; 4608 } 4609 4610 count = sc->num_slices; 4611 err = pci_alloc_msix(sc->dev, &count); 4612 if (err != 0) { 4613 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d" 4614 "err = %d \n", sc->num_slices, err); 4615 goto abort_with_msix_table; 4616 } 4617 if (count < sc->num_slices) { 4618 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n", 4619 count, sc->num_slices); 4620 device_printf(sc->dev, 4621 "Try setting hw.mxge.max_slices to %d\n", 4622 count); 4623 err = ENOSPC; 4624 goto abort_with_msix; 4625 } 4626 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices; 4627 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4628 if (sc->msix_irq_res == NULL) { 4629 err = ENOMEM; 4630 goto abort_with_msix; 4631 } 4632 4633 for (i = 0; i < sc->num_slices; i++) { 4634 rid = i + 1; 4635 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev, 4636 SYS_RES_IRQ, 4637 &rid, RF_ACTIVE); 4638 if (sc->msix_irq_res[i] == NULL) { 4639 device_printf(sc->dev, "couldn't allocate IRQ res" 4640 " for message %d\n", i); 4641 err = ENXIO; 4642 goto abort_with_res; 4643 } 4644 } 4645 4646 bytes = sizeof (*sc->msix_ih) * sc->num_slices; 4647 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4648 4649 for (i = 0; i < sc->num_slices; i++) { 4650 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i], 4651 INTR_TYPE_NET | INTR_MPSAFE, 4652 #if __FreeBSD_version > 700030 4653 NULL, 4654 #endif 4655 mxge_intr, &sc->ss[i], &sc->msix_ih[i]); 4656 if (err != 0) { 4657 device_printf(sc->dev, "couldn't setup intr for " 4658 "message %d\n", i); 4659 goto abort_with_intr; 4660 } 4661 bus_describe_intr(sc->dev, sc->msix_irq_res[i], 4662 sc->msix_ih[i], "s%d", i); 4663 } 4664 4665 if (mxge_verbose) { 4666 device_printf(sc->dev, "using %d msix IRQs:", 4667 sc->num_slices); 4668 for (i = 0; i < sc->num_slices; i++) 4669 printf(" %jd", rman_get_start(sc->msix_irq_res[i])); 4670 printf("\n"); 4671 } 4672 return (0); 4673 4674 abort_with_intr: 4675 for (i = 0; i < sc->num_slices; i++) { 4676 if (sc->msix_ih[i] != NULL) { 4677 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4678 sc->msix_ih[i]); 4679 sc->msix_ih[i] = NULL; 4680 } 4681 } 4682 free(sc->msix_ih, M_DEVBUF); 4683 4684 4685 abort_with_res: 4686 for (i = 0; i < sc->num_slices; i++) { 4687 rid = i + 1; 4688 if (sc->msix_irq_res[i] != NULL) 4689 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4690 sc->msix_irq_res[i]); 4691 sc->msix_irq_res[i] = NULL; 4692 } 4693 free(sc->msix_irq_res, M_DEVBUF); 4694 4695 4696 abort_with_msix: 4697 pci_release_msi(sc->dev); 4698 4699 abort_with_msix_table: 4700 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4701 sc->msix_table_res); 4702 4703 return err; 4704 } 4705 4706 static int 4707 mxge_add_single_irq(mxge_softc_t *sc) 4708 { 4709 int count, err, rid; 4710 4711 count = pci_msi_count(sc->dev); 4712 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) { 4713 rid = 1; 4714 } else { 4715 rid = 0; 4716 sc->legacy_irq = 1; 4717 } 4718 sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid, 4719 RF_SHAREABLE | RF_ACTIVE); 4720 if (sc->irq_res == NULL) { 4721 device_printf(sc->dev, "could not alloc interrupt\n"); 4722 return ENXIO; 4723 } 4724 if (mxge_verbose) 4725 device_printf(sc->dev, "using %s irq %jd\n", 4726 sc->legacy_irq ? "INTx" : "MSI", 4727 rman_get_start(sc->irq_res)); 4728 err = bus_setup_intr(sc->dev, sc->irq_res, 4729 INTR_TYPE_NET | INTR_MPSAFE, 4730 #if __FreeBSD_version > 700030 4731 NULL, 4732 #endif 4733 mxge_intr, &sc->ss[0], &sc->ih); 4734 if (err != 0) { 4735 bus_release_resource(sc->dev, SYS_RES_IRQ, 4736 sc->legacy_irq ? 0 : 1, sc->irq_res); 4737 if (!sc->legacy_irq) 4738 pci_release_msi(sc->dev); 4739 } 4740 return err; 4741 } 4742 4743 static void 4744 mxge_rem_msix_irqs(mxge_softc_t *sc) 4745 { 4746 int i, rid; 4747 4748 for (i = 0; i < sc->num_slices; i++) { 4749 if (sc->msix_ih[i] != NULL) { 4750 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4751 sc->msix_ih[i]); 4752 sc->msix_ih[i] = NULL; 4753 } 4754 } 4755 free(sc->msix_ih, M_DEVBUF); 4756 4757 for (i = 0; i < sc->num_slices; i++) { 4758 rid = i + 1; 4759 if (sc->msix_irq_res[i] != NULL) 4760 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4761 sc->msix_irq_res[i]); 4762 sc->msix_irq_res[i] = NULL; 4763 } 4764 free(sc->msix_irq_res, M_DEVBUF); 4765 4766 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4767 sc->msix_table_res); 4768 4769 pci_release_msi(sc->dev); 4770 return; 4771 } 4772 4773 static void 4774 mxge_rem_single_irq(mxge_softc_t *sc) 4775 { 4776 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 4777 bus_release_resource(sc->dev, SYS_RES_IRQ, 4778 sc->legacy_irq ? 0 : 1, sc->irq_res); 4779 if (!sc->legacy_irq) 4780 pci_release_msi(sc->dev); 4781 } 4782 4783 static void 4784 mxge_rem_irq(mxge_softc_t *sc) 4785 { 4786 if (sc->num_slices > 1) 4787 mxge_rem_msix_irqs(sc); 4788 else 4789 mxge_rem_single_irq(sc); 4790 } 4791 4792 static int 4793 mxge_add_irq(mxge_softc_t *sc) 4794 { 4795 int err; 4796 4797 if (sc->num_slices > 1) 4798 err = mxge_add_msix_irqs(sc); 4799 else 4800 err = mxge_add_single_irq(sc); 4801 4802 if (0 && err == 0 && sc->num_slices > 1) { 4803 mxge_rem_msix_irqs(sc); 4804 err = mxge_add_msix_irqs(sc); 4805 } 4806 return err; 4807 } 4808 4809 4810 static int 4811 mxge_attach(device_t dev) 4812 { 4813 mxge_cmd_t cmd; 4814 mxge_softc_t *sc = device_get_softc(dev); 4815 struct ifnet *ifp; 4816 int err, rid; 4817 4818 sc->dev = dev; 4819 mxge_fetch_tunables(sc); 4820 4821 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc); 4822 sc->tq = taskqueue_create("mxge_taskq", M_WAITOK, 4823 taskqueue_thread_enqueue, &sc->tq); 4824 if (sc->tq == NULL) { 4825 err = ENOMEM; 4826 goto abort_with_nothing; 4827 } 4828 4829 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 4830 1, /* alignment */ 4831 0, /* boundary */ 4832 BUS_SPACE_MAXADDR, /* low */ 4833 BUS_SPACE_MAXADDR, /* high */ 4834 NULL, NULL, /* filter */ 4835 65536 + 256, /* maxsize */ 4836 MXGE_MAX_SEND_DESC, /* num segs */ 4837 65536, /* maxsegsize */ 4838 0, /* flags */ 4839 NULL, NULL, /* lock */ 4840 &sc->parent_dmat); /* tag */ 4841 4842 if (err != 0) { 4843 device_printf(sc->dev, "Err %d allocating parent dmat\n", 4844 err); 4845 goto abort_with_tq; 4846 } 4847 4848 ifp = sc->ifp = if_alloc(IFT_ETHER); 4849 if (ifp == NULL) { 4850 device_printf(dev, "can not if_alloc()\n"); 4851 err = ENOSPC; 4852 goto abort_with_parent_dmat; 4853 } 4854 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4855 4856 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", 4857 device_get_nameunit(dev)); 4858 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF); 4859 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), 4860 "%s:drv", device_get_nameunit(dev)); 4861 mtx_init(&sc->driver_mtx, sc->driver_mtx_name, 4862 MTX_NETWORK_LOCK, MTX_DEF); 4863 4864 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); 4865 4866 mxge_setup_cfg_space(sc); 4867 4868 /* Map the board into the kernel */ 4869 rid = PCIR_BARS; 4870 sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, 4871 RF_ACTIVE); 4872 if (sc->mem_res == NULL) { 4873 device_printf(dev, "could not map memory\n"); 4874 err = ENXIO; 4875 goto abort_with_lock; 4876 } 4877 sc->sram = rman_get_virtual(sc->mem_res); 4878 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4879 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4880 device_printf(dev, "impossible memory region size %jd\n", 4881 rman_get_size(sc->mem_res)); 4882 err = ENXIO; 4883 goto abort_with_mem_res; 4884 } 4885 4886 /* make NULL terminated copy of the EEPROM strings section of 4887 lanai SRAM */ 4888 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4889 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4890 rman_get_bushandle(sc->mem_res), 4891 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4892 sc->eeprom_strings, 4893 MXGE_EEPROM_STRINGS_SIZE - 2); 4894 err = mxge_parse_strings(sc); 4895 if (err != 0) 4896 goto abort_with_mem_res; 4897 4898 /* Enable write combining for efficient use of PCIe bus */ 4899 mxge_enable_wc(sc); 4900 4901 /* Allocate the out of band dma memory */ 4902 err = mxge_dma_alloc(sc, &sc->cmd_dma, 4903 sizeof (mxge_cmd_t), 64); 4904 if (err != 0) 4905 goto abort_with_mem_res; 4906 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 4907 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4908 if (err != 0) 4909 goto abort_with_cmd_dma; 4910 4911 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4912 if (err != 0) 4913 goto abort_with_zeropad_dma; 4914 4915 /* select & load the firmware */ 4916 err = mxge_select_firmware(sc); 4917 if (err != 0) 4918 goto abort_with_dmabench; 4919 sc->intr_coal_delay = mxge_intr_coal_delay; 4920 4921 mxge_slice_probe(sc); 4922 err = mxge_alloc_slices(sc); 4923 if (err != 0) 4924 goto abort_with_dmabench; 4925 4926 err = mxge_reset(sc, 0); 4927 if (err != 0) 4928 goto abort_with_slices; 4929 4930 err = mxge_alloc_rings(sc); 4931 if (err != 0) { 4932 device_printf(sc->dev, "failed to allocate rings\n"); 4933 goto abort_with_slices; 4934 } 4935 4936 err = mxge_add_irq(sc); 4937 if (err != 0) { 4938 device_printf(sc->dev, "failed to add irq\n"); 4939 goto abort_with_rings; 4940 } 4941 4942 ifp->if_baudrate = IF_Gbps(10); 4943 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 4944 IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 | 4945 IFCAP_RXCSUM_IPV6; 4946 #if defined(INET) || defined(INET6) 4947 ifp->if_capabilities |= IFCAP_LRO; 4948 #endif 4949 4950 #ifdef MXGE_NEW_VLAN_API 4951 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; 4952 4953 /* Only FW 1.4.32 and newer can do TSO over vlans */ 4954 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 4955 sc->fw_ver_tiny >= 32) 4956 ifp->if_capabilities |= IFCAP_VLAN_HWTSO; 4957 #endif 4958 sc->max_mtu = mxge_max_mtu(sc); 4959 if (sc->max_mtu >= 9000) 4960 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 4961 else 4962 device_printf(dev, "MTU limited to %d. Install " 4963 "latest firmware for 9000 byte jumbo support\n", 4964 sc->max_mtu - ETHER_HDR_LEN); 4965 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4966 ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6; 4967 /* check to see if f/w supports TSO for IPv6 */ 4968 if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) { 4969 if (CSUM_TCP_IPV6) 4970 ifp->if_capabilities |= IFCAP_TSO6; 4971 sc->max_tso6_hlen = min(cmd.data0, 4972 sizeof (sc->ss[0].scratch)); 4973 } 4974 ifp->if_capenable = ifp->if_capabilities; 4975 if (sc->lro_cnt == 0) 4976 ifp->if_capenable &= ~IFCAP_LRO; 4977 ifp->if_init = mxge_init; 4978 ifp->if_softc = sc; 4979 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4980 ifp->if_ioctl = mxge_ioctl; 4981 ifp->if_start = mxge_start; 4982 ifp->if_get_counter = mxge_get_counter; 4983 ifp->if_hw_tsomax = IP_MAXPACKET - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); 4984 ifp->if_hw_tsomaxsegcount = sc->ss[0].tx.max_desc; 4985 ifp->if_hw_tsomaxsegsize = IP_MAXPACKET; 4986 /* Initialise the ifmedia structure */ 4987 ifmedia_init(&sc->media, 0, mxge_media_change, 4988 mxge_media_status); 4989 mxge_media_init(sc); 4990 mxge_media_probe(sc); 4991 sc->dying = 0; 4992 ether_ifattach(ifp, sc->mac_addr); 4993 /* ether_ifattach sets mtu to ETHERMTU */ 4994 if (mxge_initial_mtu != ETHERMTU) 4995 mxge_change_mtu(sc, mxge_initial_mtu); 4996 4997 mxge_add_sysctls(sc); 4998 #ifdef IFNET_BUF_RING 4999 ifp->if_transmit = mxge_transmit; 5000 ifp->if_qflush = mxge_qflush; 5001 #endif 5002 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq", 5003 device_get_nameunit(sc->dev)); 5004 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 5005 return 0; 5006 5007 abort_with_rings: 5008 mxge_free_rings(sc); 5009 abort_with_slices: 5010 mxge_free_slices(sc); 5011 abort_with_dmabench: 5012 mxge_dma_free(&sc->dmabench_dma); 5013 abort_with_zeropad_dma: 5014 mxge_dma_free(&sc->zeropad_dma); 5015 abort_with_cmd_dma: 5016 mxge_dma_free(&sc->cmd_dma); 5017 abort_with_mem_res: 5018 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 5019 abort_with_lock: 5020 pci_disable_busmaster(dev); 5021 mtx_destroy(&sc->cmd_mtx); 5022 mtx_destroy(&sc->driver_mtx); 5023 if_free(ifp); 5024 abort_with_parent_dmat: 5025 bus_dma_tag_destroy(sc->parent_dmat); 5026 abort_with_tq: 5027 if (sc->tq != NULL) { 5028 taskqueue_drain(sc->tq, &sc->watchdog_task); 5029 taskqueue_free(sc->tq); 5030 sc->tq = NULL; 5031 } 5032 abort_with_nothing: 5033 return err; 5034 } 5035 5036 static int 5037 mxge_detach(device_t dev) 5038 { 5039 mxge_softc_t *sc = device_get_softc(dev); 5040 5041 if (mxge_vlans_active(sc)) { 5042 device_printf(sc->dev, 5043 "Detach vlans before removing module\n"); 5044 return EBUSY; 5045 } 5046 mtx_lock(&sc->driver_mtx); 5047 sc->dying = 1; 5048 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 5049 mxge_close(sc, 0); 5050 mtx_unlock(&sc->driver_mtx); 5051 ether_ifdetach(sc->ifp); 5052 if (sc->tq != NULL) { 5053 taskqueue_drain(sc->tq, &sc->watchdog_task); 5054 taskqueue_free(sc->tq); 5055 sc->tq = NULL; 5056 } 5057 callout_drain(&sc->co_hdl); 5058 ifmedia_removeall(&sc->media); 5059 mxge_dummy_rdma(sc, 0); 5060 mxge_rem_sysctls(sc); 5061 mxge_rem_irq(sc); 5062 mxge_free_rings(sc); 5063 mxge_free_slices(sc); 5064 mxge_dma_free(&sc->dmabench_dma); 5065 mxge_dma_free(&sc->zeropad_dma); 5066 mxge_dma_free(&sc->cmd_dma); 5067 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 5068 pci_disable_busmaster(dev); 5069 mtx_destroy(&sc->cmd_mtx); 5070 mtx_destroy(&sc->driver_mtx); 5071 if_free(sc->ifp); 5072 bus_dma_tag_destroy(sc->parent_dmat); 5073 return 0; 5074 } 5075 5076 static int 5077 mxge_shutdown(device_t dev) 5078 { 5079 return 0; 5080 } 5081 5082 /* 5083 This file uses Myri10GE driver indentation. 5084 5085 Local Variables: 5086 c-file-style:"linux" 5087 tab-width:8 5088 End: 5089 */ 5090