1 /****************************************************************************** 2 SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 4 Copyright (c) 2006-2013, Myricom Inc. 5 All rights reserved. 6 7 Redistribution and use in source and binary forms, with or without 8 modification, are permitted provided that the following conditions are met: 9 10 1. Redistributions of source code must retain the above copyright notice, 11 this list of conditions and the following disclaimer. 12 13 2. Neither the name of the Myricom Inc, nor the names of its 14 contributors may be used to endorse or promote products derived from 15 this software without specific prior written permission. 16 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 POSSIBILITY OF SUCH DAMAGE. 
28 29 ***************************************************************************/ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/linker.h> 37 #include <sys/firmware.h> 38 #include <sys/endian.h> 39 #include <sys/sockio.h> 40 #include <sys/mbuf.h> 41 #include <sys/malloc.h> 42 #include <sys/kdb.h> 43 #include <sys/kernel.h> 44 #include <sys/lock.h> 45 #include <sys/module.h> 46 #include <sys/socket.h> 47 #include <sys/sysctl.h> 48 #include <sys/sx.h> 49 #include <sys/taskqueue.h> 50 #include <contrib/zlib/zlib.h> 51 #include <dev/zlib/zcalloc.h> 52 53 #include <net/if.h> 54 #include <net/if_var.h> 55 #include <net/if_arp.h> 56 #include <net/ethernet.h> 57 #include <net/if_dl.h> 58 #include <net/if_media.h> 59 60 #include <net/bpf.h> 61 62 #include <net/if_types.h> 63 #include <net/if_vlan_var.h> 64 65 #include <netinet/in_systm.h> 66 #include <netinet/in.h> 67 #include <netinet/ip.h> 68 #include <netinet/ip6.h> 69 #include <netinet/tcp.h> 70 #include <netinet/tcp_lro.h> 71 #include <netinet6/ip6_var.h> 72 73 #include <machine/bus.h> 74 #include <machine/in_cksum.h> 75 #include <machine/resource.h> 76 #include <sys/bus.h> 77 #include <sys/rman.h> 78 #include <sys/smp.h> 79 80 #include <dev/pci/pcireg.h> 81 #include <dev/pci/pcivar.h> 82 #include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */ 83 84 #include <vm/vm.h> /* for pmap_mapdev() */ 85 #include <vm/pmap.h> 86 87 #if defined(__i386) || defined(__amd64) 88 #include <machine/specialreg.h> 89 #endif 90 91 #include <dev/mxge/mxge_mcp.h> 92 #include <dev/mxge/mcp_gen_header.h> 93 /*#define MXGE_FAKE_IFP*/ 94 #include <dev/mxge/if_mxge_var.h> 95 #ifdef IFNET_BUF_RING 96 #include <sys/buf_ring.h> 97 #endif 98 99 #include "opt_inet.h" 100 #include "opt_inet6.h" 101 102 /* tunable params */ 103 static int mxge_nvidia_ecrc_enable = 1; 104 static int mxge_force_firmware = 0; 105 static int mxge_intr_coal_delay = 30; 106 static int 
mxge_deassert_wait = 1; 107 static int mxge_flow_control = 1; 108 static int mxge_verbose = 0; 109 static int mxge_ticks; 110 static int mxge_max_slices = 1; 111 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 112 static int mxge_always_promisc = 0; 113 static int mxge_initial_mtu = ETHERMTU_JUMBO; 114 static int mxge_throttle = 0; 115 static char *mxge_fw_unaligned = "mxge_ethp_z8e"; 116 static char *mxge_fw_aligned = "mxge_eth_z8e"; 117 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; 118 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; 119 120 static int mxge_probe(device_t dev); 121 static int mxge_attach(device_t dev); 122 static int mxge_detach(device_t dev); 123 static int mxge_shutdown(device_t dev); 124 static void mxge_intr(void *arg); 125 126 static device_method_t mxge_methods[] = 127 { 128 /* Device interface */ 129 DEVMETHOD(device_probe, mxge_probe), 130 DEVMETHOD(device_attach, mxge_attach), 131 DEVMETHOD(device_detach, mxge_detach), 132 DEVMETHOD(device_shutdown, mxge_shutdown), 133 134 DEVMETHOD_END 135 }; 136 137 static driver_t mxge_driver = 138 { 139 "mxge", 140 mxge_methods, 141 sizeof(mxge_softc_t), 142 }; 143 144 static devclass_t mxge_devclass; 145 146 /* Declare ourselves to be a child of the PCI bus.*/ 147 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0); 148 MODULE_DEPEND(mxge, firmware, 1, 1, 1); 149 MODULE_DEPEND(mxge, zlib, 1, 1, 1); 150 151 static int mxge_load_firmware(mxge_softc_t *sc, int adopt); 152 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 153 static int mxge_close(mxge_softc_t *sc, int down); 154 static int mxge_open(mxge_softc_t *sc); 155 static void mxge_tick(void *arg); 156 157 static int 158 mxge_probe(device_t dev) 159 { 160 int rev; 161 162 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && 163 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) || 164 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) { 165 rev = pci_get_revid(dev); 166 switch 
(rev) { 167 case MXGE_PCI_REV_Z8E: 168 device_set_desc(dev, "Myri10G-PCIE-8A"); 169 break; 170 case MXGE_PCI_REV_Z8ES: 171 device_set_desc(dev, "Myri10G-PCIE-8B"); 172 break; 173 default: 174 device_set_desc(dev, "Myri10G-PCIE-8??"); 175 device_printf(dev, "Unrecognized rev %d NIC\n", 176 rev); 177 break; 178 } 179 return 0; 180 } 181 return ENXIO; 182 } 183 184 static void 185 mxge_enable_wc(mxge_softc_t *sc) 186 { 187 #if defined(__i386) || defined(__amd64) 188 vm_offset_t len; 189 int err; 190 191 sc->wc = 1; 192 len = rman_get_size(sc->mem_res); 193 err = pmap_change_attr((vm_offset_t) sc->sram, 194 len, PAT_WRITE_COMBINING); 195 if (err != 0) { 196 device_printf(sc->dev, "pmap_change_attr failed, %d\n", 197 err); 198 sc->wc = 0; 199 } 200 #endif 201 } 202 203 /* callback to get our DMA address */ 204 static void 205 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, 206 int error) 207 { 208 if (error == 0) { 209 *(bus_addr_t *) arg = segs->ds_addr; 210 } 211 } 212 213 static int 214 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, 215 bus_size_t alignment) 216 { 217 int err; 218 device_t dev = sc->dev; 219 bus_size_t boundary, maxsegsize; 220 221 if (bytes > 4096 && alignment == 4096) { 222 boundary = 0; 223 maxsegsize = bytes; 224 } else { 225 boundary = 4096; 226 maxsegsize = 4096; 227 } 228 229 /* allocate DMAable memory tags */ 230 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 231 alignment, /* alignment */ 232 boundary, /* boundary */ 233 BUS_SPACE_MAXADDR, /* low */ 234 BUS_SPACE_MAXADDR, /* high */ 235 NULL, NULL, /* filter */ 236 bytes, /* maxsize */ 237 1, /* num segs */ 238 maxsegsize, /* maxsegsize */ 239 BUS_DMA_COHERENT, /* flags */ 240 NULL, NULL, /* lock */ 241 &dma->dmat); /* tag */ 242 if (err != 0) { 243 device_printf(dev, "couldn't alloc tag (err = %d)\n", err); 244 return err; 245 } 246 247 /* allocate DMAable memory & map */ 248 err = bus_dmamem_alloc(dma->dmat, &dma->addr, 249 (BUS_DMA_WAITOK | 
BUS_DMA_COHERENT 250 | BUS_DMA_ZERO), &dma->map); 251 if (err != 0) { 252 device_printf(dev, "couldn't alloc mem (err = %d)\n", err); 253 goto abort_with_dmat; 254 } 255 256 /* load the memory */ 257 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, 258 mxge_dmamap_callback, 259 (void *)&dma->bus_addr, 0); 260 if (err != 0) { 261 device_printf(dev, "couldn't load map (err = %d)\n", err); 262 goto abort_with_mem; 263 } 264 return 0; 265 266 abort_with_mem: 267 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 268 abort_with_dmat: 269 (void)bus_dma_tag_destroy(dma->dmat); 270 return err; 271 } 272 273 static void 274 mxge_dma_free(mxge_dma_t *dma) 275 { 276 bus_dmamap_unload(dma->dmat, dma->map); 277 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 278 (void)bus_dma_tag_destroy(dma->dmat); 279 } 280 281 /* 282 * The eeprom strings on the lanaiX have the format 283 * SN=x\0 284 * MAC=x:x:x:x:x:x\0 285 * PC=text\0 286 */ 287 288 static int 289 mxge_parse_strings(mxge_softc_t *sc) 290 { 291 char *ptr; 292 int i, found_mac, found_sn2; 293 char *endptr; 294 295 ptr = sc->eeprom_strings; 296 found_mac = 0; 297 found_sn2 = 0; 298 while (*ptr != '\0') { 299 if (strncmp(ptr, "MAC=", 4) == 0) { 300 ptr += 4; 301 for (i = 0;;) { 302 sc->mac_addr[i] = strtoul(ptr, &endptr, 16); 303 if (endptr - ptr != 2) 304 goto abort; 305 ptr = endptr; 306 if (++i == 6) 307 break; 308 if (*ptr++ != ':') 309 goto abort; 310 } 311 found_mac = 1; 312 } else if (strncmp(ptr, "PC=", 3) == 0) { 313 ptr += 3; 314 strlcpy(sc->product_code_string, ptr, 315 sizeof(sc->product_code_string)); 316 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) { 317 ptr += 3; 318 strlcpy(sc->serial_number_string, ptr, 319 sizeof(sc->serial_number_string)); 320 } else if (strncmp(ptr, "SN2=", 4) == 0) { 321 /* SN2 takes precedence over SN */ 322 ptr += 4; 323 found_sn2 = 1; 324 strlcpy(sc->serial_number_string, ptr, 325 sizeof(sc->serial_number_string)); 326 } 327 while (*ptr++ != '\0') {} 328 } 329 330 
if (found_mac) 331 return 0; 332 333 abort: 334 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 335 336 return ENXIO; 337 } 338 339 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 340 static void 341 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 342 { 343 uint32_t val; 344 unsigned long base, off; 345 char *va, *cfgptr; 346 device_t pdev, mcp55; 347 uint16_t vendor_id, device_id, word; 348 uintptr_t bus, slot, func, ivend, idev; 349 uint32_t *ptr32; 350 351 if (!mxge_nvidia_ecrc_enable) 352 return; 353 354 pdev = device_get_parent(device_get_parent(sc->dev)); 355 if (pdev == NULL) { 356 device_printf(sc->dev, "could not find parent?\n"); 357 return; 358 } 359 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 360 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 361 362 if (vendor_id != 0x10de) 363 return; 364 365 base = 0; 366 367 if (device_id == 0x005d) { 368 /* ck804, base address is magic */ 369 base = 0xe0000000UL; 370 } else if (device_id >= 0x0374 && device_id <= 0x378) { 371 /* mcp55, base address stored in chipset */ 372 mcp55 = pci_find_bsf(0, 0, 0); 373 if (mcp55 && 374 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 375 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 376 word = pci_read_config(mcp55, 0x90, 2); 377 base = ((unsigned long)word & 0x7ffeU) << 25; 378 } 379 } 380 if (!base) 381 return; 382 383 /* XXXX 384 Test below is commented because it is believed that doing 385 config read/write beyond 0xff will access the config space 386 for the next larger function. 
Uncomment this and remove 387 the hacky pmap_mapdev() way of accessing config space when 388 FreeBSD grows support for extended pcie config space access 389 */ 390 #if 0 391 /* See if we can, by some miracle, access the extended 392 config space */ 393 val = pci_read_config(pdev, 0x178, 4); 394 if (val != 0xffffffff) { 395 val |= 0x40; 396 pci_write_config(pdev, 0x178, val, 4); 397 return; 398 } 399 #endif 400 /* Rather than using normal pci config space writes, we must 401 * map the Nvidia config space ourselves. This is because on 402 * opteron/nvidia class machine the 0xe000000 mapping is 403 * handled by the nvidia chipset, that means the internal PCI 404 * device (the on-chip northbridge), or the amd-8131 bridge 405 * and things behind them are not visible by this method. 406 */ 407 408 BUS_READ_IVAR(device_get_parent(pdev), pdev, 409 PCI_IVAR_BUS, &bus); 410 BUS_READ_IVAR(device_get_parent(pdev), pdev, 411 PCI_IVAR_SLOT, &slot); 412 BUS_READ_IVAR(device_get_parent(pdev), pdev, 413 PCI_IVAR_FUNCTION, &func); 414 BUS_READ_IVAR(device_get_parent(pdev), pdev, 415 PCI_IVAR_VENDOR, &ivend); 416 BUS_READ_IVAR(device_get_parent(pdev), pdev, 417 PCI_IVAR_DEVICE, &idev); 418 419 off = base 420 + 0x00100000UL * (unsigned long)bus 421 + 0x00001000UL * (unsigned long)(func 422 + 8 * slot); 423 424 /* map it into the kernel */ 425 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 426 427 if (va == NULL) { 428 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 429 return; 430 } 431 /* get a pointer to the config space mapped into the kernel */ 432 cfgptr = va + (off & PAGE_MASK); 433 434 /* make sure that we can really access it */ 435 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 436 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 437 if (! 
(vendor_id == ivend && device_id == idev)) { 438 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 439 vendor_id, device_id); 440 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 441 return; 442 } 443 444 ptr32 = (uint32_t*)(cfgptr + 0x178); 445 val = *ptr32; 446 447 if (val == 0xffffffff) { 448 device_printf(sc->dev, "extended mapping failed\n"); 449 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 450 return; 451 } 452 *ptr32 = val | 0x40; 453 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 454 if (mxge_verbose) 455 device_printf(sc->dev, 456 "Enabled ECRC on upstream Nvidia bridge " 457 "at %d:%d:%d\n", 458 (int)bus, (int)slot, (int)func); 459 return; 460 } 461 #else 462 static void 463 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 464 { 465 device_printf(sc->dev, 466 "Nforce 4 chipset on non-x86/amd64!?!?!\n"); 467 return; 468 } 469 #endif 470 471 static int 472 mxge_dma_test(mxge_softc_t *sc, int test_type) 473 { 474 mxge_cmd_t cmd; 475 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr; 476 int status; 477 uint32_t len; 478 char *test = " "; 479 480 /* Run a small DMA test. 481 * The magic multipliers to the length tell the firmware 482 * to do DMA read, write, or read+write tests. The 483 * results are returned in cmd.data0. The upper 16 484 * bits of the return is the number of transfers completed. 485 * The lower 16 bits is the time in 0.5us ticks that the 486 * transfers took to complete. 
487 */ 488 489 len = sc->tx_boundary; 490 491 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 492 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 493 cmd.data2 = len * 0x10000; 494 status = mxge_send_cmd(sc, test_type, &cmd); 495 if (status != 0) { 496 test = "read"; 497 goto abort; 498 } 499 sc->read_dma = ((cmd.data0>>16) * len * 2) / 500 (cmd.data0 & 0xffff); 501 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 502 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 503 cmd.data2 = len * 0x1; 504 status = mxge_send_cmd(sc, test_type, &cmd); 505 if (status != 0) { 506 test = "write"; 507 goto abort; 508 } 509 sc->write_dma = ((cmd.data0>>16) * len * 2) / 510 (cmd.data0 & 0xffff); 511 512 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 513 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 514 cmd.data2 = len * 0x10001; 515 status = mxge_send_cmd(sc, test_type, &cmd); 516 if (status != 0) { 517 test = "read/write"; 518 goto abort; 519 } 520 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 521 (cmd.data0 & 0xffff); 522 523 abort: 524 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 525 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 526 test, status); 527 528 return status; 529 } 530 531 /* 532 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 533 * when the PCI-E Completion packets are aligned on an 8-byte 534 * boundary. Some PCI-E chip sets always align Completion packets; on 535 * the ones that do not, the alignment can be enforced by enabling 536 * ECRC generation (if supported). 537 * 538 * When PCI-E Completion packets are not aligned, it is actually more 539 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 
540 * 541 * If the driver can neither enable ECRC nor verify that it has 542 * already been enabled, then it must use a firmware image which works 543 * around unaligned completion packets (ethp_z8e.dat), and it should 544 * also ensure that it never gives the device a Read-DMA which is 545 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 546 * enabled, then the driver should use the aligned (eth_z8e.dat) 547 * firmware image, and set tx_boundary to 4KB. 548 */ 549 550 static int 551 mxge_firmware_probe(mxge_softc_t *sc) 552 { 553 device_t dev = sc->dev; 554 int reg, status; 555 uint16_t pectl; 556 557 sc->tx_boundary = 4096; 558 /* 559 * Verify the max read request size was set to 4KB 560 * before trying the test with 4KB. 561 */ 562 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { 563 pectl = pci_read_config(dev, reg + 0x8, 2); 564 if ((pectl & (5 << 12)) != (5 << 12)) { 565 device_printf(dev, "Max Read Req. size != 4k (0x%x\n", 566 pectl); 567 sc->tx_boundary = 2048; 568 } 569 } 570 571 /* 572 * load the optimized firmware (which assumes aligned PCIe 573 * completions) in order to see if it works on this host. 574 */ 575 sc->fw_name = mxge_fw_aligned; 576 status = mxge_load_firmware(sc, 1); 577 if (status != 0) { 578 return status; 579 } 580 581 /* 582 * Enable ECRC if possible 583 */ 584 mxge_enable_nvidia_ecrc(sc); 585 586 /* 587 * Run a DMA test which watches for unaligned completions and 588 * aborts on the first one seen. Not required on Z8ES or newer. 589 */ 590 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES) 591 return 0; 592 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 593 if (status == 0) 594 return 0; /* keep the aligned firmware */ 595 596 if (status != E2BIG) 597 device_printf(dev, "DMA test failed: %d\n", status); 598 if (status == ENOSYS) 599 device_printf(dev, "Falling back to ethp! 
" 600 "Please install up to date fw\n"); 601 return status; 602 } 603 604 static int 605 mxge_select_firmware(mxge_softc_t *sc) 606 { 607 int aligned = 0; 608 int force_firmware = mxge_force_firmware; 609 610 if (sc->throttle) 611 force_firmware = sc->throttle; 612 613 if (force_firmware != 0) { 614 if (force_firmware == 1) 615 aligned = 1; 616 else 617 aligned = 0; 618 if (mxge_verbose) 619 device_printf(sc->dev, 620 "Assuming %s completions (forced)\n", 621 aligned ? "aligned" : "unaligned"); 622 goto abort; 623 } 624 625 /* if the PCIe link width is 4 or less, we can use the aligned 626 firmware and skip any checks */ 627 if (sc->link_width != 0 && sc->link_width <= 4) { 628 device_printf(sc->dev, 629 "PCIe x%d Link, expect reduced performance\n", 630 sc->link_width); 631 aligned = 1; 632 goto abort; 633 } 634 635 if (0 == mxge_firmware_probe(sc)) 636 return 0; 637 638 abort: 639 if (aligned) { 640 sc->fw_name = mxge_fw_aligned; 641 sc->tx_boundary = 4096; 642 } else { 643 sc->fw_name = mxge_fw_unaligned; 644 sc->tx_boundary = 2048; 645 } 646 return (mxge_load_firmware(sc, 0)); 647 } 648 649 static int 650 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 651 { 652 653 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 654 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 655 be32toh(hdr->mcp_type)); 656 return EIO; 657 } 658 659 /* save firmware version for sysctl */ 660 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version)); 661 if (mxge_verbose) 662 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 663 664 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 665 &sc->fw_ver_minor, &sc->fw_ver_tiny); 666 667 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 668 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 669 device_printf(sc->dev, "Found firmware version %s\n", 670 sc->fw_version); 671 device_printf(sc->dev, "Driver needs %d.%d\n", 672 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 673 return EINVAL; 674 } 675 return 0; 676 677 
} 678 679 static int 680 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 681 { 682 z_stream zs; 683 char *inflate_buffer; 684 const struct firmware *fw; 685 const mcp_gen_header_t *hdr; 686 unsigned hdr_offset; 687 int status; 688 unsigned int i; 689 char dummy; 690 size_t fw_len; 691 692 fw = firmware_get(sc->fw_name); 693 if (fw == NULL) { 694 device_printf(sc->dev, "Could not find firmware image %s\n", 695 sc->fw_name); 696 return ENOENT; 697 } 698 699 /* setup zlib and decompress f/w */ 700 bzero(&zs, sizeof (zs)); 701 zs.zalloc = zcalloc_nowait; 702 zs.zfree = zcfree; 703 status = inflateInit(&zs); 704 if (status != Z_OK) { 705 status = EIO; 706 goto abort_with_fw; 707 } 708 709 /* the uncompressed size is stored as the firmware version, 710 which would otherwise go unused */ 711 fw_len = (size_t) fw->version; 712 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); 713 if (inflate_buffer == NULL) 714 goto abort_with_zs; 715 zs.avail_in = fw->datasize; 716 zs.next_in = __DECONST(char *, fw->data); 717 zs.avail_out = fw_len; 718 zs.next_out = inflate_buffer; 719 status = inflate(&zs, Z_FINISH); 720 if (status != Z_STREAM_END) { 721 device_printf(sc->dev, "zlib %d\n", status); 722 status = EIO; 723 goto abort_with_buffer; 724 } 725 726 /* check id */ 727 hdr_offset = htobe32(*(const uint32_t *) 728 (inflate_buffer + MCP_HEADER_PTR_OFFSET)); 729 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 730 device_printf(sc->dev, "Bad firmware file"); 731 status = EIO; 732 goto abort_with_buffer; 733 } 734 hdr = (const void*)(inflate_buffer + hdr_offset); 735 736 status = mxge_validate_firmware(sc, hdr); 737 if (status != 0) 738 goto abort_with_buffer; 739 740 /* Copy the inflated firmware to NIC SRAM. 
*/ 741 for (i = 0; i < fw_len; i += 256) { 742 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 743 inflate_buffer + i, 744 min(256U, (unsigned)(fw_len - i))); 745 wmb(); 746 dummy = *sc->sram; 747 wmb(); 748 } 749 750 *limit = fw_len; 751 status = 0; 752 abort_with_buffer: 753 free(inflate_buffer, M_TEMP); 754 abort_with_zs: 755 inflateEnd(&zs); 756 abort_with_fw: 757 firmware_put(fw, FIRMWARE_UNLOAD); 758 return status; 759 } 760 761 /* 762 * Enable or disable periodic RDMAs from the host to make certain 763 * chipsets resend dropped PCIe messages 764 */ 765 766 static void 767 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 768 { 769 char buf_bytes[72]; 770 volatile uint32_t *confirm; 771 volatile char *submit; 772 uint32_t *buf, dma_low, dma_high; 773 int i; 774 775 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 776 777 /* clear confirmation addr */ 778 confirm = (volatile uint32_t *)sc->cmd; 779 *confirm = 0; 780 wmb(); 781 782 /* send an rdma command to the PCIe engine, and wait for the 783 response in the confirmation address. The firmware should 784 write a -1 there to indicate it is alive and well 785 */ 786 787 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 788 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 789 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 790 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 791 buf[2] = htobe32(0xffffffff); /* confirm data */ 792 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 793 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 794 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 795 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 796 buf[5] = htobe32(enable); /* enable? 
*/ 797 798 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 799 800 mxge_pio_copy(submit, buf, 64); 801 wmb(); 802 DELAY(1000); 803 wmb(); 804 i = 0; 805 while (*confirm != 0xffffffff && i < 20) { 806 DELAY(1000); 807 i++; 808 } 809 if (*confirm != 0xffffffff) { 810 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 811 (enable ? "enable" : "disable"), confirm, 812 *confirm); 813 } 814 return; 815 } 816 817 static int 818 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 819 { 820 mcp_cmd_t *buf; 821 char buf_bytes[sizeof(*buf) + 8]; 822 volatile mcp_cmd_response_t *response = sc->cmd; 823 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 824 uint32_t dma_low, dma_high; 825 int err, sleep_total = 0; 826 827 /* ensure buf is aligned to 8 bytes */ 828 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 829 830 buf->data0 = htobe32(data->data0); 831 buf->data1 = htobe32(data->data1); 832 buf->data2 = htobe32(data->data2); 833 buf->cmd = htobe32(cmd); 834 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 835 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 836 837 buf->response_addr.low = htobe32(dma_low); 838 buf->response_addr.high = htobe32(dma_high); 839 mtx_lock(&sc->cmd_mtx); 840 response->result = 0xffffffff; 841 wmb(); 842 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 843 844 /* wait up to 20ms */ 845 err = EAGAIN; 846 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 847 bus_dmamap_sync(sc->cmd_dma.dmat, 848 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 849 wmb(); 850 switch (be32toh(response->result)) { 851 case 0: 852 data->data0 = be32toh(response->data); 853 err = 0; 854 break; 855 case 0xffffffff: 856 DELAY(1000); 857 break; 858 case MXGEFW_CMD_UNKNOWN: 859 err = ENOSYS; 860 break; 861 case MXGEFW_CMD_ERROR_UNALIGNED: 862 err = E2BIG; 863 break; 864 case MXGEFW_CMD_ERROR_BUSY: 865 err = EBUSY; 866 break; 867 case MXGEFW_CMD_ERROR_I2C_ABSENT: 868 err = ENXIO; 869 break; 870 default: 
871 device_printf(sc->dev, 872 "mxge: command %d " 873 "failed, result = %d\n", 874 cmd, be32toh(response->result)); 875 err = ENXIO; 876 break; 877 } 878 if (err != EAGAIN) 879 break; 880 } 881 if (err == EAGAIN) 882 device_printf(sc->dev, "mxge: command %d timed out" 883 "result = %d\n", 884 cmd, be32toh(response->result)); 885 mtx_unlock(&sc->cmd_mtx); 886 return err; 887 } 888 889 static int 890 mxge_adopt_running_firmware(mxge_softc_t *sc) 891 { 892 struct mcp_gen_header *hdr; 893 const size_t bytes = sizeof (struct mcp_gen_header); 894 size_t hdr_offset; 895 int status; 896 897 /* find running firmware header */ 898 hdr_offset = htobe32(*(volatile uint32_t *) 899 (sc->sram + MCP_HEADER_PTR_OFFSET)); 900 901 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 902 device_printf(sc->dev, 903 "Running firmware has bad header offset (%d)\n", 904 (int)hdr_offset); 905 return EIO; 906 } 907 908 /* copy header of running firmware from SRAM to host memory to 909 * validate firmware */ 910 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 911 if (hdr == NULL) { 912 device_printf(sc->dev, "could not malloc firmware hdr\n"); 913 return ENOMEM; 914 } 915 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 916 rman_get_bushandle(sc->mem_res), 917 hdr_offset, (char *)hdr, bytes); 918 status = mxge_validate_firmware(sc, hdr); 919 free(hdr, M_DEVBUF); 920 921 /* 922 * check to see if adopted firmware has bug where adopting 923 * it will cause broadcasts to be filtered unless the NIC 924 * is kept in ALLMULTI mode 925 */ 926 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 927 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 928 sc->adopted_rx_filter_bug = 1; 929 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 930 "working around rx filter bug\n", 931 sc->fw_ver_major, sc->fw_ver_minor, 932 sc->fw_ver_tiny); 933 } 934 935 return status; 936 } 937 938 static int 939 mxge_load_firmware(mxge_softc_t *sc, int adopt) 940 { 941 volatile uint32_t *confirm; 942 volatile 
char *submit; 943 char buf_bytes[72]; 944 uint32_t *buf, size, dma_low, dma_high; 945 int status, i; 946 947 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 948 949 size = sc->sram_size; 950 status = mxge_load_firmware_helper(sc, &size); 951 if (status) { 952 if (!adopt) 953 return status; 954 /* Try to use the currently running firmware, if 955 it is new enough */ 956 status = mxge_adopt_running_firmware(sc); 957 if (status) { 958 device_printf(sc->dev, 959 "failed to adopt running firmware\n"); 960 return status; 961 } 962 device_printf(sc->dev, 963 "Successfully adopted running firmware\n"); 964 if (sc->tx_boundary == 4096) { 965 device_printf(sc->dev, 966 "Using firmware currently running on NIC" 967 ". For optimal\n"); 968 device_printf(sc->dev, 969 "performance consider loading optimized " 970 "firmware\n"); 971 } 972 sc->fw_name = mxge_fw_unaligned; 973 sc->tx_boundary = 2048; 974 return 0; 975 } 976 /* clear confirmation addr */ 977 confirm = (volatile uint32_t *)sc->cmd; 978 *confirm = 0; 979 wmb(); 980 /* send a reload command to the bootstrap MCP, and wait for the 981 response in the confirmation address. The firmware should 982 write a -1 there to indicate it is alive and well 983 */ 984 985 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 986 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 987 988 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 989 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 990 buf[2] = htobe32(0xffffffff); /* confirm data */ 991 992 /* FIX: All newest firmware should un-protect the bottom of 993 the sram before handoff. However, the very first interfaces 994 do not. 
Therefore the handoff copy must skip the first 8 bytes 995 */ 996 /* where the code starts*/ 997 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 998 buf[4] = htobe32(size - 8); /* length of code */ 999 buf[5] = htobe32(8); /* where to copy to */ 1000 buf[6] = htobe32(0); /* where to jump to */ 1001 1002 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 1003 mxge_pio_copy(submit, buf, 64); 1004 wmb(); 1005 DELAY(1000); 1006 wmb(); 1007 i = 0; 1008 while (*confirm != 0xffffffff && i < 20) { 1009 DELAY(1000*10); 1010 i++; 1011 bus_dmamap_sync(sc->cmd_dma.dmat, 1012 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 1013 } 1014 if (*confirm != 0xffffffff) { 1015 device_printf(sc->dev,"handoff failed (%p = 0x%x)", 1016 confirm, *confirm); 1017 1018 return ENXIO; 1019 } 1020 return 0; 1021 } 1022 1023 static int 1024 mxge_update_mac_address(mxge_softc_t *sc) 1025 { 1026 mxge_cmd_t cmd; 1027 uint8_t *addr = sc->mac_addr; 1028 int status; 1029 1030 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 1031 | (addr[2] << 8) | addr[3]); 1032 1033 cmd.data1 = ((addr[4] << 8) | (addr[5])); 1034 1035 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 1036 return status; 1037 } 1038 1039 static int 1040 mxge_change_pause(mxge_softc_t *sc, int pause) 1041 { 1042 mxge_cmd_t cmd; 1043 int status; 1044 1045 if (pause) 1046 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, 1047 &cmd); 1048 else 1049 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, 1050 &cmd); 1051 1052 if (status) { 1053 device_printf(sc->dev, "Failed to set flow control mode\n"); 1054 return ENXIO; 1055 } 1056 sc->pause = pause; 1057 return 0; 1058 } 1059 1060 static void 1061 mxge_change_promisc(mxge_softc_t *sc, int promisc) 1062 { 1063 mxge_cmd_t cmd; 1064 int status; 1065 1066 if (mxge_always_promisc) 1067 promisc = 1; 1068 1069 if (promisc) 1070 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, 1071 &cmd); 1072 else 1073 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, 1074 &cmd); 1075 1076 if (status) { 
1077 device_printf(sc->dev, "Failed to set promisc mode\n"); 1078 } 1079 } 1080 1081 struct mxge_add_maddr_ctx { 1082 mxge_softc_t *sc; 1083 int error; 1084 }; 1085 1086 static u_int 1087 mxge_add_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt) 1088 { 1089 struct mxge_add_maddr_ctx *ctx = arg; 1090 mxge_cmd_t cmd; 1091 1092 if (ctx->error != 0) 1093 return (0); 1094 bcopy(LLADDR(sdl), &cmd.data0, 4); 1095 bcopy(LLADDR(sdl) + 4, &cmd.data1, 2); 1096 cmd.data0 = htonl(cmd.data0); 1097 cmd.data1 = htonl(cmd.data1); 1098 1099 ctx->error = mxge_send_cmd(ctx->sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1100 1101 return (1); 1102 } 1103 1104 static void 1105 mxge_set_multicast_list(mxge_softc_t *sc) 1106 { 1107 struct mxge_add_maddr_ctx ctx; 1108 struct ifnet *ifp = sc->ifp; 1109 mxge_cmd_t cmd; 1110 int err; 1111 1112 /* This firmware is known to not support multicast */ 1113 if (!sc->fw_multicast_support) 1114 return; 1115 1116 /* Disable multicast filtering while we play with the lists*/ 1117 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1118 if (err != 0) { 1119 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," 1120 " error status: %d\n", err); 1121 return; 1122 } 1123 1124 if (sc->adopted_rx_filter_bug) 1125 return; 1126 1127 if (ifp->if_flags & IFF_ALLMULTI) 1128 /* request to disable multicast filtering, so quit here */ 1129 return; 1130 1131 /* Flush all the filters */ 1132 1133 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1134 if (err != 0) { 1135 device_printf(sc->dev, 1136 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" 1137 ", error status: %d\n", err); 1138 return; 1139 } 1140 1141 /* Walk the multicast list, and add each address */ 1142 ctx.sc = sc; 1143 ctx.error = 0; 1144 if_foreach_llmaddr(ifp, mxge_add_maddr, &ctx); 1145 if (ctx.error != 0) { 1146 device_printf(sc->dev, "Failed MXGEFW_JOIN_MULTICAST_GROUP, " 1147 "error status:" "%d\t", ctx.error); 1148 /* abort, leaving multicast filtering off */ 1149 return; 1150 } 1151 
1152 /* Enable multicast filtering */ 1153 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1154 if (err != 0) { 1155 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI" 1156 ", error status: %d\n", err); 1157 } 1158 } 1159 1160 static int 1161 mxge_max_mtu(mxge_softc_t *sc) 1162 { 1163 mxge_cmd_t cmd; 1164 int status; 1165 1166 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) 1167 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1168 1169 /* try to set nbufs to see if it we can 1170 use virtually contiguous jumbos */ 1171 cmd.data0 = 0; 1172 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 1173 &cmd); 1174 if (status == 0) 1175 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1176 1177 /* otherwise, we're limited to MJUMPAGESIZE */ 1178 return MJUMPAGESIZE - MXGEFW_PAD; 1179 } 1180 1181 static int 1182 mxge_reset(mxge_softc_t *sc, int interrupts_setup) 1183 { 1184 struct mxge_slice_state *ss; 1185 mxge_rx_done_t *rx_done; 1186 volatile uint32_t *irq_claim; 1187 mxge_cmd_t cmd; 1188 int slice, status; 1189 1190 /* try to send a reset command to the card to see if it 1191 is alive */ 1192 memset(&cmd, 0, sizeof (cmd)); 1193 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1194 if (status != 0) { 1195 device_printf(sc->dev, "failed reset\n"); 1196 return ENXIO; 1197 } 1198 1199 mxge_dummy_rdma(sc, 1); 1200 1201 /* set the intrq size */ 1202 cmd.data0 = sc->rx_ring_size; 1203 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1204 1205 /* 1206 * Even though we already know how many slices are supported 1207 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES 1208 * has magic side effects, and must be called after a reset. 1209 * It must be called prior to calling any RSS related cmds, 1210 * including assigning an interrupt queue for anything but 1211 * slice 0. It must also be called *after* 1212 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 1213 * the firmware to compute offsets. 
1214 */ 1215 1216 if (sc->num_slices > 1) { 1217 /* ask the maximum number of slices it supports */ 1218 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 1219 &cmd); 1220 if (status != 0) { 1221 device_printf(sc->dev, 1222 "failed to get number of slices\n"); 1223 return status; 1224 } 1225 /* 1226 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 1227 * to setting up the interrupt queue DMA 1228 */ 1229 cmd.data0 = sc->num_slices; 1230 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 1231 #ifdef IFNET_BUF_RING 1232 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 1233 #endif 1234 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, 1235 &cmd); 1236 if (status != 0) { 1237 device_printf(sc->dev, 1238 "failed to set number of slices\n"); 1239 return status; 1240 } 1241 } 1242 1243 if (interrupts_setup) { 1244 /* Now exchange information about interrupts */ 1245 for (slice = 0; slice < sc->num_slices; slice++) { 1246 rx_done = &sc->ss[slice].rx_done; 1247 memset(rx_done->entry, 0, sc->rx_ring_size); 1248 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr); 1249 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr); 1250 cmd.data2 = slice; 1251 status |= mxge_send_cmd(sc, 1252 MXGEFW_CMD_SET_INTRQ_DMA, 1253 &cmd); 1254 } 1255 } 1256 1257 status |= mxge_send_cmd(sc, 1258 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); 1259 1260 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1261 1262 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1263 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1264 1265 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1266 &cmd); 1267 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1268 if (status != 0) { 1269 device_printf(sc->dev, "failed set interrupt parameters\n"); 1270 return status; 1271 } 1272 1273 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1274 1275 /* run a DMA benchmark */ 1276 (void) mxge_dma_test(sc, 
MXGEFW_DMA_TEST); 1277 1278 for (slice = 0; slice < sc->num_slices; slice++) { 1279 ss = &sc->ss[slice]; 1280 1281 ss->irq_claim = irq_claim + (2 * slice); 1282 /* reset mcp/driver shared state back to 0 */ 1283 ss->rx_done.idx = 0; 1284 ss->rx_done.cnt = 0; 1285 ss->tx.req = 0; 1286 ss->tx.done = 0; 1287 ss->tx.pkt_done = 0; 1288 ss->tx.queue_active = 0; 1289 ss->tx.activate = 0; 1290 ss->tx.deactivate = 0; 1291 ss->tx.wake = 0; 1292 ss->tx.defrag = 0; 1293 ss->tx.stall = 0; 1294 ss->rx_big.cnt = 0; 1295 ss->rx_small.cnt = 0; 1296 ss->lc.lro_bad_csum = 0; 1297 ss->lc.lro_queued = 0; 1298 ss->lc.lro_flushed = 0; 1299 if (ss->fw_stats != NULL) { 1300 bzero(ss->fw_stats, sizeof *ss->fw_stats); 1301 } 1302 } 1303 sc->rdma_tags_available = 15; 1304 status = mxge_update_mac_address(sc); 1305 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC); 1306 mxge_change_pause(sc, sc->pause); 1307 mxge_set_multicast_list(sc); 1308 if (sc->throttle) { 1309 cmd.data0 = sc->throttle; 1310 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, 1311 &cmd)) { 1312 device_printf(sc->dev, 1313 "can't enable throttle\n"); 1314 } 1315 } 1316 return status; 1317 } 1318 1319 static int 1320 mxge_change_throttle(SYSCTL_HANDLER_ARGS) 1321 { 1322 mxge_cmd_t cmd; 1323 mxge_softc_t *sc; 1324 int err; 1325 unsigned int throttle; 1326 1327 sc = arg1; 1328 throttle = sc->throttle; 1329 err = sysctl_handle_int(oidp, &throttle, arg2, req); 1330 if (err != 0) { 1331 return err; 1332 } 1333 1334 if (throttle == sc->throttle) 1335 return 0; 1336 1337 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE) 1338 return EINVAL; 1339 1340 mtx_lock(&sc->driver_mtx); 1341 cmd.data0 = throttle; 1342 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd); 1343 if (err == 0) 1344 sc->throttle = throttle; 1345 mtx_unlock(&sc->driver_mtx); 1346 return err; 1347 } 1348 1349 static int 1350 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1351 { 1352 mxge_softc_t *sc; 1353 unsigned int intr_coal_delay; 
1354 int err; 1355 1356 sc = arg1; 1357 intr_coal_delay = sc->intr_coal_delay; 1358 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1359 if (err != 0) { 1360 return err; 1361 } 1362 if (intr_coal_delay == sc->intr_coal_delay) 1363 return 0; 1364 1365 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1366 return EINVAL; 1367 1368 mtx_lock(&sc->driver_mtx); 1369 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1370 sc->intr_coal_delay = intr_coal_delay; 1371 1372 mtx_unlock(&sc->driver_mtx); 1373 return err; 1374 } 1375 1376 static int 1377 mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1378 { 1379 mxge_softc_t *sc; 1380 unsigned int enabled; 1381 int err; 1382 1383 sc = arg1; 1384 enabled = sc->pause; 1385 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1386 if (err != 0) { 1387 return err; 1388 } 1389 if (enabled == sc->pause) 1390 return 0; 1391 1392 mtx_lock(&sc->driver_mtx); 1393 err = mxge_change_pause(sc, enabled); 1394 mtx_unlock(&sc->driver_mtx); 1395 return err; 1396 } 1397 1398 static int 1399 mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1400 { 1401 int err; 1402 1403 if (arg1 == NULL) 1404 return EFAULT; 1405 arg2 = be32toh(*(int *)arg1); 1406 arg1 = NULL; 1407 err = sysctl_handle_int(oidp, arg1, arg2, req); 1408 1409 return err; 1410 } 1411 1412 static void 1413 mxge_rem_sysctls(mxge_softc_t *sc) 1414 { 1415 struct mxge_slice_state *ss; 1416 int slice; 1417 1418 if (sc->slice_sysctl_tree == NULL) 1419 return; 1420 1421 for (slice = 0; slice < sc->num_slices; slice++) { 1422 ss = &sc->ss[slice]; 1423 if (ss == NULL || ss->sysctl_tree == NULL) 1424 continue; 1425 sysctl_ctx_free(&ss->sysctl_ctx); 1426 ss->sysctl_tree = NULL; 1427 } 1428 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1429 sc->slice_sysctl_tree = NULL; 1430 } 1431 1432 static void 1433 mxge_add_sysctls(mxge_softc_t *sc) 1434 { 1435 struct sysctl_ctx_list *ctx; 1436 struct sysctl_oid_list *children; 1437 mcp_irq_data_t *fw; 1438 struct mxge_slice_state *ss; 1439 int slice; 1440 
char slice_num[8]; 1441 1442 ctx = device_get_sysctl_ctx(sc->dev); 1443 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1444 fw = sc->ss[0].fw_stats; 1445 1446 /* random information */ 1447 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1448 "firmware_version", 1449 CTLFLAG_RD, sc->fw_version, 1450 0, "firmware version"); 1451 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1452 "serial_number", 1453 CTLFLAG_RD, sc->serial_number_string, 1454 0, "serial number"); 1455 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1456 "product_code", 1457 CTLFLAG_RD, sc->product_code_string, 1458 0, "product_code"); 1459 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1460 "pcie_link_width", 1461 CTLFLAG_RD, &sc->link_width, 1462 0, "tx_boundary"); 1463 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1464 "tx_boundary", 1465 CTLFLAG_RD, &sc->tx_boundary, 1466 0, "tx_boundary"); 1467 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1468 "write_combine", 1469 CTLFLAG_RD, &sc->wc, 1470 0, "write combining PIO?"); 1471 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1472 "read_dma_MBs", 1473 CTLFLAG_RD, &sc->read_dma, 1474 0, "DMA Read speed in MB/s"); 1475 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1476 "write_dma_MBs", 1477 CTLFLAG_RD, &sc->write_dma, 1478 0, "DMA Write speed in MB/s"); 1479 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1480 "read_write_dma_MBs", 1481 CTLFLAG_RD, &sc->read_write_dma, 1482 0, "DMA concurrent Read/Write speed in MB/s"); 1483 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1484 "watchdog_resets", 1485 CTLFLAG_RD, &sc->watchdog_resets, 1486 0, "Number of times NIC was reset"); 1487 1488 /* performance related tunables */ 1489 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1490 "intr_coal_delay", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 1491 sc, 0, mxge_change_intr_coal, "I", 1492 "interrupt coalescing delay in usecs"); 1493 1494 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1495 "throttle", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 1496 mxge_change_throttle, "I", "transmit throttling"); 1497 1498 
SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1499 "flow_control_enabled", 1500 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 1501 mxge_change_flow_control, "I", 1502 "interrupt coalescing delay in usecs"); 1503 1504 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1505 "deassert_wait", 1506 CTLFLAG_RW, &mxge_deassert_wait, 1507 0, "Wait for IRQ line to go low in ihandler"); 1508 1509 /* stats block from firmware is in network byte order. 1510 Need to swap it */ 1511 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1512 "link_up", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1513 &fw->link_up, 0, mxge_handle_be32, "I", "link up"); 1514 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1515 "rdma_tags_available", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1516 &fw->rdma_tags_available, 0, mxge_handle_be32, "I", 1517 "rdma_tags_available"); 1518 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1519 "dropped_bad_crc32", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1520 &fw->dropped_bad_crc32, 0, mxge_handle_be32, "I", 1521 "dropped_bad_crc32"); 1522 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1523 "dropped_bad_phy", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1524 &fw->dropped_bad_phy, 0, mxge_handle_be32, "I", "dropped_bad_phy"); 1525 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1526 "dropped_link_error_or_filtered", 1527 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1528 &fw->dropped_link_error_or_filtered, 0, mxge_handle_be32, "I", 1529 "dropped_link_error_or_filtered"); 1530 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1531 "dropped_link_overflow", 1532 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1533 &fw->dropped_link_overflow, 0, mxge_handle_be32, "I", 1534 "dropped_link_overflow"); 1535 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1536 "dropped_multicast_filtered", 1537 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1538 &fw->dropped_multicast_filtered, 0, mxge_handle_be32, "I", 1539 "dropped_multicast_filtered"); 1540 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1541 "dropped_no_big_buffer", 1542 CTLTYPE_INT | CTLFLAG_RD | 
CTLFLAG_MPSAFE, 1543 &fw->dropped_no_big_buffer, 0, mxge_handle_be32, "I", 1544 "dropped_no_big_buffer"); 1545 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1546 "dropped_no_small_buffer", 1547 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1548 &fw->dropped_no_small_buffer, 0, mxge_handle_be32, "I", 1549 "dropped_no_small_buffer"); 1550 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1551 "dropped_overrun", 1552 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1553 &fw->dropped_overrun, 0, mxge_handle_be32, "I", 1554 "dropped_overrun"); 1555 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1556 "dropped_pause", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1557 &fw->dropped_pause, 0, mxge_handle_be32, "I", "dropped_pause"); 1558 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1559 "dropped_runt", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1560 &fw->dropped_runt, 0, mxge_handle_be32, "I", "dropped_runt"); 1561 1562 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1563 "dropped_unicast_filtered", 1564 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1565 &fw->dropped_unicast_filtered, 0, mxge_handle_be32, "I", 1566 "dropped_unicast_filtered"); 1567 1568 /* verbose printing? 
*/ 1569 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1570 "verbose", 1571 CTLFLAG_RW, &mxge_verbose, 1572 0, "verbose printing"); 1573 1574 /* add counters exported for debugging from all slices */ 1575 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1576 sc->slice_sysctl_tree = 1577 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO, 1578 "slice", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 1579 1580 for (slice = 0; slice < sc->num_slices; slice++) { 1581 ss = &sc->ss[slice]; 1582 sysctl_ctx_init(&ss->sysctl_ctx); 1583 ctx = &ss->sysctl_ctx; 1584 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1585 sprintf(slice_num, "%d", slice); 1586 ss->sysctl_tree = 1587 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num, 1588 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 1589 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1590 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1591 "rx_small_cnt", 1592 CTLFLAG_RD, &ss->rx_small.cnt, 1593 0, "rx_small_cnt"); 1594 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1595 "rx_big_cnt", 1596 CTLFLAG_RD, &ss->rx_big.cnt, 1597 0, "rx_small_cnt"); 1598 SYSCTL_ADD_U64(ctx, children, OID_AUTO, 1599 "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed, 1600 0, "number of lro merge queues flushed"); 1601 1602 SYSCTL_ADD_U64(ctx, children, OID_AUTO, 1603 "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum, 1604 0, "number of bad csums preventing LRO"); 1605 1606 SYSCTL_ADD_U64(ctx, children, OID_AUTO, 1607 "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued, 1608 0, "number of frames appended to lro merge" 1609 "queues"); 1610 1611 #ifndef IFNET_BUF_RING 1612 /* only transmit from slice 0 for now */ 1613 if (slice > 0) 1614 continue; 1615 #endif 1616 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1617 "tx_req", 1618 CTLFLAG_RD, &ss->tx.req, 1619 0, "tx_req"); 1620 1621 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1622 "tx_done", 1623 CTLFLAG_RD, &ss->tx.done, 1624 0, "tx_done"); 1625 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1626 "tx_pkt_done", 1627 CTLFLAG_RD, &ss->tx.pkt_done, 1628 0, "tx_done"); 1629 
SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1630 "tx_stall", 1631 CTLFLAG_RD, &ss->tx.stall, 1632 0, "tx_stall"); 1633 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1634 "tx_wake", 1635 CTLFLAG_RD, &ss->tx.wake, 1636 0, "tx_wake"); 1637 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1638 "tx_defrag", 1639 CTLFLAG_RD, &ss->tx.defrag, 1640 0, "tx_defrag"); 1641 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1642 "tx_queue_active", 1643 CTLFLAG_RD, &ss->tx.queue_active, 1644 0, "tx_queue_active"); 1645 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1646 "tx_activate", 1647 CTLFLAG_RD, &ss->tx.activate, 1648 0, "tx_activate"); 1649 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1650 "tx_deactivate", 1651 CTLFLAG_RD, &ss->tx.deactivate, 1652 0, "tx_deactivate"); 1653 } 1654 } 1655 1656 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1657 backwards one at a time and handle ring wraps */ 1658 1659 static inline void 1660 mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1661 mcp_kreq_ether_send_t *src, int cnt) 1662 { 1663 int idx, starting_slot; 1664 starting_slot = tx->req; 1665 while (cnt > 1) { 1666 cnt--; 1667 idx = (starting_slot + cnt) & tx->mask; 1668 mxge_pio_copy(&tx->lanai[idx], 1669 &src[cnt], sizeof(*src)); 1670 wmb(); 1671 } 1672 } 1673 1674 /* 1675 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1676 * at most 32 bytes at a time, so as to avoid involving the software 1677 * pio handler in the nic. 
We re-write the first segment's flags 1678 * to mark them valid only after writing the entire chain 1679 */ 1680 1681 static inline void 1682 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 1683 int cnt) 1684 { 1685 int idx, i; 1686 uint32_t *src_ints; 1687 volatile uint32_t *dst_ints; 1688 mcp_kreq_ether_send_t *srcp; 1689 volatile mcp_kreq_ether_send_t *dstp, *dst; 1690 uint8_t last_flags; 1691 1692 idx = tx->req & tx->mask; 1693 1694 last_flags = src->flags; 1695 src->flags = 0; 1696 wmb(); 1697 dst = dstp = &tx->lanai[idx]; 1698 srcp = src; 1699 1700 if ((idx + cnt) < tx->mask) { 1701 for (i = 0; i < (cnt - 1); i += 2) { 1702 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1703 wmb(); /* force write every 32 bytes */ 1704 srcp += 2; 1705 dstp += 2; 1706 } 1707 } else { 1708 /* submit all but the first request, and ensure 1709 that it is submitted below */ 1710 mxge_submit_req_backwards(tx, src, cnt); 1711 i = 0; 1712 } 1713 if (i < cnt) { 1714 /* submit the first request */ 1715 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1716 wmb(); /* barrier before setting valid flag */ 1717 } 1718 1719 /* re-write the last 32-bits with the valid flags */ 1720 src->flags = last_flags; 1721 src_ints = (uint32_t *)src; 1722 src_ints+=3; 1723 dst_ints = (volatile uint32_t *)dst; 1724 dst_ints+=3; 1725 *dst_ints = *src_ints; 1726 tx->req += cnt; 1727 wmb(); 1728 } 1729 1730 static int 1731 mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m, 1732 struct mxge_pkt_info *pi) 1733 { 1734 struct ether_vlan_header *eh; 1735 uint16_t etype; 1736 int tso = m->m_pkthdr.csum_flags & (CSUM_TSO); 1737 #if IFCAP_TSO6 && defined(INET6) 1738 int nxt; 1739 #endif 1740 1741 eh = mtod(m, struct ether_vlan_header *); 1742 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 1743 etype = ntohs(eh->evl_proto); 1744 pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 1745 } else { 1746 etype = ntohs(eh->evl_encap_proto); 1747 pi->ip_off = ETHER_HDR_LEN; 1748 } 1749 1750 switch (etype) { 
1751 case ETHERTYPE_IP: 1752 /* 1753 * ensure ip header is in first mbuf, copy it to a 1754 * scratch buffer if not 1755 */ 1756 pi->ip = (struct ip *)(m->m_data + pi->ip_off); 1757 pi->ip6 = NULL; 1758 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) { 1759 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip), 1760 ss->scratch); 1761 pi->ip = (struct ip *)(ss->scratch + pi->ip_off); 1762 } 1763 pi->ip_hlen = pi->ip->ip_hl << 2; 1764 if (!tso) 1765 return 0; 1766 1767 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen + 1768 sizeof(struct tcphdr))) { 1769 m_copydata(m, 0, pi->ip_off + pi->ip_hlen + 1770 sizeof(struct tcphdr), ss->scratch); 1771 pi->ip = (struct ip *)(ss->scratch + pi->ip_off); 1772 } 1773 pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen); 1774 break; 1775 #if IFCAP_TSO6 && defined(INET6) 1776 case ETHERTYPE_IPV6: 1777 pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off); 1778 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) { 1779 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6), 1780 ss->scratch); 1781 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off); 1782 } 1783 nxt = 0; 1784 pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt); 1785 pi->ip_hlen -= pi->ip_off; 1786 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) 1787 return EINVAL; 1788 1789 if (!tso) 1790 return 0; 1791 1792 if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen) 1793 return EINVAL; 1794 1795 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen + 1796 sizeof(struct tcphdr))) { 1797 m_copydata(m, 0, pi->ip_off + pi->ip_hlen + 1798 sizeof(struct tcphdr), ss->scratch); 1799 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off); 1800 } 1801 pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen); 1802 break; 1803 #endif 1804 default: 1805 return EINVAL; 1806 } 1807 return 0; 1808 } 1809 1810 #if IFCAP_TSO4 1811 1812 static void 1813 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m, 1814 int busdma_seg_cnt, struct 
mxge_pkt_info *pi) 1815 { 1816 mxge_tx_ring_t *tx; 1817 mcp_kreq_ether_send_t *req; 1818 bus_dma_segment_t *seg; 1819 uint32_t low, high_swapped; 1820 int len, seglen, cum_len, cum_len_next; 1821 int next_is_first, chop, cnt, rdma_count, small; 1822 uint16_t pseudo_hdr_offset, cksum_offset, mss, sum; 1823 uint8_t flags, flags_next; 1824 static int once; 1825 1826 mss = m->m_pkthdr.tso_segsz; 1827 1828 /* negative cum_len signifies to the 1829 * send loop that we are still in the 1830 * header portion of the TSO packet. 1831 */ 1832 1833 cksum_offset = pi->ip_off + pi->ip_hlen; 1834 cum_len = -(cksum_offset + (pi->tcp->th_off << 2)); 1835 1836 /* TSO implies checksum offload on this hardware */ 1837 if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) { 1838 /* 1839 * If packet has full TCP csum, replace it with pseudo hdr 1840 * sum that the NIC expects, otherwise the NIC will emit 1841 * packets with bad TCP checksums. 1842 */ 1843 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); 1844 if (pi->ip6) { 1845 #if (CSUM_TCP_IPV6 != 0) && defined(INET6) 1846 m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6; 1847 sum = in6_cksum_pseudo(pi->ip6, 1848 m->m_pkthdr.len - cksum_offset, 1849 IPPROTO_TCP, 0); 1850 #endif 1851 } else { 1852 #ifdef INET 1853 m->m_pkthdr.csum_flags |= CSUM_TCP; 1854 sum = in_pseudo(pi->ip->ip_src.s_addr, 1855 pi->ip->ip_dst.s_addr, 1856 htons(IPPROTO_TCP + (m->m_pkthdr.len - 1857 cksum_offset))); 1858 #endif 1859 } 1860 m_copyback(m, offsetof(struct tcphdr, th_sum) + 1861 cksum_offset, sizeof(sum), (caddr_t)&sum); 1862 } 1863 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1864 1865 /* for TSO, pseudo_hdr_offset holds mss. 1866 * The firmware figures out where to put 1867 * the checksum by parsing the header. 
*/ 1868 pseudo_hdr_offset = htobe16(mss); 1869 1870 if (pi->ip6) { 1871 /* 1872 * for IPv6 TSO, the "checksum offset" is re-purposed 1873 * to store the TCP header len 1874 */ 1875 cksum_offset = (pi->tcp->th_off << 2); 1876 } 1877 1878 tx = &ss->tx; 1879 req = tx->req_list; 1880 seg = tx->seg_list; 1881 cnt = 0; 1882 rdma_count = 0; 1883 /* "rdma_count" is the number of RDMAs belonging to the 1884 * current packet BEFORE the current send request. For 1885 * non-TSO packets, this is equal to "count". 1886 * For TSO packets, rdma_count needs to be reset 1887 * to 0 after a segment cut. 1888 * 1889 * The rdma_count field of the send request is 1890 * the number of RDMAs of the packet starting at 1891 * that request. For TSO send requests with one ore more cuts 1892 * in the middle, this is the number of RDMAs starting 1893 * after the last cut in the request. All previous 1894 * segments before the last cut implicitly have 1 RDMA. 1895 * 1896 * Since the number of RDMAs is not known beforehand, 1897 * it must be filled-in retroactively - after each 1898 * segmentation cut or at the end of the entire packet. 
1899 */ 1900 1901 while (busdma_seg_cnt) { 1902 /* Break the busdma segment up into pieces*/ 1903 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1904 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1905 len = seg->ds_len; 1906 1907 while (len) { 1908 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1909 seglen = len; 1910 cum_len_next = cum_len + seglen; 1911 (req-rdma_count)->rdma_count = rdma_count + 1; 1912 if (__predict_true(cum_len >= 0)) { 1913 /* payload */ 1914 chop = (cum_len_next > mss); 1915 cum_len_next = cum_len_next % mss; 1916 next_is_first = (cum_len_next == 0); 1917 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1918 flags_next |= next_is_first * 1919 MXGEFW_FLAGS_FIRST; 1920 rdma_count |= -(chop | next_is_first); 1921 rdma_count += chop & !next_is_first; 1922 } else if (cum_len_next >= 0) { 1923 /* header ends */ 1924 rdma_count = -1; 1925 cum_len_next = 0; 1926 seglen = -cum_len; 1927 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1928 flags_next = MXGEFW_FLAGS_TSO_PLD | 1929 MXGEFW_FLAGS_FIRST | 1930 (small * MXGEFW_FLAGS_SMALL); 1931 } 1932 1933 req->addr_high = high_swapped; 1934 req->addr_low = htobe32(low); 1935 req->pseudo_hdr_offset = pseudo_hdr_offset; 1936 req->pad = 0; 1937 req->rdma_count = 1; 1938 req->length = htobe16(seglen); 1939 req->cksum_offset = cksum_offset; 1940 req->flags = flags | ((cum_len & 1) * 1941 MXGEFW_FLAGS_ALIGN_ODD); 1942 low += seglen; 1943 len -= seglen; 1944 cum_len = cum_len_next; 1945 flags = flags_next; 1946 req++; 1947 cnt++; 1948 rdma_count++; 1949 if (cksum_offset != 0 && !pi->ip6) { 1950 if (__predict_false(cksum_offset > seglen)) 1951 cksum_offset -= seglen; 1952 else 1953 cksum_offset = 0; 1954 } 1955 if (__predict_false(cnt > tx->max_desc)) 1956 goto drop; 1957 } 1958 busdma_seg_cnt--; 1959 seg++; 1960 } 1961 (req-rdma_count)->rdma_count = rdma_count; 1962 1963 do { 1964 req--; 1965 req->flags |= MXGEFW_FLAGS_TSO_LAST; 1966 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 1967 1968 
tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1969 mxge_submit_req(tx, tx->req_list, cnt); 1970 #ifdef IFNET_BUF_RING 1971 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 1972 /* tell the NIC to start polling this slice */ 1973 *tx->send_go = 1; 1974 tx->queue_active = 1; 1975 tx->activate++; 1976 wmb(); 1977 } 1978 #endif 1979 return; 1980 1981 drop: 1982 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 1983 m_freem(m); 1984 ss->oerrors++; 1985 if (!once) { 1986 printf("tx->max_desc exceeded via TSO!\n"); 1987 printf("mss = %d, %ld, %d!\n", mss, 1988 (long)seg - (long)tx->seg_list, tx->max_desc); 1989 once = 1; 1990 } 1991 return; 1992 1993 } 1994 1995 #endif /* IFCAP_TSO4 */ 1996 1997 #ifdef MXGE_NEW_VLAN_API 1998 /* 1999 * We reproduce the software vlan tag insertion from 2000 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" 2001 * vlan tag insertion. We need to advertise this in order to have the 2002 * vlan interface respect our csum offload flags. 2003 */ 2004 static struct mbuf * 2005 mxge_vlan_tag_insert(struct mbuf *m) 2006 { 2007 struct ether_vlan_header *evl; 2008 2009 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT); 2010 if (__predict_false(m == NULL)) 2011 return NULL; 2012 if (m->m_len < sizeof(*evl)) { 2013 m = m_pullup(m, sizeof(*evl)); 2014 if (__predict_false(m == NULL)) 2015 return NULL; 2016 } 2017 /* 2018 * Transform the Ethernet header into an Ethernet header 2019 * with 802.1Q encapsulation. 
2020 */ 2021 evl = mtod(m, struct ether_vlan_header *); 2022 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 2023 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 2024 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 2025 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 2026 m->m_flags &= ~M_VLANTAG; 2027 return m; 2028 } 2029 #endif /* MXGE_NEW_VLAN_API */ 2030 2031 static void 2032 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 2033 { 2034 struct mxge_pkt_info pi = {0,0,0,0}; 2035 mxge_softc_t *sc; 2036 mcp_kreq_ether_send_t *req; 2037 bus_dma_segment_t *seg; 2038 struct mbuf *m_tmp; 2039 struct ifnet *ifp; 2040 mxge_tx_ring_t *tx; 2041 int cnt, cum_len, err, i, idx, odd_flag; 2042 uint16_t pseudo_hdr_offset; 2043 uint8_t flags, cksum_offset; 2044 2045 sc = ss->sc; 2046 ifp = sc->ifp; 2047 tx = &ss->tx; 2048 2049 #ifdef MXGE_NEW_VLAN_API 2050 if (m->m_flags & M_VLANTAG) { 2051 m = mxge_vlan_tag_insert(m); 2052 if (__predict_false(m == NULL)) 2053 goto drop_without_m; 2054 } 2055 #endif 2056 if (m->m_pkthdr.csum_flags & 2057 (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { 2058 if (mxge_parse_tx(ss, m, &pi)) 2059 goto drop; 2060 } 2061 2062 /* (try to) map the frame for DMA */ 2063 idx = tx->req & tx->mask; 2064 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 2065 m, tx->seg_list, &cnt, 2066 BUS_DMA_NOWAIT); 2067 if (__predict_false(err == EFBIG)) { 2068 /* Too many segments in the chain. 
Try 2069 to defrag */ 2070 m_tmp = m_defrag(m, M_NOWAIT); 2071 if (m_tmp == NULL) { 2072 goto drop; 2073 } 2074 ss->tx.defrag++; 2075 m = m_tmp; 2076 err = bus_dmamap_load_mbuf_sg(tx->dmat, 2077 tx->info[idx].map, 2078 m, tx->seg_list, &cnt, 2079 BUS_DMA_NOWAIT); 2080 } 2081 if (__predict_false(err != 0)) { 2082 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 2083 " packet len = %d\n", err, m->m_pkthdr.len); 2084 goto drop; 2085 } 2086 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 2087 BUS_DMASYNC_PREWRITE); 2088 tx->info[idx].m = m; 2089 2090 #if IFCAP_TSO4 2091 /* TSO is different enough, we handle it in another routine */ 2092 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 2093 mxge_encap_tso(ss, m, cnt, &pi); 2094 return; 2095 } 2096 #endif 2097 2098 req = tx->req_list; 2099 cksum_offset = 0; 2100 pseudo_hdr_offset = 0; 2101 flags = MXGEFW_FLAGS_NO_TSO; 2102 2103 /* checksum offloading? */ 2104 if (m->m_pkthdr.csum_flags & 2105 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { 2106 /* ensure ip header is in first mbuf, copy 2107 it to a scratch buffer if not */ 2108 cksum_offset = pi.ip_off + pi.ip_hlen; 2109 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2110 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2111 req->cksum_offset = cksum_offset; 2112 flags |= MXGEFW_FLAGS_CKSUM; 2113 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2114 } else { 2115 odd_flag = 0; 2116 } 2117 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2118 flags |= MXGEFW_FLAGS_SMALL; 2119 2120 /* convert segments into a request list */ 2121 cum_len = 0; 2122 seg = tx->seg_list; 2123 req->flags = MXGEFW_FLAGS_FIRST; 2124 for (i = 0; i < cnt; i++) { 2125 req->addr_low = 2126 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2127 req->addr_high = 2128 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2129 req->length = htobe16(seg->ds_len); 2130 req->cksum_offset = cksum_offset; 2131 if (cksum_offset > seg->ds_len) 2132 cksum_offset -= seg->ds_len; 2133 else 2134 cksum_offset = 0; 2135 
req->pseudo_hdr_offset = pseudo_hdr_offset; 2136 req->pad = 0; /* complete solid 16-byte block */ 2137 req->rdma_count = 1; 2138 req->flags |= flags | ((cum_len & 1) * odd_flag); 2139 cum_len += seg->ds_len; 2140 seg++; 2141 req++; 2142 req->flags = 0; 2143 } 2144 req--; 2145 /* pad runts to 60 bytes */ 2146 if (cum_len < 60) { 2147 req++; 2148 req->addr_low = 2149 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2150 req->addr_high = 2151 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2152 req->length = htobe16(60 - cum_len); 2153 req->cksum_offset = 0; 2154 req->pseudo_hdr_offset = pseudo_hdr_offset; 2155 req->pad = 0; /* complete solid 16-byte block */ 2156 req->rdma_count = 1; 2157 req->flags |= flags | ((cum_len & 1) * odd_flag); 2158 cnt++; 2159 } 2160 2161 tx->req_list[0].rdma_count = cnt; 2162 #if 0 2163 /* print what the firmware will see */ 2164 for (i = 0; i < cnt; i++) { 2165 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2166 "cso:%d, flags:0x%x, rdma:%d\n", 2167 i, (int)ntohl(tx->req_list[i].addr_high), 2168 (int)ntohl(tx->req_list[i].addr_low), 2169 (int)ntohs(tx->req_list[i].length), 2170 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2171 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2172 tx->req_list[i].rdma_count); 2173 } 2174 printf("--------------\n"); 2175 #endif 2176 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2177 mxge_submit_req(tx, tx->req_list, cnt); 2178 #ifdef IFNET_BUF_RING 2179 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2180 /* tell the NIC to start polling this slice */ 2181 *tx->send_go = 1; 2182 tx->queue_active = 1; 2183 tx->activate++; 2184 wmb(); 2185 } 2186 #endif 2187 return; 2188 2189 drop: 2190 m_freem(m); 2191 drop_without_m: 2192 ss->oerrors++; 2193 return; 2194 } 2195 2196 #ifdef IFNET_BUF_RING 2197 static void 2198 mxge_qflush(struct ifnet *ifp) 2199 { 2200 mxge_softc_t *sc = ifp->if_softc; 2201 mxge_tx_ring_t *tx; 2202 struct mbuf *m; 2203 int slice; 2204 2205 for (slice = 0; 
slice < sc->num_slices; slice++) { 2206 tx = &sc->ss[slice].tx; 2207 mtx_lock(&tx->mtx); 2208 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2209 m_freem(m); 2210 mtx_unlock(&tx->mtx); 2211 } 2212 if_qflush(ifp); 2213 } 2214 2215 static inline void 2216 mxge_start_locked(struct mxge_slice_state *ss) 2217 { 2218 mxge_softc_t *sc; 2219 struct mbuf *m; 2220 struct ifnet *ifp; 2221 mxge_tx_ring_t *tx; 2222 2223 sc = ss->sc; 2224 ifp = sc->ifp; 2225 tx = &ss->tx; 2226 2227 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2228 m = drbr_dequeue(ifp, tx->br); 2229 if (m == NULL) { 2230 return; 2231 } 2232 /* let BPF see it */ 2233 BPF_MTAP(ifp, m); 2234 2235 /* give it to the nic */ 2236 mxge_encap(ss, m); 2237 } 2238 /* ran out of transmit slots */ 2239 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) 2240 && (!drbr_empty(ifp, tx->br))) { 2241 ss->if_drv_flags |= IFF_DRV_OACTIVE; 2242 tx->stall++; 2243 } 2244 } 2245 2246 static int 2247 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2248 { 2249 mxge_softc_t *sc; 2250 struct ifnet *ifp; 2251 mxge_tx_ring_t *tx; 2252 int err; 2253 2254 sc = ss->sc; 2255 ifp = sc->ifp; 2256 tx = &ss->tx; 2257 2258 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 2259 IFF_DRV_RUNNING) { 2260 err = drbr_enqueue(ifp, tx->br, m); 2261 return (err); 2262 } 2263 2264 if (!drbr_needs_enqueue(ifp, tx->br) && 2265 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2266 /* let BPF see it */ 2267 BPF_MTAP(ifp, m); 2268 /* give it to the nic */ 2269 mxge_encap(ss, m); 2270 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2271 return (err); 2272 } 2273 if (!drbr_empty(ifp, tx->br)) 2274 mxge_start_locked(ss); 2275 return (0); 2276 } 2277 2278 static int 2279 mxge_transmit(struct ifnet *ifp, struct mbuf *m) 2280 { 2281 mxge_softc_t *sc = ifp->if_softc; 2282 struct mxge_slice_state *ss; 2283 mxge_tx_ring_t *tx; 2284 int err = 0; 2285 int slice; 2286 2287 slice = m->m_pkthdr.flowid; 2288 slice &= 
(sc->num_slices - 1); /* num_slices always power of 2 */ 2289 2290 ss = &sc->ss[slice]; 2291 tx = &ss->tx; 2292 2293 if (mtx_trylock(&tx->mtx)) { 2294 err = mxge_transmit_locked(ss, m); 2295 mtx_unlock(&tx->mtx); 2296 } else { 2297 err = drbr_enqueue(ifp, tx->br, m); 2298 } 2299 2300 return (err); 2301 } 2302 2303 #else 2304 2305 static inline void 2306 mxge_start_locked(struct mxge_slice_state *ss) 2307 { 2308 mxge_softc_t *sc; 2309 struct mbuf *m; 2310 struct ifnet *ifp; 2311 mxge_tx_ring_t *tx; 2312 2313 sc = ss->sc; 2314 ifp = sc->ifp; 2315 tx = &ss->tx; 2316 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2317 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 2318 if (m == NULL) { 2319 return; 2320 } 2321 /* let BPF see it */ 2322 BPF_MTAP(ifp, m); 2323 2324 /* give it to the nic */ 2325 mxge_encap(ss, m); 2326 } 2327 /* ran out of transmit slots */ 2328 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 2329 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2330 tx->stall++; 2331 } 2332 } 2333 #endif 2334 static void 2335 mxge_start(struct ifnet *ifp) 2336 { 2337 mxge_softc_t *sc = ifp->if_softc; 2338 struct mxge_slice_state *ss; 2339 2340 /* only use the first slice for now */ 2341 ss = &sc->ss[0]; 2342 mtx_lock(&ss->tx.mtx); 2343 mxge_start_locked(ss); 2344 mtx_unlock(&ss->tx.mtx); 2345 } 2346 2347 /* 2348 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2349 * at most 32 bytes at a time, so as to avoid involving the software 2350 * pio handler in the nic. 
We re-write the first segment's low 2351 * DMA address to mark it valid only after we write the entire chunk 2352 * in a burst 2353 */ 2354 static inline void 2355 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2356 mcp_kreq_ether_recv_t *src) 2357 { 2358 uint32_t low; 2359 2360 low = src->addr_low; 2361 src->addr_low = 0xffffffff; 2362 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2363 wmb(); 2364 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2365 wmb(); 2366 src->addr_low = low; 2367 dst->addr_low = low; 2368 wmb(); 2369 } 2370 2371 static int 2372 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2373 { 2374 bus_dma_segment_t seg; 2375 struct mbuf *m; 2376 mxge_rx_ring_t *rx = &ss->rx_small; 2377 int cnt, err; 2378 2379 m = m_gethdr(M_NOWAIT, MT_DATA); 2380 if (m == NULL) { 2381 rx->alloc_fail++; 2382 err = ENOBUFS; 2383 goto done; 2384 } 2385 m->m_len = MHLEN; 2386 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2387 &seg, &cnt, BUS_DMA_NOWAIT); 2388 if (err != 0) { 2389 m_free(m); 2390 goto done; 2391 } 2392 rx->info[idx].m = m; 2393 rx->shadow[idx].addr_low = 2394 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2395 rx->shadow[idx].addr_high = 2396 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2397 2398 done: 2399 if ((idx & 7) == 7) 2400 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2401 return err; 2402 } 2403 2404 static int 2405 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2406 { 2407 bus_dma_segment_t seg[3]; 2408 struct mbuf *m; 2409 mxge_rx_ring_t *rx = &ss->rx_big; 2410 int cnt, err, i; 2411 2412 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2413 if (m == NULL) { 2414 rx->alloc_fail++; 2415 err = ENOBUFS; 2416 goto done; 2417 } 2418 m->m_len = rx->mlen; 2419 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2420 seg, &cnt, BUS_DMA_NOWAIT); 2421 if (err != 0) { 2422 m_free(m); 2423 goto done; 2424 } 2425 rx->info[idx].m = m; 2426 rx->shadow[idx].addr_low = 2427 
/*
 * 16-bit one's-complement sum over len bytes starting at raw.
 *
 * The data is summed as native 16-bit words and the carries are folded
 * back in, so the result is in the same byte order as the data itself.
 * A trailing odd byte is summed as if it were followed by a zero pad
 * byte (the RFC 1071 convention) instead of being loaded as a full
 * word, which would read one byte beyond the buffer.
 *
 * raw: start of the data
 * len: number of bytes to sum; len <= 0 yields 0
 *
 * Returns the folded 16-bit sum (not complemented).
 */
static uint16_t
mxge_csum_generic(uint16_t *raw, int len)
{
	uint32_t csum;
	uint16_t tail;

	csum = 0;
	while (len > 1) {
		csum += *raw;
		raw++;
		len -= 2;
	}
	if (len == 1) {
		/*
		 * Place the last byte in the first byte of a zeroed
		 * word so it carries the same weight it would have had
		 * in memory next to a zero pad byte.
		 */
		tail = 0;
		*(uint8_t *)&tail = *(const uint8_t *)raw;
		csum += tail;
	}
	/* fold twice: the first fold can itself produce a carry */
	csum = (csum >> 16) + (csum & 0xffff);
	csum = (csum >> 16) + (csum & 0xffff);
	return (uint16_t)csum;
}
2492 */ 2493 2494 partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset - 2495 ETHER_HDR_LEN); 2496 csum += ~partial; 2497 csum += (csum < ~partial); 2498 csum = (csum >> 16) + (csum & 0xFFFF); 2499 csum = (csum >> 16) + (csum & 0xFFFF); 2500 c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt, 2501 csum); 2502 c ^= 0xffff; 2503 return (c); 2504 } 2505 #endif /* INET6 */ 2506 /* 2507 * Myri10GE hardware checksums are not valid if the sender 2508 * padded the frame with non-zero padding. This is because 2509 * the firmware just does a simple 16-bit 1s complement 2510 * checksum across the entire frame, excluding the first 14 2511 * bytes. It is best to simply to check the checksum and 2512 * tell the stack about it only if the checksum is good 2513 */ 2514 2515 static inline uint16_t 2516 mxge_rx_csum(struct mbuf *m, int csum) 2517 { 2518 struct ether_header *eh; 2519 #ifdef INET 2520 struct ip *ip; 2521 #endif 2522 #if defined(INET) || defined(INET6) 2523 int cap = m->m_pkthdr.rcvif->if_capenable; 2524 #endif 2525 uint16_t c, etype; 2526 2527 eh = mtod(m, struct ether_header *); 2528 etype = ntohs(eh->ether_type); 2529 switch (etype) { 2530 #ifdef INET 2531 case ETHERTYPE_IP: 2532 if ((cap & IFCAP_RXCSUM) == 0) 2533 return (1); 2534 ip = (struct ip *)(eh + 1); 2535 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP) 2536 return (1); 2537 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2538 htonl(ntohs(csum) + ntohs(ip->ip_len) - 2539 (ip->ip_hl << 2) + ip->ip_p)); 2540 c ^= 0xffff; 2541 break; 2542 #endif 2543 #ifdef INET6 2544 case ETHERTYPE_IPV6: 2545 if ((cap & IFCAP_RXCSUM_IPV6) == 0) 2546 return (1); 2547 c = mxge_rx_csum6((eh + 1), m, csum); 2548 break; 2549 #endif 2550 default: 2551 c = 1; 2552 } 2553 return (c); 2554 } 2555 2556 static void 2557 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2558 { 2559 struct ether_vlan_header *evl; 2560 struct ether_header *eh; 2561 uint32_t partial; 2562 2563 evl = mtod(m, struct 
ether_vlan_header *); 2564 eh = mtod(m, struct ether_header *); 2565 2566 /* 2567 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes 2568 * after what the firmware thought was the end of the ethernet 2569 * header. 2570 */ 2571 2572 /* put checksum into host byte order */ 2573 *csum = ntohs(*csum); 2574 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2575 (*csum) += ~partial; 2576 (*csum) += ((*csum) < ~partial); 2577 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2578 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2579 2580 /* restore checksum to network byte order; 2581 later consumers expect this */ 2582 *csum = htons(*csum); 2583 2584 /* save the tag */ 2585 #ifdef MXGE_NEW_VLAN_API 2586 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); 2587 #else 2588 { 2589 struct m_tag *mtag; 2590 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), 2591 M_NOWAIT); 2592 if (mtag == NULL) 2593 return; 2594 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); 2595 m_tag_prepend(m, mtag); 2596 } 2597 2598 #endif 2599 m->m_flags |= M_VLANTAG; 2600 2601 /* 2602 * Remove the 802.1q header by copying the Ethernet 2603 * addresses over it and adjusting the beginning of 2604 * the data in the mbuf. The encapsulated Ethernet 2605 * type field is already in place. 
2606 */ 2607 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2608 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2609 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2610 } 2611 2612 static inline void 2613 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, 2614 uint32_t csum, int lro) 2615 { 2616 mxge_softc_t *sc; 2617 struct ifnet *ifp; 2618 struct mbuf *m; 2619 struct ether_header *eh; 2620 mxge_rx_ring_t *rx; 2621 bus_dmamap_t old_map; 2622 int idx; 2623 2624 sc = ss->sc; 2625 ifp = sc->ifp; 2626 rx = &ss->rx_big; 2627 idx = rx->cnt & rx->mask; 2628 rx->cnt += rx->nbufs; 2629 /* save a pointer to the received mbuf */ 2630 m = rx->info[idx].m; 2631 /* try to replace the received mbuf */ 2632 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2633 /* drop the frame -- the old mbuf is re-cycled */ 2634 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 2635 return; 2636 } 2637 2638 /* unmap the received buffer */ 2639 old_map = rx->info[idx].map; 2640 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2641 bus_dmamap_unload(rx->dmat, old_map); 2642 2643 /* swap the bus_dmamap_t's */ 2644 rx->info[idx].map = rx->extra_map; 2645 rx->extra_map = old_map; 2646 2647 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2648 * aligned */ 2649 m->m_data += MXGEFW_PAD; 2650 2651 m->m_pkthdr.rcvif = ifp; 2652 m->m_len = m->m_pkthdr.len = len; 2653 ss->ipackets++; 2654 eh = mtod(m, struct ether_header *); 2655 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2656 mxge_vlan_tag_remove(m, &csum); 2657 } 2658 /* flowid only valid if RSS hashing is enabled */ 2659 if (sc->num_slices > 1) { 2660 m->m_pkthdr.flowid = (ss - sc->ss); 2661 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 2662 } 2663 /* if the checksum is valid, mark it in the mbuf header */ 2664 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && 2665 (0 == mxge_rx_csum(m, csum))) { 2666 /* Tell the stack that the checksum is good */ 2667 m->m_pkthdr.csum_data = 0xffff; 2668 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2669 
CSUM_DATA_VALID; 2670 2671 #if defined(INET) || defined (INET6) 2672 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0))) 2673 return; 2674 #endif 2675 } 2676 /* pass the frame up the stack */ 2677 (*ifp->if_input)(ifp, m); 2678 } 2679 2680 static inline void 2681 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, 2682 uint32_t csum, int lro) 2683 { 2684 mxge_softc_t *sc; 2685 struct ifnet *ifp; 2686 struct ether_header *eh; 2687 struct mbuf *m; 2688 mxge_rx_ring_t *rx; 2689 bus_dmamap_t old_map; 2690 int idx; 2691 2692 sc = ss->sc; 2693 ifp = sc->ifp; 2694 rx = &ss->rx_small; 2695 idx = rx->cnt & rx->mask; 2696 rx->cnt++; 2697 /* save a pointer to the received mbuf */ 2698 m = rx->info[idx].m; 2699 /* try to replace the received mbuf */ 2700 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2701 /* drop the frame -- the old mbuf is re-cycled */ 2702 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 2703 return; 2704 } 2705 2706 /* unmap the received buffer */ 2707 old_map = rx->info[idx].map; 2708 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2709 bus_dmamap_unload(rx->dmat, old_map); 2710 2711 /* swap the bus_dmamap_t's */ 2712 rx->info[idx].map = rx->extra_map; 2713 rx->extra_map = old_map; 2714 2715 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2716 * aligned */ 2717 m->m_data += MXGEFW_PAD; 2718 2719 m->m_pkthdr.rcvif = ifp; 2720 m->m_len = m->m_pkthdr.len = len; 2721 ss->ipackets++; 2722 eh = mtod(m, struct ether_header *); 2723 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2724 mxge_vlan_tag_remove(m, &csum); 2725 } 2726 /* flowid only valid if RSS hashing is enabled */ 2727 if (sc->num_slices > 1) { 2728 m->m_pkthdr.flowid = (ss - sc->ss); 2729 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 2730 } 2731 /* if the checksum is valid, mark it in the mbuf header */ 2732 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && 2733 (0 == mxge_rx_csum(m, csum))) { 2734 /* Tell the stack that the checksum is good */ 2735 
m->m_pkthdr.csum_data = 0xffff; 2736 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2737 CSUM_DATA_VALID; 2738 2739 #if defined(INET) || defined (INET6) 2740 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum))) 2741 return; 2742 #endif 2743 } 2744 /* pass the frame up the stack */ 2745 (*ifp->if_input)(ifp, m); 2746 } 2747 2748 static inline void 2749 mxge_clean_rx_done(struct mxge_slice_state *ss) 2750 { 2751 mxge_rx_done_t *rx_done = &ss->rx_done; 2752 int limit = 0; 2753 uint16_t length; 2754 uint16_t checksum; 2755 int lro; 2756 2757 lro = ss->sc->ifp->if_capenable & IFCAP_LRO; 2758 while (rx_done->entry[rx_done->idx].length != 0) { 2759 length = ntohs(rx_done->entry[rx_done->idx].length); 2760 rx_done->entry[rx_done->idx].length = 0; 2761 checksum = rx_done->entry[rx_done->idx].checksum; 2762 if (length <= (MHLEN - MXGEFW_PAD)) 2763 mxge_rx_done_small(ss, length, checksum, lro); 2764 else 2765 mxge_rx_done_big(ss, length, checksum, lro); 2766 rx_done->cnt++; 2767 rx_done->idx = rx_done->cnt & rx_done->mask; 2768 2769 /* limit potential for livelock */ 2770 if (__predict_false(++limit > rx_done->mask / 2)) 2771 break; 2772 } 2773 #if defined(INET) || defined (INET6) 2774 tcp_lro_flush_all(&ss->lc); 2775 #endif 2776 } 2777 2778 static inline void 2779 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2780 { 2781 struct ifnet *ifp; 2782 mxge_tx_ring_t *tx; 2783 struct mbuf *m; 2784 bus_dmamap_t map; 2785 int idx; 2786 int *flags; 2787 2788 tx = &ss->tx; 2789 ifp = ss->sc->ifp; 2790 while (tx->pkt_done != mcp_idx) { 2791 idx = tx->done & tx->mask; 2792 tx->done++; 2793 m = tx->info[idx].m; 2794 /* mbuf and DMA map only attached to the first 2795 segment per-mbuf */ 2796 if (m != NULL) { 2797 ss->obytes += m->m_pkthdr.len; 2798 if (m->m_flags & M_MCAST) 2799 ss->omcasts++; 2800 ss->opackets++; 2801 tx->info[idx].m = NULL; 2802 map = tx->info[idx].map; 2803 bus_dmamap_unload(tx->dmat, map); 2804 m_freem(m); 2805 } 2806 if (tx->info[idx].flag) { 2807 
tx->info[idx].flag = 0; 2808 tx->pkt_done++; 2809 } 2810 } 2811 2812 /* If we have space, clear IFF_OACTIVE to tell the stack that 2813 its OK to send packets */ 2814 #ifdef IFNET_BUF_RING 2815 flags = &ss->if_drv_flags; 2816 #else 2817 flags = &ifp->if_drv_flags; 2818 #endif 2819 mtx_lock(&ss->tx.mtx); 2820 if ((*flags) & IFF_DRV_OACTIVE && 2821 tx->req - tx->done < (tx->mask + 1)/4) { 2822 *(flags) &= ~IFF_DRV_OACTIVE; 2823 ss->tx.wake++; 2824 mxge_start_locked(ss); 2825 } 2826 #ifdef IFNET_BUF_RING 2827 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { 2828 /* let the NIC stop polling this queue, since there 2829 * are no more transmits pending */ 2830 if (tx->req == tx->done) { 2831 *tx->send_stop = 1; 2832 tx->queue_active = 0; 2833 tx->deactivate++; 2834 wmb(); 2835 } 2836 } 2837 #endif 2838 mtx_unlock(&ss->tx.mtx); 2839 2840 } 2841 2842 static struct mxge_media_type mxge_xfp_media_types[] = 2843 { 2844 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2845 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2846 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2847 {0, (1 << 5), "10GBASE-ER"}, 2848 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2849 {0, (1 << 3), "10GBASE-SW"}, 2850 {0, (1 << 2), "10GBASE-LW"}, 2851 {0, (1 << 1), "10GBASE-EW"}, 2852 {0, (1 << 0), "Reserved"} 2853 }; 2854 static struct mxge_media_type mxge_sfp_media_types[] = 2855 { 2856 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"}, 2857 {0, (1 << 7), "Reserved"}, 2858 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2859 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2860 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}, 2861 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"} 2862 }; 2863 2864 static void 2865 mxge_media_set(mxge_softc_t *sc, int media_type) 2866 { 2867 2868 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type, 2869 0, NULL); 2870 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type); 2871 sc->current_media = media_type; 2872 sc->media.ifm_media = sc->media.ifm_cur->ifm_media; 2873 } 2874 2875 static void 2876 mxge_media_init(mxge_softc_t 
*sc) 2877 { 2878 char *ptr; 2879 int i; 2880 2881 ifmedia_removeall(&sc->media); 2882 mxge_media_set(sc, IFM_AUTO); 2883 2884 /* 2885 * parse the product code to deterimine the interface type 2886 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2887 * after the 3rd dash in the driver's cached copy of the 2888 * EEPROM's product code string. 2889 */ 2890 ptr = sc->product_code_string; 2891 if (ptr == NULL) { 2892 device_printf(sc->dev, "Missing product code\n"); 2893 return; 2894 } 2895 2896 for (i = 0; i < 3; i++, ptr++) { 2897 ptr = strchr(ptr, '-'); 2898 if (ptr == NULL) { 2899 device_printf(sc->dev, 2900 "only %d dashes in PC?!?\n", i); 2901 return; 2902 } 2903 } 2904 if (*ptr == 'C' || *(ptr +1) == 'C') { 2905 /* -C is CX4 */ 2906 sc->connector = MXGE_CX4; 2907 mxge_media_set(sc, IFM_10G_CX4); 2908 } else if (*ptr == 'Q') { 2909 /* -Q is Quad Ribbon Fiber */ 2910 sc->connector = MXGE_QRF; 2911 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2912 /* FreeBSD has no media type for Quad ribbon fiber */ 2913 } else if (*ptr == 'R') { 2914 /* -R is XFP */ 2915 sc->connector = MXGE_XFP; 2916 } else if (*ptr == 'S' || *(ptr +1) == 'S') { 2917 /* -S or -2S is SFP+ */ 2918 sc->connector = MXGE_SFP; 2919 } else { 2920 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2921 } 2922 } 2923 2924 /* 2925 * Determine the media type for a NIC. Some XFPs will identify 2926 * themselves only when their link is up, so this is initiated via a 2927 * link up interrupt. However, this can potentially take up to 2928 * several milliseconds, so it is run via the watchdog routine, rather 2929 * than in the interrupt handler itself. 
2930 */ 2931 static void 2932 mxge_media_probe(mxge_softc_t *sc) 2933 { 2934 mxge_cmd_t cmd; 2935 char *cage_type; 2936 2937 struct mxge_media_type *mxge_media_types = NULL; 2938 int i, err, ms, mxge_media_type_entries; 2939 uint32_t byte; 2940 2941 sc->need_media_probe = 0; 2942 2943 if (sc->connector == MXGE_XFP) { 2944 /* -R is XFP */ 2945 mxge_media_types = mxge_xfp_media_types; 2946 mxge_media_type_entries = 2947 nitems(mxge_xfp_media_types); 2948 byte = MXGE_XFP_COMPLIANCE_BYTE; 2949 cage_type = "XFP"; 2950 } else if (sc->connector == MXGE_SFP) { 2951 /* -S or -2S is SFP+ */ 2952 mxge_media_types = mxge_sfp_media_types; 2953 mxge_media_type_entries = 2954 nitems(mxge_sfp_media_types); 2955 cage_type = "SFP+"; 2956 byte = 3; 2957 } else { 2958 /* nothing to do; media type cannot change */ 2959 return; 2960 } 2961 2962 /* 2963 * At this point we know the NIC has an XFP cage, so now we 2964 * try to determine what is in the cage by using the 2965 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2966 * register. 
We read just one byte, which may take over 2967 * a millisecond 2968 */ 2969 2970 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2971 cmd.data1 = byte; 2972 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2973 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 2974 device_printf(sc->dev, "failed to read XFP\n"); 2975 } 2976 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 2977 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 2978 } 2979 if (err != MXGEFW_CMD_OK) { 2980 return; 2981 } 2982 2983 /* now we wait for the data to be cached */ 2984 cmd.data0 = byte; 2985 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2986 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 2987 DELAY(1000); 2988 cmd.data0 = byte; 2989 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2990 } 2991 if (err != MXGEFW_CMD_OK) { 2992 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 2993 cage_type, err, ms); 2994 return; 2995 } 2996 2997 if (cmd.data0 == mxge_media_types[0].bitmask) { 2998 if (mxge_verbose) 2999 device_printf(sc->dev, "%s:%s\n", cage_type, 3000 mxge_media_types[0].name); 3001 if (sc->current_media != mxge_media_types[0].flag) { 3002 mxge_media_init(sc); 3003 mxge_media_set(sc, mxge_media_types[0].flag); 3004 } 3005 return; 3006 } 3007 for (i = 1; i < mxge_media_type_entries; i++) { 3008 if (cmd.data0 & mxge_media_types[i].bitmask) { 3009 if (mxge_verbose) 3010 device_printf(sc->dev, "%s:%s\n", 3011 cage_type, 3012 mxge_media_types[i].name); 3013 3014 if (sc->current_media != mxge_media_types[i].flag) { 3015 mxge_media_init(sc); 3016 mxge_media_set(sc, mxge_media_types[i].flag); 3017 } 3018 return; 3019 } 3020 } 3021 if (mxge_verbose) 3022 device_printf(sc->dev, "%s media 0x%x unknown\n", 3023 cage_type, cmd.data0); 3024 3025 return; 3026 } 3027 3028 static void 3029 mxge_intr(void *arg) 3030 { 3031 struct mxge_slice_state *ss = arg; 3032 mxge_softc_t *sc = ss->sc; 3033 mcp_irq_data_t *stats = ss->fw_stats; 3034 mxge_tx_ring_t *tx = &ss->tx; 3035 mxge_rx_done_t 
*rx_done = &ss->rx_done; 3036 uint32_t send_done_count; 3037 uint8_t valid; 3038 3039 #ifndef IFNET_BUF_RING 3040 /* an interrupt on a non-zero slice is implicitly valid 3041 since MSI-X irqs are not shared */ 3042 if (ss != sc->ss) { 3043 mxge_clean_rx_done(ss); 3044 *ss->irq_claim = be32toh(3); 3045 return; 3046 } 3047 #endif 3048 3049 /* make sure the DMA has finished */ 3050 if (!stats->valid) { 3051 return; 3052 } 3053 valid = stats->valid; 3054 3055 if (sc->legacy_irq) { 3056 /* lower legacy IRQ */ 3057 *sc->irq_deassert = 0; 3058 if (!mxge_deassert_wait) 3059 /* don't wait for conf. that irq is low */ 3060 stats->valid = 0; 3061 } else { 3062 stats->valid = 0; 3063 } 3064 3065 /* loop while waiting for legacy irq deassertion */ 3066 do { 3067 /* check for transmit completes and receives */ 3068 send_done_count = be32toh(stats->send_done_count); 3069 while ((send_done_count != tx->pkt_done) || 3070 (rx_done->entry[rx_done->idx].length != 0)) { 3071 if (send_done_count != tx->pkt_done) 3072 mxge_tx_done(ss, (int)send_done_count); 3073 mxge_clean_rx_done(ss); 3074 send_done_count = be32toh(stats->send_done_count); 3075 } 3076 if (sc->legacy_irq && mxge_deassert_wait) 3077 wmb(); 3078 } while (*((volatile uint8_t *) &stats->valid)); 3079 3080 /* fw link & error stats meaningful only on the first slice */ 3081 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 3082 if (sc->link_state != stats->link_up) { 3083 sc->link_state = stats->link_up; 3084 if (sc->link_state) { 3085 if_link_state_change(sc->ifp, LINK_STATE_UP); 3086 if (mxge_verbose) 3087 device_printf(sc->dev, "link up\n"); 3088 } else { 3089 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3090 if (mxge_verbose) 3091 device_printf(sc->dev, "link down\n"); 3092 } 3093 sc->need_media_probe = 1; 3094 } 3095 if (sc->rdma_tags_available != 3096 be32toh(stats->rdma_tags_available)) { 3097 sc->rdma_tags_available = 3098 be32toh(stats->rdma_tags_available); 3099 device_printf(sc->dev, "RDMA timed out! 
%d tags " 3100 "left\n", sc->rdma_tags_available); 3101 } 3102 3103 if (stats->link_down) { 3104 sc->down_cnt += stats->link_down; 3105 sc->link_state = 0; 3106 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3107 } 3108 } 3109 3110 /* check to see if we have rx token to pass back */ 3111 if (valid & 0x1) 3112 *ss->irq_claim = be32toh(3); 3113 *(ss->irq_claim + 1) = be32toh(3); 3114 } 3115 3116 static void 3117 mxge_init(void *arg) 3118 { 3119 mxge_softc_t *sc = arg; 3120 struct ifnet *ifp = sc->ifp; 3121 3122 mtx_lock(&sc->driver_mtx); 3123 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 3124 (void) mxge_open(sc); 3125 mtx_unlock(&sc->driver_mtx); 3126 } 3127 3128 static void 3129 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 3130 { 3131 int i; 3132 3133 #if defined(INET) || defined(INET6) 3134 tcp_lro_free(&ss->lc); 3135 #endif 3136 for (i = 0; i <= ss->rx_big.mask; i++) { 3137 if (ss->rx_big.info[i].m == NULL) 3138 continue; 3139 bus_dmamap_unload(ss->rx_big.dmat, 3140 ss->rx_big.info[i].map); 3141 m_freem(ss->rx_big.info[i].m); 3142 ss->rx_big.info[i].m = NULL; 3143 } 3144 3145 for (i = 0; i <= ss->rx_small.mask; i++) { 3146 if (ss->rx_small.info[i].m == NULL) 3147 continue; 3148 bus_dmamap_unload(ss->rx_small.dmat, 3149 ss->rx_small.info[i].map); 3150 m_freem(ss->rx_small.info[i].m); 3151 ss->rx_small.info[i].m = NULL; 3152 } 3153 3154 /* transmit ring used only on the first slice */ 3155 if (ss->tx.info == NULL) 3156 return; 3157 3158 for (i = 0; i <= ss->tx.mask; i++) { 3159 ss->tx.info[i].flag = 0; 3160 if (ss->tx.info[i].m == NULL) 3161 continue; 3162 bus_dmamap_unload(ss->tx.dmat, 3163 ss->tx.info[i].map); 3164 m_freem(ss->tx.info[i].m); 3165 ss->tx.info[i].m = NULL; 3166 } 3167 } 3168 3169 static void 3170 mxge_free_mbufs(mxge_softc_t *sc) 3171 { 3172 int slice; 3173 3174 for (slice = 0; slice < sc->num_slices; slice++) 3175 mxge_free_slice_mbufs(&sc->ss[slice]); 3176 } 3177 3178 static void 3179 mxge_free_slice_rings(struct mxge_slice_state *ss) 
3180 { 3181 int i; 3182 3183 if (ss->rx_done.entry != NULL) 3184 mxge_dma_free(&ss->rx_done.dma); 3185 ss->rx_done.entry = NULL; 3186 3187 if (ss->tx.req_bytes != NULL) 3188 free(ss->tx.req_bytes, M_DEVBUF); 3189 ss->tx.req_bytes = NULL; 3190 3191 if (ss->tx.seg_list != NULL) 3192 free(ss->tx.seg_list, M_DEVBUF); 3193 ss->tx.seg_list = NULL; 3194 3195 if (ss->rx_small.shadow != NULL) 3196 free(ss->rx_small.shadow, M_DEVBUF); 3197 ss->rx_small.shadow = NULL; 3198 3199 if (ss->rx_big.shadow != NULL) 3200 free(ss->rx_big.shadow, M_DEVBUF); 3201 ss->rx_big.shadow = NULL; 3202 3203 if (ss->tx.info != NULL) { 3204 if (ss->tx.dmat != NULL) { 3205 for (i = 0; i <= ss->tx.mask; i++) { 3206 bus_dmamap_destroy(ss->tx.dmat, 3207 ss->tx.info[i].map); 3208 } 3209 bus_dma_tag_destroy(ss->tx.dmat); 3210 } 3211 free(ss->tx.info, M_DEVBUF); 3212 } 3213 ss->tx.info = NULL; 3214 3215 if (ss->rx_small.info != NULL) { 3216 if (ss->rx_small.dmat != NULL) { 3217 for (i = 0; i <= ss->rx_small.mask; i++) { 3218 bus_dmamap_destroy(ss->rx_small.dmat, 3219 ss->rx_small.info[i].map); 3220 } 3221 bus_dmamap_destroy(ss->rx_small.dmat, 3222 ss->rx_small.extra_map); 3223 bus_dma_tag_destroy(ss->rx_small.dmat); 3224 } 3225 free(ss->rx_small.info, M_DEVBUF); 3226 } 3227 ss->rx_small.info = NULL; 3228 3229 if (ss->rx_big.info != NULL) { 3230 if (ss->rx_big.dmat != NULL) { 3231 for (i = 0; i <= ss->rx_big.mask; i++) { 3232 bus_dmamap_destroy(ss->rx_big.dmat, 3233 ss->rx_big.info[i].map); 3234 } 3235 bus_dmamap_destroy(ss->rx_big.dmat, 3236 ss->rx_big.extra_map); 3237 bus_dma_tag_destroy(ss->rx_big.dmat); 3238 } 3239 free(ss->rx_big.info, M_DEVBUF); 3240 } 3241 ss->rx_big.info = NULL; 3242 } 3243 3244 static void 3245 mxge_free_rings(mxge_softc_t *sc) 3246 { 3247 int slice; 3248 3249 for (slice = 0; slice < sc->num_slices; slice++) 3250 mxge_free_slice_rings(&sc->ss[slice]); 3251 } 3252 3253 static int 3254 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3255 int 
tx_ring_entries) 3256 { 3257 mxge_softc_t *sc = ss->sc; 3258 size_t bytes; 3259 int err, i; 3260 3261 /* allocate per-slice receive resources */ 3262 3263 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 3264 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 3265 3266 /* allocate the rx shadow rings */ 3267 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 3268 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3269 3270 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 3271 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3272 3273 /* allocate the rx host info rings */ 3274 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 3275 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3276 3277 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 3278 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3279 3280 /* allocate the rx busdma resources */ 3281 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3282 1, /* alignment */ 3283 4096, /* boundary */ 3284 BUS_SPACE_MAXADDR, /* low */ 3285 BUS_SPACE_MAXADDR, /* high */ 3286 NULL, NULL, /* filter */ 3287 MHLEN, /* maxsize */ 3288 1, /* num segs */ 3289 MHLEN, /* maxsegsize */ 3290 BUS_DMA_ALLOCNOW, /* flags */ 3291 NULL, NULL, /* lock */ 3292 &ss->rx_small.dmat); /* tag */ 3293 if (err != 0) { 3294 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3295 err); 3296 return err; 3297 } 3298 3299 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3300 1, /* alignment */ 3301 #if MXGE_VIRT_JUMBOS 3302 4096, /* boundary */ 3303 #else 3304 0, /* boundary */ 3305 #endif 3306 BUS_SPACE_MAXADDR, /* low */ 3307 BUS_SPACE_MAXADDR, /* high */ 3308 NULL, NULL, /* filter */ 3309 3*4096, /* maxsize */ 3310 #if MXGE_VIRT_JUMBOS 3311 3, /* num segs */ 3312 4096, /* maxsegsize*/ 3313 #else 3314 1, /* num segs */ 3315 MJUM9BYTES, /* maxsegsize*/ 3316 #endif 3317 BUS_DMA_ALLOCNOW, /* flags */ 3318 NULL, NULL, /* lock */ 3319 &ss->rx_big.dmat); /* tag */ 
3320 if (err != 0) { 3321 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3322 err); 3323 return err; 3324 } 3325 for (i = 0; i <= ss->rx_small.mask; i++) { 3326 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3327 &ss->rx_small.info[i].map); 3328 if (err != 0) { 3329 device_printf(sc->dev, "Err %d rx_small dmamap\n", 3330 err); 3331 return err; 3332 } 3333 } 3334 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3335 &ss->rx_small.extra_map); 3336 if (err != 0) { 3337 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 3338 err); 3339 return err; 3340 } 3341 3342 for (i = 0; i <= ss->rx_big.mask; i++) { 3343 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3344 &ss->rx_big.info[i].map); 3345 if (err != 0) { 3346 device_printf(sc->dev, "Err %d rx_big dmamap\n", 3347 err); 3348 return err; 3349 } 3350 } 3351 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3352 &ss->rx_big.extra_map); 3353 if (err != 0) { 3354 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 3355 err); 3356 return err; 3357 } 3358 3359 /* now allocate TX resources */ 3360 3361 #ifndef IFNET_BUF_RING 3362 /* only use a single TX ring for now */ 3363 if (ss != ss->sc->ss) 3364 return 0; 3365 #endif 3366 3367 ss->tx.mask = tx_ring_entries - 1; 3368 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3369 3370 /* allocate the tx request copy block */ 3371 bytes = 8 + 3372 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3373 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 3374 /* ensure req_list entries are aligned to 8 bytes */ 3375 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3376 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL); 3377 3378 /* allocate the tx busdma segment list */ 3379 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3380 ss->tx.seg_list = (bus_dma_segment_t *) 3381 malloc(bytes, M_DEVBUF, M_WAITOK); 3382 3383 /* allocate the tx host info ring */ 3384 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3385 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 
3386 3387 /* allocate the tx busdma resources */ 3388 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3389 1, /* alignment */ 3390 sc->tx_boundary, /* boundary */ 3391 BUS_SPACE_MAXADDR, /* low */ 3392 BUS_SPACE_MAXADDR, /* high */ 3393 NULL, NULL, /* filter */ 3394 65536 + 256, /* maxsize */ 3395 ss->tx.max_desc - 2, /* num segs */ 3396 sc->tx_boundary, /* maxsegsz */ 3397 BUS_DMA_ALLOCNOW, /* flags */ 3398 NULL, NULL, /* lock */ 3399 &ss->tx.dmat); /* tag */ 3400 3401 if (err != 0) { 3402 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3403 err); 3404 return err; 3405 } 3406 3407 /* now use these tags to setup dmamaps for each slot 3408 in the ring */ 3409 for (i = 0; i <= ss->tx.mask; i++) { 3410 err = bus_dmamap_create(ss->tx.dmat, 0, 3411 &ss->tx.info[i].map); 3412 if (err != 0) { 3413 device_printf(sc->dev, "Err %d tx dmamap\n", 3414 err); 3415 return err; 3416 } 3417 } 3418 return 0; 3419 3420 } 3421 3422 static int 3423 mxge_alloc_rings(mxge_softc_t *sc) 3424 { 3425 mxge_cmd_t cmd; 3426 int tx_ring_size; 3427 int tx_ring_entries, rx_ring_entries; 3428 int err, slice; 3429 3430 /* get ring sizes */ 3431 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3432 tx_ring_size = cmd.data0; 3433 if (err != 0) { 3434 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3435 goto abort; 3436 } 3437 3438 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3439 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3440 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 3441 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 3442 IFQ_SET_READY(&sc->ifp->if_snd); 3443 3444 for (slice = 0; slice < sc->num_slices; slice++) { 3445 err = mxge_alloc_slice_rings(&sc->ss[slice], 3446 rx_ring_entries, 3447 tx_ring_entries); 3448 if (err != 0) 3449 goto abort; 3450 } 3451 return 0; 3452 3453 abort: 3454 mxge_free_rings(sc); 3455 return err; 3456 3457 } 3458 3459 static void 3460 mxge_choose_params(int mtu, int 
*big_buf_size, int *cl_size, int *nbufs) 3461 { 3462 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3463 3464 if (bufsize < MCLBYTES) { 3465 /* easy, everything fits in a single buffer */ 3466 *big_buf_size = MCLBYTES; 3467 *cl_size = MCLBYTES; 3468 *nbufs = 1; 3469 return; 3470 } 3471 3472 if (bufsize < MJUMPAGESIZE) { 3473 /* still easy, everything still fits in a single buffer */ 3474 *big_buf_size = MJUMPAGESIZE; 3475 *cl_size = MJUMPAGESIZE; 3476 *nbufs = 1; 3477 return; 3478 } 3479 #if MXGE_VIRT_JUMBOS 3480 /* now we need to use virtually contiguous buffers */ 3481 *cl_size = MJUM9BYTES; 3482 *big_buf_size = 4096; 3483 *nbufs = mtu / 4096 + 1; 3484 /* needs to be a power of two, so round up */ 3485 if (*nbufs == 3) 3486 *nbufs = 4; 3487 #else 3488 *cl_size = MJUM9BYTES; 3489 *big_buf_size = MJUM9BYTES; 3490 *nbufs = 1; 3491 #endif 3492 } 3493 3494 static int 3495 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3496 { 3497 mxge_softc_t *sc; 3498 mxge_cmd_t cmd; 3499 bus_dmamap_t map; 3500 int err, i, slice; 3501 3502 sc = ss->sc; 3503 slice = ss - sc->ss; 3504 3505 #if defined(INET) || defined(INET6) 3506 (void)tcp_lro_init(&ss->lc); 3507 #endif 3508 ss->lc.ifp = sc->ifp; 3509 3510 /* get the lanai pointers to the send and receive rings */ 3511 3512 err = 0; 3513 #ifndef IFNET_BUF_RING 3514 /* We currently only send from the first slice */ 3515 if (slice == 0) { 3516 #endif 3517 cmd.data0 = slice; 3518 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3519 ss->tx.lanai = 3520 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3521 ss->tx.send_go = (volatile uint32_t *) 3522 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3523 ss->tx.send_stop = (volatile uint32_t *) 3524 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3525 #ifndef IFNET_BUF_RING 3526 } 3527 #endif 3528 cmd.data0 = slice; 3529 err |= mxge_send_cmd(sc, 3530 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3531 ss->rx_small.lanai = 3532 (volatile 
mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3533 cmd.data0 = slice; 3534 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3535 ss->rx_big.lanai = 3536 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3537 3538 if (err != 0) { 3539 device_printf(sc->dev, 3540 "failed to get ring sizes or locations\n"); 3541 return EIO; 3542 } 3543 3544 /* stock receive rings */ 3545 for (i = 0; i <= ss->rx_small.mask; i++) { 3546 map = ss->rx_small.info[i].map; 3547 err = mxge_get_buf_small(ss, map, i); 3548 if (err) { 3549 device_printf(sc->dev, "alloced %d/%d smalls\n", 3550 i, ss->rx_small.mask + 1); 3551 return ENOMEM; 3552 } 3553 } 3554 for (i = 0; i <= ss->rx_big.mask; i++) { 3555 ss->rx_big.shadow[i].addr_low = 0xffffffff; 3556 ss->rx_big.shadow[i].addr_high = 0xffffffff; 3557 } 3558 ss->rx_big.nbufs = nbufs; 3559 ss->rx_big.cl_size = cl_size; 3560 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN + 3561 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3562 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { 3563 map = ss->rx_big.info[i].map; 3564 err = mxge_get_buf_big(ss, map, i); 3565 if (err) { 3566 device_printf(sc->dev, "alloced %d/%d bigs\n", 3567 i, ss->rx_big.mask + 1); 3568 return ENOMEM; 3569 } 3570 } 3571 return 0; 3572 } 3573 3574 static int 3575 mxge_open(mxge_softc_t *sc) 3576 { 3577 mxge_cmd_t cmd; 3578 int err, big_bytes, nbufs, slice, cl_size, i; 3579 bus_addr_t bus; 3580 volatile uint8_t *itable; 3581 struct mxge_slice_state *ss; 3582 3583 /* Copy the MAC address in case it was overridden */ 3584 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 3585 3586 err = mxge_reset(sc, 1); 3587 if (err != 0) { 3588 device_printf(sc->dev, "failed to reset\n"); 3589 return EIO; 3590 } 3591 3592 if (sc->num_slices > 1) { 3593 /* setup the indirection table */ 3594 cmd.data0 = sc->num_slices; 3595 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 3596 &cmd); 3597 3598 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 3599 &cmd); 
3600 if (err != 0) { 3601 device_printf(sc->dev, 3602 "failed to setup rss tables\n"); 3603 return err; 3604 } 3605 3606 /* just enable an identity mapping */ 3607 itable = sc->sram + cmd.data0; 3608 for (i = 0; i < sc->num_slices; i++) 3609 itable[i] = (uint8_t)i; 3610 3611 cmd.data0 = 1; 3612 cmd.data1 = mxge_rss_hash_type; 3613 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3614 if (err != 0) { 3615 device_printf(sc->dev, "failed to enable slices\n"); 3616 return err; 3617 } 3618 } 3619 3620 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); 3621 3622 cmd.data0 = nbufs; 3623 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 3624 &cmd); 3625 /* error is only meaningful if we're trying to set 3626 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 3627 if (err && nbufs > 1) { 3628 device_printf(sc->dev, 3629 "Failed to set alway-use-n to %d\n", 3630 nbufs); 3631 return EIO; 3632 } 3633 /* Give the firmware the mtu and the big and small buffer 3634 sizes. The firmware wants the big buf size to be a power 3635 of two. 
Luckily, FreeBSD's clusters are powers of two */ 3636 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3637 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3638 cmd.data0 = MHLEN - MXGEFW_PAD; 3639 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3640 &cmd); 3641 cmd.data0 = big_bytes; 3642 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3643 3644 if (err != 0) { 3645 device_printf(sc->dev, "failed to setup params\n"); 3646 goto abort; 3647 } 3648 3649 /* Now give him the pointer to the stats block */ 3650 for (slice = 0; 3651 #ifdef IFNET_BUF_RING 3652 slice < sc->num_slices; 3653 #else 3654 slice < 1; 3655 #endif 3656 slice++) { 3657 ss = &sc->ss[slice]; 3658 cmd.data0 = 3659 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3660 cmd.data1 = 3661 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3662 cmd.data2 = sizeof(struct mcp_irq_data); 3663 cmd.data2 |= (slice << 16); 3664 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3665 } 3666 3667 if (err != 0) { 3668 bus = sc->ss->fw_stats_dma.bus_addr; 3669 bus += offsetof(struct mcp_irq_data, send_done_count); 3670 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3671 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3672 err = mxge_send_cmd(sc, 3673 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3674 &cmd); 3675 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3676 sc->fw_multicast_support = 0; 3677 } else { 3678 sc->fw_multicast_support = 1; 3679 } 3680 3681 if (err != 0) { 3682 device_printf(sc->dev, "failed to setup params\n"); 3683 goto abort; 3684 } 3685 3686 for (slice = 0; slice < sc->num_slices; slice++) { 3687 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3688 if (err != 0) { 3689 device_printf(sc->dev, "couldn't open slice %d\n", 3690 slice); 3691 goto abort; 3692 } 3693 } 3694 3695 /* Finally, start the firmware running */ 3696 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3697 if (err) { 3698 device_printf(sc->dev, "Couldn't bring up link\n"); 3699 
goto abort; 3700 } 3701 #ifdef IFNET_BUF_RING 3702 for (slice = 0; slice < sc->num_slices; slice++) { 3703 ss = &sc->ss[slice]; 3704 ss->if_drv_flags |= IFF_DRV_RUNNING; 3705 ss->if_drv_flags &= ~IFF_DRV_OACTIVE; 3706 } 3707 #endif 3708 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3709 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3710 3711 return 0; 3712 3713 abort: 3714 mxge_free_mbufs(sc); 3715 3716 return err; 3717 } 3718 3719 static int 3720 mxge_close(mxge_softc_t *sc, int down) 3721 { 3722 mxge_cmd_t cmd; 3723 int err, old_down_cnt; 3724 #ifdef IFNET_BUF_RING 3725 struct mxge_slice_state *ss; 3726 int slice; 3727 #endif 3728 3729 #ifdef IFNET_BUF_RING 3730 for (slice = 0; slice < sc->num_slices; slice++) { 3731 ss = &sc->ss[slice]; 3732 ss->if_drv_flags &= ~IFF_DRV_RUNNING; 3733 } 3734 #endif 3735 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3736 if (!down) { 3737 old_down_cnt = sc->down_cnt; 3738 wmb(); 3739 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3740 if (err) { 3741 device_printf(sc->dev, 3742 "Couldn't bring down link\n"); 3743 } 3744 if (old_down_cnt == sc->down_cnt) { 3745 /* wait for down irq */ 3746 DELAY(10 * sc->intr_coal_delay); 3747 } 3748 wmb(); 3749 if (old_down_cnt == sc->down_cnt) { 3750 device_printf(sc->dev, "never got down irq\n"); 3751 } 3752 } 3753 mxge_free_mbufs(sc); 3754 3755 return 0; 3756 } 3757 3758 static void 3759 mxge_setup_cfg_space(mxge_softc_t *sc) 3760 { 3761 device_t dev = sc->dev; 3762 int reg; 3763 uint16_t lnk, pectl; 3764 3765 /* find the PCIe link width and set max read request to 4KB*/ 3766 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { 3767 lnk = pci_read_config(dev, reg + 0x12, 2); 3768 sc->link_width = (lnk >> 4) & 0x3f; 3769 3770 if (sc->pectl == 0) { 3771 pectl = pci_read_config(dev, reg + 0x8, 2); 3772 pectl = (pectl & ~0x7000) | (5 << 12); 3773 pci_write_config(dev, reg + 0x8, pectl, 2); 3774 sc->pectl = pectl; 3775 } else { 3776 /* restore saved pectl after watchdog reset */ 3777 pci_write_config(dev, 
reg + 0x8, sc->pectl, 2); 3778 } 3779 } 3780 3781 /* Enable DMA and Memory space access */ 3782 pci_enable_busmaster(dev); 3783 } 3784 3785 static uint32_t 3786 mxge_read_reboot(mxge_softc_t *sc) 3787 { 3788 device_t dev = sc->dev; 3789 uint32_t vs; 3790 3791 /* find the vendor specific offset */ 3792 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) { 3793 device_printf(sc->dev, 3794 "could not find vendor specific offset\n"); 3795 return (uint32_t)-1; 3796 } 3797 /* enable read32 mode */ 3798 pci_write_config(dev, vs + 0x10, 0x3, 1); 3799 /* tell NIC which register to read */ 3800 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3801 return (pci_read_config(dev, vs + 0x14, 4)); 3802 } 3803 3804 static void 3805 mxge_watchdog_reset(mxge_softc_t *sc) 3806 { 3807 struct pci_devinfo *dinfo; 3808 struct mxge_slice_state *ss; 3809 int err, running, s, num_tx_slices = 1; 3810 uint32_t reboot; 3811 uint16_t cmd; 3812 3813 err = ENXIO; 3814 3815 device_printf(sc->dev, "Watchdog reset!\n"); 3816 3817 /* 3818 * check to see if the NIC rebooted. If it did, then all of 3819 * PCI config space has been reset, and things like the 3820 * busmaster bit will be zero. If this is the case, then we 3821 * must restore PCI config space before the NIC can be used 3822 * again 3823 */ 3824 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3825 if (cmd == 0xffff) { 3826 /* 3827 * maybe the watchdog caught the NIC rebooting; wait 3828 * up to 100ms for it to finish. 
If it does not come 3829 * back, then give up 3830 */ 3831 DELAY(1000*100); 3832 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3833 if (cmd == 0xffff) { 3834 device_printf(sc->dev, "NIC disappeared!\n"); 3835 } 3836 } 3837 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3838 /* print the reboot status */ 3839 reboot = mxge_read_reboot(sc); 3840 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3841 reboot); 3842 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 3843 if (running) { 3844 /* 3845 * quiesce NIC so that TX routines will not try to 3846 * xmit after restoration of BAR 3847 */ 3848 3849 /* Mark the link as down */ 3850 if (sc->link_state) { 3851 sc->link_state = 0; 3852 if_link_state_change(sc->ifp, 3853 LINK_STATE_DOWN); 3854 } 3855 #ifdef IFNET_BUF_RING 3856 num_tx_slices = sc->num_slices; 3857 #endif 3858 /* grab all TX locks to ensure no tx */ 3859 for (s = 0; s < num_tx_slices; s++) { 3860 ss = &sc->ss[s]; 3861 mtx_lock(&ss->tx.mtx); 3862 } 3863 mxge_close(sc, 1); 3864 } 3865 /* restore PCI configuration space */ 3866 dinfo = device_get_ivars(sc->dev); 3867 pci_cfg_restore(sc->dev, dinfo); 3868 3869 /* and redo any changes we made to our config space */ 3870 mxge_setup_cfg_space(sc); 3871 3872 /* reload f/w */ 3873 err = mxge_load_firmware(sc, 0); 3874 if (err) { 3875 device_printf(sc->dev, 3876 "Unable to re-load f/w\n"); 3877 } 3878 if (running) { 3879 if (!err) 3880 err = mxge_open(sc); 3881 /* release all TX locks */ 3882 for (s = 0; s < num_tx_slices; s++) { 3883 ss = &sc->ss[s]; 3884 #ifdef IFNET_BUF_RING 3885 mxge_start_locked(ss); 3886 #endif 3887 mtx_unlock(&ss->tx.mtx); 3888 } 3889 } 3890 sc->watchdog_resets++; 3891 } else { 3892 device_printf(sc->dev, 3893 "NIC did not reboot, not resetting\n"); 3894 err = 0; 3895 } 3896 if (err) { 3897 device_printf(sc->dev, "watchdog reset failed\n"); 3898 } else { 3899 if (sc->dying == 2) 3900 sc->dying = 0; 3901 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3902 } 3903 } 3904 3905 static 
void 3906 mxge_watchdog_task(void *arg, int pending) 3907 { 3908 mxge_softc_t *sc = arg; 3909 3910 mtx_lock(&sc->driver_mtx); 3911 mxge_watchdog_reset(sc); 3912 mtx_unlock(&sc->driver_mtx); 3913 } 3914 3915 static void 3916 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice) 3917 { 3918 tx = &sc->ss[slice].tx; 3919 device_printf(sc->dev, "slice %d struck? ring state:\n", slice); 3920 device_printf(sc->dev, 3921 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3922 tx->req, tx->done, tx->queue_active); 3923 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3924 tx->activate, tx->deactivate); 3925 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3926 tx->pkt_done, 3927 be32toh(sc->ss->fw_stats->send_done_count)); 3928 } 3929 3930 static int 3931 mxge_watchdog(mxge_softc_t *sc) 3932 { 3933 mxge_tx_ring_t *tx; 3934 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3935 int i, err = 0; 3936 3937 /* see if we have outstanding transmits, which 3938 have been pending for more than mxge_ticks */ 3939 for (i = 0; 3940 #ifdef IFNET_BUF_RING 3941 (i < sc->num_slices) && (err == 0); 3942 #else 3943 (i < 1) && (err == 0); 3944 #endif 3945 i++) { 3946 tx = &sc->ss[i].tx; 3947 if (tx->req != tx->done && 3948 tx->watchdog_req != tx->watchdog_done && 3949 tx->done == tx->watchdog_done) { 3950 /* check for pause blocking before resetting */ 3951 if (tx->watchdog_rx_pause == rx_pause) { 3952 mxge_warn_stuck(sc, tx, i); 3953 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 3954 return (ENXIO); 3955 } 3956 else 3957 device_printf(sc->dev, "Flow control blocking " 3958 "xmits, check link partner\n"); 3959 } 3960 3961 tx->watchdog_req = tx->req; 3962 tx->watchdog_done = tx->done; 3963 tx->watchdog_rx_pause = rx_pause; 3964 } 3965 3966 if (sc->need_media_probe) 3967 mxge_media_probe(sc); 3968 return (err); 3969 } 3970 3971 static uint64_t 3972 mxge_get_counter(struct ifnet *ifp, ift_counter cnt) 3973 { 3974 struct mxge_softc *sc; 3975 uint64_t rv; 3976 3977 sc = 
if_getsoftc(ifp); 3978 rv = 0; 3979 3980 switch (cnt) { 3981 case IFCOUNTER_IPACKETS: 3982 for (int s = 0; s < sc->num_slices; s++) 3983 rv += sc->ss[s].ipackets; 3984 return (rv); 3985 case IFCOUNTER_OPACKETS: 3986 for (int s = 0; s < sc->num_slices; s++) 3987 rv += sc->ss[s].opackets; 3988 return (rv); 3989 case IFCOUNTER_OERRORS: 3990 for (int s = 0; s < sc->num_slices; s++) 3991 rv += sc->ss[s].oerrors; 3992 return (rv); 3993 #ifdef IFNET_BUF_RING 3994 case IFCOUNTER_OBYTES: 3995 for (int s = 0; s < sc->num_slices; s++) 3996 rv += sc->ss[s].obytes; 3997 return (rv); 3998 case IFCOUNTER_OMCASTS: 3999 for (int s = 0; s < sc->num_slices; s++) 4000 rv += sc->ss[s].omcasts; 4001 return (rv); 4002 case IFCOUNTER_OQDROPS: 4003 for (int s = 0; s < sc->num_slices; s++) 4004 rv += sc->ss[s].tx.br->br_drops; 4005 return (rv); 4006 #endif 4007 default: 4008 return (if_get_counter_default(ifp, cnt)); 4009 } 4010 } 4011 4012 static void 4013 mxge_tick(void *arg) 4014 { 4015 mxge_softc_t *sc = arg; 4016 u_long pkts = 0; 4017 int err = 0; 4018 int running, ticks; 4019 uint16_t cmd; 4020 4021 ticks = mxge_ticks; 4022 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 4023 if (running) { 4024 if (!sc->watchdog_countdown) { 4025 err = mxge_watchdog(sc); 4026 sc->watchdog_countdown = 4; 4027 } 4028 sc->watchdog_countdown--; 4029 } 4030 if (pkts == 0) { 4031 /* ensure NIC did not suffer h/w fault while idle */ 4032 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 4033 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 4034 sc->dying = 2; 4035 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 4036 err = ENXIO; 4037 } 4038 /* look less often if NIC is idle */ 4039 ticks *= 4; 4040 } 4041 4042 if (err == 0) 4043 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 4044 4045 } 4046 4047 static int 4048 mxge_media_change(struct ifnet *ifp) 4049 { 4050 return EINVAL; 4051 } 4052 4053 static int 4054 mxge_change_mtu(mxge_softc_t *sc, int mtu) 4055 { 4056 struct ifnet *ifp = sc->ifp; 4057 int real_mtu, 
old_mtu; 4058 int err = 0; 4059 4060 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 4061 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 4062 return EINVAL; 4063 mtx_lock(&sc->driver_mtx); 4064 old_mtu = ifp->if_mtu; 4065 ifp->if_mtu = mtu; 4066 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4067 mxge_close(sc, 0); 4068 err = mxge_open(sc); 4069 if (err != 0) { 4070 ifp->if_mtu = old_mtu; 4071 mxge_close(sc, 0); 4072 (void) mxge_open(sc); 4073 } 4074 } 4075 mtx_unlock(&sc->driver_mtx); 4076 return err; 4077 } 4078 4079 static void 4080 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 4081 { 4082 mxge_softc_t *sc = ifp->if_softc; 4083 4084 if (sc == NULL) 4085 return; 4086 ifmr->ifm_status = IFM_AVALID; 4087 ifmr->ifm_active = IFM_ETHER | IFM_FDX; 4088 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 4089 ifmr->ifm_active |= sc->current_media; 4090 } 4091 4092 static int 4093 mxge_fetch_i2c(mxge_softc_t *sc, struct ifi2creq *i2c) 4094 { 4095 mxge_cmd_t cmd; 4096 uint32_t i2c_args; 4097 int i, ms, err; 4098 4099 if (i2c->dev_addr != 0xA0 && 4100 i2c->dev_addr != 0xA2) 4101 return (EINVAL); 4102 if (i2c->len > sizeof(i2c->data)) 4103 return (EINVAL); 4104 4105 for (i = 0; i < i2c->len; i++) { 4106 i2c_args = i2c->dev_addr << 0x8; 4107 i2c_args |= i2c->offset + i; 4108 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 4109 cmd.data1 = i2c_args; 4110 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 4111 4112 if (err != MXGEFW_CMD_OK) 4113 return (EIO); 4114 /* now we wait for the data to be cached */ 4115 cmd.data0 = i2c_args & 0xff; 4116 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 4117 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 4118 cmd.data0 = i2c_args & 0xff; 4119 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 4120 if (err == EBUSY) 4121 DELAY(1000); 4122 } 4123 if (err != MXGEFW_CMD_OK) 4124 return (EIO); 4125 i2c->data[i] = cmd.data0; 4126 } 4127 return (0); 4128 } 4129 4130 static int 4131 mxge_ioctl(struct ifnet *ifp, 
u_long command, caddr_t data) 4132 { 4133 mxge_softc_t *sc = ifp->if_softc; 4134 struct ifreq *ifr = (struct ifreq *)data; 4135 struct ifi2creq i2c; 4136 int err, mask; 4137 4138 err = 0; 4139 switch (command) { 4140 case SIOCSIFMTU: 4141 err = mxge_change_mtu(sc, ifr->ifr_mtu); 4142 break; 4143 4144 case SIOCSIFFLAGS: 4145 mtx_lock(&sc->driver_mtx); 4146 if (sc->dying) { 4147 mtx_unlock(&sc->driver_mtx); 4148 return EINVAL; 4149 } 4150 if (ifp->if_flags & IFF_UP) { 4151 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 4152 err = mxge_open(sc); 4153 } else { 4154 /* take care of promis can allmulti 4155 flag chages */ 4156 mxge_change_promisc(sc, 4157 ifp->if_flags & IFF_PROMISC); 4158 mxge_set_multicast_list(sc); 4159 } 4160 } else { 4161 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4162 mxge_close(sc, 0); 4163 } 4164 } 4165 mtx_unlock(&sc->driver_mtx); 4166 break; 4167 4168 case SIOCADDMULTI: 4169 case SIOCDELMULTI: 4170 mtx_lock(&sc->driver_mtx); 4171 if (sc->dying) { 4172 mtx_unlock(&sc->driver_mtx); 4173 return (EINVAL); 4174 } 4175 mxge_set_multicast_list(sc); 4176 mtx_unlock(&sc->driver_mtx); 4177 break; 4178 4179 case SIOCSIFCAP: 4180 mtx_lock(&sc->driver_mtx); 4181 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 4182 if (mask & IFCAP_TXCSUM) { 4183 if (IFCAP_TXCSUM & ifp->if_capenable) { 4184 mask &= ~IFCAP_TSO4; 4185 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 4186 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP); 4187 } else { 4188 ifp->if_capenable |= IFCAP_TXCSUM; 4189 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 4190 } 4191 } 4192 if (mask & IFCAP_RXCSUM) { 4193 if (IFCAP_RXCSUM & ifp->if_capenable) { 4194 ifp->if_capenable &= ~IFCAP_RXCSUM; 4195 } else { 4196 ifp->if_capenable |= IFCAP_RXCSUM; 4197 } 4198 } 4199 if (mask & IFCAP_TSO4) { 4200 if (IFCAP_TSO4 & ifp->if_capenable) { 4201 ifp->if_capenable &= ~IFCAP_TSO4; 4202 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 4203 ifp->if_capenable |= IFCAP_TSO4; 4204 ifp->if_hwassist |= CSUM_TSO; 4205 } else { 4206 
printf("mxge requires tx checksum offload" 4207 " be enabled to use TSO\n"); 4208 err = EINVAL; 4209 } 4210 } 4211 #if IFCAP_TSO6 4212 if (mask & IFCAP_TXCSUM_IPV6) { 4213 if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) { 4214 mask &= ~IFCAP_TSO6; 4215 ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 4216 | IFCAP_TSO6); 4217 ifp->if_hwassist &= ~(CSUM_TCP_IPV6 4218 | CSUM_UDP); 4219 } else { 4220 ifp->if_capenable |= IFCAP_TXCSUM_IPV6; 4221 ifp->if_hwassist |= (CSUM_TCP_IPV6 4222 | CSUM_UDP_IPV6); 4223 } 4224 } 4225 if (mask & IFCAP_RXCSUM_IPV6) { 4226 if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) { 4227 ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6; 4228 } else { 4229 ifp->if_capenable |= IFCAP_RXCSUM_IPV6; 4230 } 4231 } 4232 if (mask & IFCAP_TSO6) { 4233 if (IFCAP_TSO6 & ifp->if_capenable) { 4234 ifp->if_capenable &= ~IFCAP_TSO6; 4235 } else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) { 4236 ifp->if_capenable |= IFCAP_TSO6; 4237 ifp->if_hwassist |= CSUM_TSO; 4238 } else { 4239 printf("mxge requires tx checksum offload" 4240 " be enabled to use TSO\n"); 4241 err = EINVAL; 4242 } 4243 } 4244 #endif /*IFCAP_TSO6 */ 4245 4246 if (mask & IFCAP_LRO) 4247 ifp->if_capenable ^= IFCAP_LRO; 4248 if (mask & IFCAP_VLAN_HWTAGGING) 4249 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 4250 if (mask & IFCAP_VLAN_HWTSO) 4251 ifp->if_capenable ^= IFCAP_VLAN_HWTSO; 4252 4253 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) || 4254 !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING)) 4255 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO; 4256 4257 mtx_unlock(&sc->driver_mtx); 4258 VLAN_CAPABILITIES(ifp); 4259 4260 break; 4261 4262 case SIOCGIFMEDIA: 4263 mtx_lock(&sc->driver_mtx); 4264 if (sc->dying) { 4265 mtx_unlock(&sc->driver_mtx); 4266 return (EINVAL); 4267 } 4268 mxge_media_probe(sc); 4269 mtx_unlock(&sc->driver_mtx); 4270 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 4271 &sc->media, command); 4272 break; 4273 4274 case SIOCGI2C: 4275 if (sc->connector != MXGE_XFP && 4276 sc->connector != MXGE_SFP) { 4277 err = ENXIO; 4278 
break; 4279 } 4280 err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); 4281 if (err != 0) 4282 break; 4283 mtx_lock(&sc->driver_mtx); 4284 if (sc->dying) { 4285 mtx_unlock(&sc->driver_mtx); 4286 return (EINVAL); 4287 } 4288 err = mxge_fetch_i2c(sc, &i2c); 4289 mtx_unlock(&sc->driver_mtx); 4290 if (err == 0) 4291 err = copyout(&i2c, ifr_data_get_ptr(ifr), 4292 sizeof(i2c)); 4293 break; 4294 default: 4295 err = ether_ioctl(ifp, command, data); 4296 break; 4297 } 4298 return err; 4299 } 4300 4301 static void 4302 mxge_fetch_tunables(mxge_softc_t *sc) 4303 { 4304 4305 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); 4306 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 4307 &mxge_flow_control); 4308 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 4309 &mxge_intr_coal_delay); 4310 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 4311 &mxge_nvidia_ecrc_enable); 4312 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 4313 &mxge_force_firmware); 4314 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 4315 &mxge_deassert_wait); 4316 TUNABLE_INT_FETCH("hw.mxge.verbose", 4317 &mxge_verbose); 4318 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 4319 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); 4320 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); 4321 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type); 4322 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu); 4323 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle); 4324 4325 if (bootverbose) 4326 mxge_verbose = 1; 4327 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 4328 mxge_intr_coal_delay = 30; 4329 if (mxge_ticks == 0) 4330 mxge_ticks = hz / 2; 4331 sc->pause = mxge_flow_control; 4332 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 4333 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) { 4334 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 4335 } 4336 if (mxge_initial_mtu > ETHERMTU_JUMBO || 4337 mxge_initial_mtu < ETHER_MIN_LEN) 4338 
mxge_initial_mtu = ETHERMTU_JUMBO; 4339 4340 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE) 4341 mxge_throttle = MXGE_MAX_THROTTLE; 4342 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE) 4343 mxge_throttle = MXGE_MIN_THROTTLE; 4344 sc->throttle = mxge_throttle; 4345 } 4346 4347 static void 4348 mxge_free_slices(mxge_softc_t *sc) 4349 { 4350 struct mxge_slice_state *ss; 4351 int i; 4352 4353 if (sc->ss == NULL) 4354 return; 4355 4356 for (i = 0; i < sc->num_slices; i++) { 4357 ss = &sc->ss[i]; 4358 if (ss->fw_stats != NULL) { 4359 mxge_dma_free(&ss->fw_stats_dma); 4360 ss->fw_stats = NULL; 4361 #ifdef IFNET_BUF_RING 4362 if (ss->tx.br != NULL) { 4363 drbr_free(ss->tx.br, M_DEVBUF); 4364 ss->tx.br = NULL; 4365 } 4366 #endif 4367 mtx_destroy(&ss->tx.mtx); 4368 } 4369 if (ss->rx_done.entry != NULL) { 4370 mxge_dma_free(&ss->rx_done.dma); 4371 ss->rx_done.entry = NULL; 4372 } 4373 } 4374 free(sc->ss, M_DEVBUF); 4375 sc->ss = NULL; 4376 } 4377 4378 static int 4379 mxge_alloc_slices(mxge_softc_t *sc) 4380 { 4381 mxge_cmd_t cmd; 4382 struct mxge_slice_state *ss; 4383 size_t bytes; 4384 int err, i, max_intr_slots; 4385 4386 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4387 if (err != 0) { 4388 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4389 return err; 4390 } 4391 sc->rx_ring_size = cmd.data0; 4392 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); 4393 4394 bytes = sizeof (*sc->ss) * sc->num_slices; 4395 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO); 4396 if (sc->ss == NULL) 4397 return (ENOMEM); 4398 for (i = 0; i < sc->num_slices; i++) { 4399 ss = &sc->ss[i]; 4400 4401 ss->sc = sc; 4402 4403 /* allocate per-slice rx interrupt queues */ 4404 4405 bytes = max_intr_slots * sizeof (*ss->rx_done.entry); 4406 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096); 4407 if (err != 0) 4408 goto abort; 4409 ss->rx_done.entry = ss->rx_done.dma.addr; 4410 bzero(ss->rx_done.entry, bytes); 4411 4412 /* 4413 * 
allocate the per-slice firmware stats; stats 4414 * (including tx) are used used only on the first 4415 * slice for now 4416 */ 4417 #ifndef IFNET_BUF_RING 4418 if (i > 0) 4419 continue; 4420 #endif 4421 4422 bytes = sizeof (*ss->fw_stats); 4423 err = mxge_dma_alloc(sc, &ss->fw_stats_dma, 4424 sizeof (*ss->fw_stats), 64); 4425 if (err != 0) 4426 goto abort; 4427 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr; 4428 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name), 4429 "%s:tx(%d)", device_get_nameunit(sc->dev), i); 4430 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF); 4431 #ifdef IFNET_BUF_RING 4432 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK, 4433 &ss->tx.mtx); 4434 #endif 4435 } 4436 4437 return (0); 4438 4439 abort: 4440 mxge_free_slices(sc); 4441 return (ENOMEM); 4442 } 4443 4444 static void 4445 mxge_slice_probe(mxge_softc_t *sc) 4446 { 4447 mxge_cmd_t cmd; 4448 char *old_fw; 4449 int msix_cnt, status, max_intr_slots; 4450 4451 sc->num_slices = 1; 4452 /* 4453 * don't enable multiple slices if they are not enabled, 4454 * or if this is not an SMP system 4455 */ 4456 4457 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2) 4458 return; 4459 4460 /* see how many MSI-X interrupts are available */ 4461 msix_cnt = pci_msix_count(sc->dev); 4462 if (msix_cnt < 2) 4463 return; 4464 4465 /* now load the slice aware firmware see what it supports */ 4466 old_fw = sc->fw_name; 4467 if (old_fw == mxge_fw_aligned) 4468 sc->fw_name = mxge_fw_rss_aligned; 4469 else 4470 sc->fw_name = mxge_fw_rss_unaligned; 4471 status = mxge_load_firmware(sc, 0); 4472 if (status != 0) { 4473 device_printf(sc->dev, "Falling back to a single slice\n"); 4474 return; 4475 } 4476 4477 /* try to send a reset command to the card to see if it 4478 is alive */ 4479 memset(&cmd, 0, sizeof (cmd)); 4480 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 4481 if (status != 0) { 4482 device_printf(sc->dev, "failed reset\n"); 4483 goto abort_with_fw; 4484 } 4485 4486 /* 
get rx ring size */ 4487 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4488 if (status != 0) { 4489 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4490 goto abort_with_fw; 4491 } 4492 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t)); 4493 4494 /* tell it the size of the interrupt queues */ 4495 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot); 4496 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 4497 if (status != 0) { 4498 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 4499 goto abort_with_fw; 4500 } 4501 4502 /* ask the maximum number of slices it supports */ 4503 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 4504 if (status != 0) { 4505 device_printf(sc->dev, 4506 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); 4507 goto abort_with_fw; 4508 } 4509 sc->num_slices = cmd.data0; 4510 if (sc->num_slices > msix_cnt) 4511 sc->num_slices = msix_cnt; 4512 4513 if (mxge_max_slices == -1) { 4514 /* cap to number of CPUs in system */ 4515 if (sc->num_slices > mp_ncpus) 4516 sc->num_slices = mp_ncpus; 4517 } else { 4518 if (sc->num_slices > mxge_max_slices) 4519 sc->num_slices = mxge_max_slices; 4520 } 4521 /* make sure it is a power of two */ 4522 while (sc->num_slices & (sc->num_slices - 1)) 4523 sc->num_slices--; 4524 4525 if (mxge_verbose) 4526 device_printf(sc->dev, "using %d slices\n", 4527 sc->num_slices); 4528 4529 return; 4530 4531 abort_with_fw: 4532 sc->fw_name = old_fw; 4533 (void) mxge_load_firmware(sc, 0); 4534 } 4535 4536 static int 4537 mxge_add_msix_irqs(mxge_softc_t *sc) 4538 { 4539 size_t bytes; 4540 int count, err, i, rid; 4541 4542 rid = PCIR_BAR(2); 4543 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 4544 &rid, RF_ACTIVE); 4545 4546 if (sc->msix_table_res == NULL) { 4547 device_printf(sc->dev, "couldn't alloc MSIX table res\n"); 4548 return ENXIO; 4549 } 4550 4551 count = sc->num_slices; 4552 err = pci_alloc_msix(sc->dev, &count); 4553 if (err != 
0) { 4554 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d" 4555 "err = %d \n", sc->num_slices, err); 4556 goto abort_with_msix_table; 4557 } 4558 if (count < sc->num_slices) { 4559 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n", 4560 count, sc->num_slices); 4561 device_printf(sc->dev, 4562 "Try setting hw.mxge.max_slices to %d\n", 4563 count); 4564 err = ENOSPC; 4565 goto abort_with_msix; 4566 } 4567 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices; 4568 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4569 if (sc->msix_irq_res == NULL) { 4570 err = ENOMEM; 4571 goto abort_with_msix; 4572 } 4573 4574 for (i = 0; i < sc->num_slices; i++) { 4575 rid = i + 1; 4576 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev, 4577 SYS_RES_IRQ, 4578 &rid, RF_ACTIVE); 4579 if (sc->msix_irq_res[i] == NULL) { 4580 device_printf(sc->dev, "couldn't allocate IRQ res" 4581 " for message %d\n", i); 4582 err = ENXIO; 4583 goto abort_with_res; 4584 } 4585 } 4586 4587 bytes = sizeof (*sc->msix_ih) * sc->num_slices; 4588 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4589 4590 for (i = 0; i < sc->num_slices; i++) { 4591 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i], 4592 INTR_TYPE_NET | INTR_MPSAFE, 4593 #if __FreeBSD_version > 700030 4594 NULL, 4595 #endif 4596 mxge_intr, &sc->ss[i], &sc->msix_ih[i]); 4597 if (err != 0) { 4598 device_printf(sc->dev, "couldn't setup intr for " 4599 "message %d\n", i); 4600 goto abort_with_intr; 4601 } 4602 bus_describe_intr(sc->dev, sc->msix_irq_res[i], 4603 sc->msix_ih[i], "s%d", i); 4604 } 4605 4606 if (mxge_verbose) { 4607 device_printf(sc->dev, "using %d msix IRQs:", 4608 sc->num_slices); 4609 for (i = 0; i < sc->num_slices; i++) 4610 printf(" %jd", rman_get_start(sc->msix_irq_res[i])); 4611 printf("\n"); 4612 } 4613 return (0); 4614 4615 abort_with_intr: 4616 for (i = 0; i < sc->num_slices; i++) { 4617 if (sc->msix_ih[i] != NULL) { 4618 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4619 
sc->msix_ih[i]); 4620 sc->msix_ih[i] = NULL; 4621 } 4622 } 4623 free(sc->msix_ih, M_DEVBUF); 4624 4625 abort_with_res: 4626 for (i = 0; i < sc->num_slices; i++) { 4627 rid = i + 1; 4628 if (sc->msix_irq_res[i] != NULL) 4629 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4630 sc->msix_irq_res[i]); 4631 sc->msix_irq_res[i] = NULL; 4632 } 4633 free(sc->msix_irq_res, M_DEVBUF); 4634 4635 abort_with_msix: 4636 pci_release_msi(sc->dev); 4637 4638 abort_with_msix_table: 4639 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4640 sc->msix_table_res); 4641 4642 return err; 4643 } 4644 4645 static int 4646 mxge_add_single_irq(mxge_softc_t *sc) 4647 { 4648 int count, err, rid; 4649 4650 count = pci_msi_count(sc->dev); 4651 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) { 4652 rid = 1; 4653 } else { 4654 rid = 0; 4655 sc->legacy_irq = 1; 4656 } 4657 sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid, 4658 RF_SHAREABLE | RF_ACTIVE); 4659 if (sc->irq_res == NULL) { 4660 device_printf(sc->dev, "could not alloc interrupt\n"); 4661 return ENXIO; 4662 } 4663 if (mxge_verbose) 4664 device_printf(sc->dev, "using %s irq %jd\n", 4665 sc->legacy_irq ? "INTx" : "MSI", 4666 rman_get_start(sc->irq_res)); 4667 err = bus_setup_intr(sc->dev, sc->irq_res, 4668 INTR_TYPE_NET | INTR_MPSAFE, 4669 #if __FreeBSD_version > 700030 4670 NULL, 4671 #endif 4672 mxge_intr, &sc->ss[0], &sc->ih); 4673 if (err != 0) { 4674 bus_release_resource(sc->dev, SYS_RES_IRQ, 4675 sc->legacy_irq ? 
0 : 1, sc->irq_res); 4676 if (!sc->legacy_irq) 4677 pci_release_msi(sc->dev); 4678 } 4679 return err; 4680 } 4681 4682 static void 4683 mxge_rem_msix_irqs(mxge_softc_t *sc) 4684 { 4685 int i, rid; 4686 4687 for (i = 0; i < sc->num_slices; i++) { 4688 if (sc->msix_ih[i] != NULL) { 4689 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4690 sc->msix_ih[i]); 4691 sc->msix_ih[i] = NULL; 4692 } 4693 } 4694 free(sc->msix_ih, M_DEVBUF); 4695 4696 for (i = 0; i < sc->num_slices; i++) { 4697 rid = i + 1; 4698 if (sc->msix_irq_res[i] != NULL) 4699 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4700 sc->msix_irq_res[i]); 4701 sc->msix_irq_res[i] = NULL; 4702 } 4703 free(sc->msix_irq_res, M_DEVBUF); 4704 4705 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4706 sc->msix_table_res); 4707 4708 pci_release_msi(sc->dev); 4709 return; 4710 } 4711 4712 static void 4713 mxge_rem_single_irq(mxge_softc_t *sc) 4714 { 4715 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 4716 bus_release_resource(sc->dev, SYS_RES_IRQ, 4717 sc->legacy_irq ? 
0 : 1, sc->irq_res); 4718 if (!sc->legacy_irq) 4719 pci_release_msi(sc->dev); 4720 } 4721 4722 static void 4723 mxge_rem_irq(mxge_softc_t *sc) 4724 { 4725 if (sc->num_slices > 1) 4726 mxge_rem_msix_irqs(sc); 4727 else 4728 mxge_rem_single_irq(sc); 4729 } 4730 4731 static int 4732 mxge_add_irq(mxge_softc_t *sc) 4733 { 4734 int err; 4735 4736 if (sc->num_slices > 1) 4737 err = mxge_add_msix_irqs(sc); 4738 else 4739 err = mxge_add_single_irq(sc); 4740 4741 if (0 && err == 0 && sc->num_slices > 1) { 4742 mxge_rem_msix_irqs(sc); 4743 err = mxge_add_msix_irqs(sc); 4744 } 4745 return err; 4746 } 4747 4748 static int 4749 mxge_attach(device_t dev) 4750 { 4751 mxge_cmd_t cmd; 4752 mxge_softc_t *sc = device_get_softc(dev); 4753 struct ifnet *ifp; 4754 int err, rid; 4755 4756 sc->dev = dev; 4757 mxge_fetch_tunables(sc); 4758 4759 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc); 4760 sc->tq = taskqueue_create("mxge_taskq", M_WAITOK, 4761 taskqueue_thread_enqueue, &sc->tq); 4762 if (sc->tq == NULL) { 4763 err = ENOMEM; 4764 goto abort_with_nothing; 4765 } 4766 4767 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 4768 1, /* alignment */ 4769 0, /* boundary */ 4770 BUS_SPACE_MAXADDR, /* low */ 4771 BUS_SPACE_MAXADDR, /* high */ 4772 NULL, NULL, /* filter */ 4773 65536 + 256, /* maxsize */ 4774 MXGE_MAX_SEND_DESC, /* num segs */ 4775 65536, /* maxsegsize */ 4776 0, /* flags */ 4777 NULL, NULL, /* lock */ 4778 &sc->parent_dmat); /* tag */ 4779 4780 if (err != 0) { 4781 device_printf(sc->dev, "Err %d allocating parent dmat\n", 4782 err); 4783 goto abort_with_tq; 4784 } 4785 4786 ifp = sc->ifp = if_alloc(IFT_ETHER); 4787 if (ifp == NULL) { 4788 device_printf(dev, "can not if_alloc()\n"); 4789 err = ENOSPC; 4790 goto abort_with_parent_dmat; 4791 } 4792 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4793 4794 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", 4795 device_get_nameunit(dev)); 4796 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, 
NULL, MTX_DEF); 4797 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), 4798 "%s:drv", device_get_nameunit(dev)); 4799 mtx_init(&sc->driver_mtx, sc->driver_mtx_name, 4800 MTX_NETWORK_LOCK, MTX_DEF); 4801 4802 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); 4803 4804 mxge_setup_cfg_space(sc); 4805 4806 /* Map the board into the kernel */ 4807 rid = PCIR_BARS; 4808 sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, 4809 RF_ACTIVE); 4810 if (sc->mem_res == NULL) { 4811 device_printf(dev, "could not map memory\n"); 4812 err = ENXIO; 4813 goto abort_with_lock; 4814 } 4815 sc->sram = rman_get_virtual(sc->mem_res); 4816 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4817 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4818 device_printf(dev, "impossible memory region size %jd\n", 4819 rman_get_size(sc->mem_res)); 4820 err = ENXIO; 4821 goto abort_with_mem_res; 4822 } 4823 4824 /* make NULL terminated copy of the EEPROM strings section of 4825 lanai SRAM */ 4826 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4827 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4828 rman_get_bushandle(sc->mem_res), 4829 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4830 sc->eeprom_strings, 4831 MXGE_EEPROM_STRINGS_SIZE - 2); 4832 err = mxge_parse_strings(sc); 4833 if (err != 0) 4834 goto abort_with_mem_res; 4835 4836 /* Enable write combining for efficient use of PCIe bus */ 4837 mxge_enable_wc(sc); 4838 4839 /* Allocate the out of band dma memory */ 4840 err = mxge_dma_alloc(sc, &sc->cmd_dma, 4841 sizeof (mxge_cmd_t), 64); 4842 if (err != 0) 4843 goto abort_with_mem_res; 4844 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 4845 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4846 if (err != 0) 4847 goto abort_with_cmd_dma; 4848 4849 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4850 if (err != 0) 4851 goto abort_with_zeropad_dma; 4852 4853 /* select & load the firmware */ 4854 err = mxge_select_firmware(sc); 4855 if (err 
!= 0) 4856 goto abort_with_dmabench; 4857 sc->intr_coal_delay = mxge_intr_coal_delay; 4858 4859 mxge_slice_probe(sc); 4860 err = mxge_alloc_slices(sc); 4861 if (err != 0) 4862 goto abort_with_dmabench; 4863 4864 err = mxge_reset(sc, 0); 4865 if (err != 0) 4866 goto abort_with_slices; 4867 4868 err = mxge_alloc_rings(sc); 4869 if (err != 0) { 4870 device_printf(sc->dev, "failed to allocate rings\n"); 4871 goto abort_with_slices; 4872 } 4873 4874 err = mxge_add_irq(sc); 4875 if (err != 0) { 4876 device_printf(sc->dev, "failed to add irq\n"); 4877 goto abort_with_rings; 4878 } 4879 4880 ifp->if_baudrate = IF_Gbps(10); 4881 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 4882 IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 | 4883 IFCAP_RXCSUM_IPV6; 4884 #if defined(INET) || defined(INET6) 4885 ifp->if_capabilities |= IFCAP_LRO; 4886 #endif 4887 4888 #ifdef MXGE_NEW_VLAN_API 4889 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; 4890 4891 /* Only FW 1.4.32 and newer can do TSO over vlans */ 4892 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 4893 sc->fw_ver_tiny >= 32) 4894 ifp->if_capabilities |= IFCAP_VLAN_HWTSO; 4895 #endif 4896 sc->max_mtu = mxge_max_mtu(sc); 4897 if (sc->max_mtu >= 9000) 4898 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 4899 else 4900 device_printf(dev, "MTU limited to %d. 
Install " 4901 "latest firmware for 9000 byte jumbo support\n", 4902 sc->max_mtu - ETHER_HDR_LEN); 4903 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4904 ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6; 4905 /* check to see if f/w supports TSO for IPv6 */ 4906 if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) { 4907 if (CSUM_TCP_IPV6) 4908 ifp->if_capabilities |= IFCAP_TSO6; 4909 sc->max_tso6_hlen = min(cmd.data0, 4910 sizeof (sc->ss[0].scratch)); 4911 } 4912 ifp->if_capenable = ifp->if_capabilities; 4913 if (sc->lro_cnt == 0) 4914 ifp->if_capenable &= ~IFCAP_LRO; 4915 ifp->if_init = mxge_init; 4916 ifp->if_softc = sc; 4917 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4918 ifp->if_ioctl = mxge_ioctl; 4919 ifp->if_start = mxge_start; 4920 ifp->if_get_counter = mxge_get_counter; 4921 ifp->if_hw_tsomax = IP_MAXPACKET - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); 4922 ifp->if_hw_tsomaxsegcount = sc->ss[0].tx.max_desc; 4923 ifp->if_hw_tsomaxsegsize = IP_MAXPACKET; 4924 /* Initialise the ifmedia structure */ 4925 ifmedia_init(&sc->media, 0, mxge_media_change, 4926 mxge_media_status); 4927 mxge_media_init(sc); 4928 mxge_media_probe(sc); 4929 sc->dying = 0; 4930 ether_ifattach(ifp, sc->mac_addr); 4931 /* ether_ifattach sets mtu to ETHERMTU */ 4932 if (mxge_initial_mtu != ETHERMTU) 4933 mxge_change_mtu(sc, mxge_initial_mtu); 4934 4935 mxge_add_sysctls(sc); 4936 #ifdef IFNET_BUF_RING 4937 ifp->if_transmit = mxge_transmit; 4938 ifp->if_qflush = mxge_qflush; 4939 #endif 4940 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq", 4941 device_get_nameunit(sc->dev)); 4942 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 4943 return 0; 4944 4945 abort_with_rings: 4946 mxge_free_rings(sc); 4947 abort_with_slices: 4948 mxge_free_slices(sc); 4949 abort_with_dmabench: 4950 mxge_dma_free(&sc->dmabench_dma); 4951 abort_with_zeropad_dma: 4952 mxge_dma_free(&sc->zeropad_dma); 4953 abort_with_cmd_dma: 4954 mxge_dma_free(&sc->cmd_dma); 4955 
abort_with_mem_res: 4956 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4957 abort_with_lock: 4958 pci_disable_busmaster(dev); 4959 mtx_destroy(&sc->cmd_mtx); 4960 mtx_destroy(&sc->driver_mtx); 4961 if_free(ifp); 4962 abort_with_parent_dmat: 4963 bus_dma_tag_destroy(sc->parent_dmat); 4964 abort_with_tq: 4965 if (sc->tq != NULL) { 4966 taskqueue_drain(sc->tq, &sc->watchdog_task); 4967 taskqueue_free(sc->tq); 4968 sc->tq = NULL; 4969 } 4970 abort_with_nothing: 4971 return err; 4972 } 4973 4974 static int 4975 mxge_detach(device_t dev) 4976 { 4977 mxge_softc_t *sc = device_get_softc(dev); 4978 4979 if (mxge_vlans_active(sc)) { 4980 device_printf(sc->dev, 4981 "Detach vlans before removing module\n"); 4982 return EBUSY; 4983 } 4984 mtx_lock(&sc->driver_mtx); 4985 sc->dying = 1; 4986 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 4987 mxge_close(sc, 0); 4988 mtx_unlock(&sc->driver_mtx); 4989 ether_ifdetach(sc->ifp); 4990 if (sc->tq != NULL) { 4991 taskqueue_drain(sc->tq, &sc->watchdog_task); 4992 taskqueue_free(sc->tq); 4993 sc->tq = NULL; 4994 } 4995 callout_drain(&sc->co_hdl); 4996 ifmedia_removeall(&sc->media); 4997 mxge_dummy_rdma(sc, 0); 4998 mxge_rem_sysctls(sc); 4999 mxge_rem_irq(sc); 5000 mxge_free_rings(sc); 5001 mxge_free_slices(sc); 5002 mxge_dma_free(&sc->dmabench_dma); 5003 mxge_dma_free(&sc->zeropad_dma); 5004 mxge_dma_free(&sc->cmd_dma); 5005 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 5006 pci_disable_busmaster(dev); 5007 mtx_destroy(&sc->cmd_mtx); 5008 mtx_destroy(&sc->driver_mtx); 5009 if_free(sc->ifp); 5010 bus_dma_tag_destroy(sc->parent_dmat); 5011 return 0; 5012 } 5013 5014 static int 5015 mxge_shutdown(device_t dev) 5016 { 5017 return 0; 5018 } 5019 5020 /* 5021 This file uses Myri10GE driver indentation. 5022 5023 Local Variables: 5024 c-file-style:"linux" 5025 tab-width:8 5026 End: 5027 */ 5028