1 /****************************************************************************** 2 SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 4 Copyright (c) 2006-2013, Myricom Inc. 5 All rights reserved. 6 7 Redistribution and use in source and binary forms, with or without 8 modification, are permitted provided that the following conditions are met: 9 10 1. Redistributions of source code must retain the above copyright notice, 11 this list of conditions and the following disclaimer. 12 13 2. Neither the name of the Myricom Inc, nor the names of its 14 contributors may be used to endorse or promote products derived from 15 this software without specific prior written permission. 16 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 POSSIBILITY OF SUCH DAMAGE. 28 29 ***************************************************************************/ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/linker.h> 37 #include <sys/firmware.h> 38 #include <sys/endian.h> 39 #include <sys/sockio.h> 40 #include <sys/mbuf.h> 41 #include <sys/malloc.h> 42 #include <sys/kdb.h> 43 #include <sys/kernel.h> 44 #include <sys/lock.h> 45 #include <sys/module.h> 46 #include <sys/socket.h> 47 #include <sys/sysctl.h> 48 #include <sys/sx.h> 49 #include <sys/taskqueue.h> 50 #include <contrib/zlib/zlib.h> 51 #include <dev/zlib/zcalloc.h> 52 53 #include <net/if.h> 54 #include <net/if_var.h> 55 #include <net/if_arp.h> 56 #include <net/ethernet.h> 57 #include <net/if_dl.h> 58 #include <net/if_media.h> 59 60 #include <net/bpf.h> 61 62 #include <net/if_types.h> 63 #include <net/if_vlan_var.h> 64 65 #include <netinet/in_systm.h> 66 #include <netinet/in.h> 67 #include <netinet/ip.h> 68 #include <netinet/ip6.h> 69 #include <netinet/tcp.h> 70 #include <netinet/tcp_lro.h> 71 #include <netinet6/ip6_var.h> 72 73 #include <machine/bus.h> 74 #include <machine/in_cksum.h> 75 #include <machine/resource.h> 76 #include <sys/bus.h> 77 #include <sys/rman.h> 78 #include <sys/smp.h> 79 80 #include <dev/pci/pcireg.h> 81 #include <dev/pci/pcivar.h> 82 #include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */ 83 84 #include <vm/vm.h> /* for pmap_mapdev() */ 85 #include <vm/pmap.h> 86 87 #if defined(__i386) || defined(__amd64) 88 #include <machine/specialreg.h> 89 #endif 90 91 #include <dev/mxge/mxge_mcp.h> 92 #include <dev/mxge/mcp_gen_header.h> 93 /*#define MXGE_FAKE_IFP*/ 94 #include <dev/mxge/if_mxge_var.h> 95 #ifdef IFNET_BUF_RING 96 #include <sys/buf_ring.h> 97 #endif 98 99 #include "opt_inet.h" 100 #include "opt_inet6.h" 101 102 /* tunable params */ 103 static int mxge_nvidia_ecrc_enable = 1; 104 static int mxge_force_firmware = 0; 105 static int mxge_intr_coal_delay = 30; 106 static int mxge_deassert_wait = 1; 107 static int mxge_flow_control = 1; 108 static int mxge_verbose = 0; 109 static int mxge_ticks; 110 static int mxge_max_slices = 1; 111 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 112 static int mxge_always_promisc = 0; 113 static int mxge_initial_mtu = ETHERMTU_JUMBO; 114 static int mxge_throttle = 0; 115 static char *mxge_fw_unaligned = "mxge_ethp_z8e"; 116 static char *mxge_fw_aligned = "mxge_eth_z8e"; 117 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; 118 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; 119 120 static int mxge_probe(device_t dev); 121 static int mxge_attach(device_t dev); 122 static int mxge_detach(device_t dev); 123 static int mxge_shutdown(device_t dev); 124 static void mxge_intr(void *arg); 125 126 static device_method_t mxge_methods[] = 127 { 128 /* Device interface */ 129 DEVMETHOD(device_probe, mxge_probe), 130 DEVMETHOD(device_attach, mxge_attach), 131 DEVMETHOD(device_detach, mxge_detach), 132 DEVMETHOD(device_shutdown, mxge_shutdown), 133 134 DEVMETHOD_END 135 }; 136 137 static driver_t mxge_driver = 138 { 139 "mxge", 140 mxge_methods, 141 sizeof(mxge_softc_t), 142 }; 143 144 static devclass_t mxge_devclass; 145 146 /* Declare ourselves to be a child of the PCI bus.*/ 147 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0); 148 MODULE_DEPEND(mxge, firmware, 1, 1, 1); 149 MODULE_DEPEND(mxge, zlib, 1, 1, 1); 150 151 static int mxge_load_firmware(mxge_softc_t *sc, int adopt); 152 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 153 static int mxge_close(mxge_softc_t *sc, int down); 154 static int mxge_open(mxge_softc_t *sc); 155 static void mxge_tick(void *arg); 156 157 static int 158 mxge_probe(device_t dev) 159 { 160 int rev; 161 162 163 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && 164 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) || 165 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) { 166 rev = pci_get_revid(dev); 167 switch (rev) { 168 case MXGE_PCI_REV_Z8E: 169 device_set_desc(dev, "Myri10G-PCIE-8A"); 170 break; 171 case MXGE_PCI_REV_Z8ES: 172 device_set_desc(dev, "Myri10G-PCIE-8B"); 173 break; 174 default: 175 device_set_desc(dev, "Myri10G-PCIE-8??"); 176 device_printf(dev, "Unrecognized rev %d NIC\n", 177 rev); 178 break; 179 } 180 return 0; 181 } 182 return ENXIO; 183 } 184 185 static void 186 mxge_enable_wc(mxge_softc_t *sc) 187 { 188 #if defined(__i386) || defined(__amd64) 189 vm_offset_t len; 190 int err; 191 192 sc->wc = 1; 193 len = rman_get_size(sc->mem_res); 194 err = pmap_change_attr((vm_offset_t) sc->sram, 195 len, PAT_WRITE_COMBINING); 196 if (err != 0) { 197 device_printf(sc->dev, "pmap_change_attr failed, %d\n", 198 err); 199 sc->wc = 0; 200 } 201 #endif 202 } 203 204 205 /* callback to get our DMA address */ 206 static void 207 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, 208 int error) 209 { 210 if (error == 0) { 211 *(bus_addr_t *) arg = segs->ds_addr; 212 } 213 } 214 215 static int 216 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, 217 bus_size_t alignment) 218 { 219 int err; 220 device_t dev = sc->dev; 221 bus_size_t boundary, maxsegsize; 222 223 if (bytes > 4096 && alignment == 4096) { 224 boundary = 0; 225 maxsegsize = bytes; 226 } else { 227 boundary = 4096; 228 maxsegsize = 4096; 229 } 230 231 /* allocate DMAable memory tags */ 232 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 233 alignment, /* alignment */ 234 boundary, /* boundary */ 235 BUS_SPACE_MAXADDR, /* low */ 236 BUS_SPACE_MAXADDR, /* high */ 237 NULL, NULL, /* filter */ 238 bytes, /* maxsize */ 239 1, /* num segs */ 240 maxsegsize, /* maxsegsize */ 241 BUS_DMA_COHERENT, /* flags */ 242 NULL, NULL, /* lock */ 243 &dma->dmat); /* tag */ 244 if (err != 0) { 245 device_printf(dev, "couldn't alloc tag (err = %d)\n", err); 246 return err; 247 } 248 249 /* allocate DMAable memory & map */ 250 err = bus_dmamem_alloc(dma->dmat, &dma->addr, 251 (BUS_DMA_WAITOK | BUS_DMA_COHERENT 252 | BUS_DMA_ZERO), &dma->map); 253 if (err != 0) { 254 device_printf(dev, "couldn't alloc mem (err = %d)\n", err); 255 goto abort_with_dmat; 256 } 257 258 /* load the memory */ 259 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, 260 mxge_dmamap_callback, 261 (void *)&dma->bus_addr, 0); 262 if (err != 0) { 263 device_printf(dev, "couldn't load map (err = %d)\n", err); 264 goto abort_with_mem; 265 } 266 return 0; 267 268 abort_with_mem: 269 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 270 abort_with_dmat: 271 (void)bus_dma_tag_destroy(dma->dmat); 272 return err; 273 } 274 275 276 static void 277 mxge_dma_free(mxge_dma_t *dma) 278 { 279 bus_dmamap_unload(dma->dmat, dma->map); 280 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 281 (void)bus_dma_tag_destroy(dma->dmat); 282 } 283 284 /* 285 * The eeprom strings on the lanaiX have the format 286 * SN=x\0 287 * MAC=x:x:x:x:x:x\0 288 * PC=text\0 289 */ 290 291 static int 292 mxge_parse_strings(mxge_softc_t *sc) 293 { 294 char *ptr; 295 int i, found_mac, found_sn2; 296 char *endptr; 297 298 ptr = sc->eeprom_strings; 299 found_mac = 0; 300 found_sn2 = 0; 301 while (*ptr != '\0') { 302 if (strncmp(ptr, "MAC=", 4) == 0) { 303 ptr += 4; 304 for (i = 0;;) { 305 sc->mac_addr[i] = strtoul(ptr, &endptr, 16); 306 if (endptr - ptr != 2) 307 goto abort; 308 ptr = endptr; 309 if (++i == 6) 310 break; 311 if (*ptr++ != ':') 312 goto abort; 313 } 314 found_mac = 1; 315 } else if (strncmp(ptr, "PC=", 3) == 0) { 316 ptr += 3; 317 strlcpy(sc->product_code_string, ptr, 318 sizeof(sc->product_code_string)); 319 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) { 320 ptr += 3; 321 strlcpy(sc->serial_number_string, ptr, 322 sizeof(sc->serial_number_string)); 323 } else if (strncmp(ptr, "SN2=", 4) == 0) { 324 /* SN2 takes precedence over SN */ 325 ptr += 4; 326 found_sn2 = 1; 327 strlcpy(sc->serial_number_string, ptr, 328 sizeof(sc->serial_number_string)); 329 } 330 while (*ptr++ != '\0') {} 331 } 332 333 if (found_mac) 334 return 0; 335 336 abort: 337 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 338 339 return ENXIO; 340 } 341 342 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 343 static void 344 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 345 { 346 uint32_t val; 347 unsigned long base, off; 348 char *va, *cfgptr; 349 device_t pdev, mcp55; 350 uint16_t vendor_id, device_id, word; 351 uintptr_t bus, slot, func, ivend, idev; 352 uint32_t *ptr32; 353 354 355 if (!mxge_nvidia_ecrc_enable) 356 return; 357 358 pdev = device_get_parent(device_get_parent(sc->dev)); 359 if (pdev == NULL) { 360 device_printf(sc->dev, "could not find parent?\n"); 361 return; 362 } 363 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 364 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 365 366 if (vendor_id != 0x10de) 367 return; 368 369 base = 0; 370 371 if (device_id == 0x005d) { 372 /* ck804, base address is magic */ 373 base = 0xe0000000UL; 374 } else if (device_id >= 0x0374 && device_id <= 0x378) { 375 /* mcp55, base address stored in chipset */ 376 mcp55 = pci_find_bsf(0, 0, 0); 377 if (mcp55 && 378 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 379 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 380 word = pci_read_config(mcp55, 0x90, 2); 381 base = ((unsigned long)word & 0x7ffeU) << 25; 382 } 383 } 384 if (!base) 385 return; 386 387 /* XXXX 388 Test below is commented because it is believed that doing 389 config read/write beyond 0xff will access the config space 390 for the next larger function. Uncomment this and remove 391 the hacky pmap_mapdev() way of accessing config space when 392 FreeBSD grows support for extended pcie config space access 393 */ 394 #if 0 395 /* See if we can, by some miracle, access the extended 396 config space */ 397 val = pci_read_config(pdev, 0x178, 4); 398 if (val != 0xffffffff) { 399 val |= 0x40; 400 pci_write_config(pdev, 0x178, val, 4); 401 return; 402 } 403 #endif 404 /* Rather than using normal pci config space writes, we must 405 * map the Nvidia config space ourselves. This is because on 406 * opteron/nvidia class machine the 0xe000000 mapping is 407 * handled by the nvidia chipset, that means the internal PCI 408 * device (the on-chip northbridge), or the amd-8131 bridge 409 * and things behind them are not visible by this method. 410 */ 411 412 BUS_READ_IVAR(device_get_parent(pdev), pdev, 413 PCI_IVAR_BUS, &bus); 414 BUS_READ_IVAR(device_get_parent(pdev), pdev, 415 PCI_IVAR_SLOT, &slot); 416 BUS_READ_IVAR(device_get_parent(pdev), pdev, 417 PCI_IVAR_FUNCTION, &func); 418 BUS_READ_IVAR(device_get_parent(pdev), pdev, 419 PCI_IVAR_VENDOR, &ivend); 420 BUS_READ_IVAR(device_get_parent(pdev), pdev, 421 PCI_IVAR_DEVICE, &idev); 422 423 off = base 424 + 0x00100000UL * (unsigned long)bus 425 + 0x00001000UL * (unsigned long)(func 426 + 8 * slot); 427 428 /* map it into the kernel */ 429 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 430 431 432 if (va == NULL) { 433 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 434 return; 435 } 436 /* get a pointer to the config space mapped into the kernel */ 437 cfgptr = va + (off & PAGE_MASK); 438 439 /* make sure that we can really access it */ 440 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 441 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 442 if (! (vendor_id == ivend && device_id == idev)) { 443 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 444 vendor_id, device_id); 445 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 446 return; 447 } 448 449 ptr32 = (uint32_t*)(cfgptr + 0x178); 450 val = *ptr32; 451 452 if (val == 0xffffffff) { 453 device_printf(sc->dev, "extended mapping failed\n"); 454 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 455 return; 456 } 457 *ptr32 = val | 0x40; 458 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 459 if (mxge_verbose) 460 device_printf(sc->dev, 461 "Enabled ECRC on upstream Nvidia bridge " 462 "at %d:%d:%d\n", 463 (int)bus, (int)slot, (int)func); 464 return; 465 } 466 #else 467 static void 468 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 469 { 470 device_printf(sc->dev, 471 "Nforce 4 chipset on non-x86/amd64!?!?!\n"); 472 return; 473 } 474 #endif 475 476 477 static int 478 mxge_dma_test(mxge_softc_t *sc, int test_type) 479 { 480 mxge_cmd_t cmd; 481 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr; 482 int status; 483 uint32_t len; 484 char *test = " "; 485 486 487 /* Run a small DMA test. 488 * The magic multipliers to the length tell the firmware 489 * to do DMA read, write, or read+write tests. The 490 * results are returned in cmd.data0. The upper 16 491 * bits of the return is the number of transfers completed. 492 * The lower 16 bits is the time in 0.5us ticks that the 493 * transfers took to complete. 494 */ 495 496 len = sc->tx_boundary; 497 498 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 499 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 500 cmd.data2 = len * 0x10000; 501 status = mxge_send_cmd(sc, test_type, &cmd); 502 if (status != 0) { 503 test = "read"; 504 goto abort; 505 } 506 sc->read_dma = ((cmd.data0>>16) * len * 2) / 507 (cmd.data0 & 0xffff); 508 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 509 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 510 cmd.data2 = len * 0x1; 511 status = mxge_send_cmd(sc, test_type, &cmd); 512 if (status != 0) { 513 test = "write"; 514 goto abort; 515 } 516 sc->write_dma = ((cmd.data0>>16) * len * 2) / 517 (cmd.data0 & 0xffff); 518 519 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 520 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 521 cmd.data2 = len * 0x10001; 522 status = mxge_send_cmd(sc, test_type, &cmd); 523 if (status != 0) { 524 test = "read/write"; 525 goto abort; 526 } 527 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 528 (cmd.data0 & 0xffff); 529 530 abort: 531 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 532 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 533 test, status); 534 535 return status; 536 } 537 538 /* 539 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 540 * when the PCI-E Completion packets are aligned on an 8-byte 541 * boundary. Some PCI-E chip sets always align Completion packets; on 542 * the ones that do not, the alignment can be enforced by enabling 543 * ECRC generation (if supported). 544 * 545 * When PCI-E Completion packets are not aligned, it is actually more 546 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 547 * 548 * If the driver can neither enable ECRC nor verify that it has 549 * already been enabled, then it must use a firmware image which works 550 * around unaligned completion packets (ethp_z8e.dat), and it should 551 * also ensure that it never gives the device a Read-DMA which is 552 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 553 * enabled, then the driver should use the aligned (eth_z8e.dat) 554 * firmware image, and set tx_boundary to 4KB. 555 */ 556 557 static int 558 mxge_firmware_probe(mxge_softc_t *sc) 559 { 560 device_t dev = sc->dev; 561 int reg, status; 562 uint16_t pectl; 563 564 sc->tx_boundary = 4096; 565 /* 566 * Verify the max read request size was set to 4KB 567 * before trying the test with 4KB. 568 */ 569 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { 570 pectl = pci_read_config(dev, reg + 0x8, 2); 571 if ((pectl & (5 << 12)) != (5 << 12)) { 572 device_printf(dev, "Max Read Req. size != 4k (0x%x\n", 573 pectl); 574 sc->tx_boundary = 2048; 575 } 576 } 577 578 /* 579 * load the optimized firmware (which assumes aligned PCIe 580 * completions) in order to see if it works on this host. 581 */ 582 sc->fw_name = mxge_fw_aligned; 583 status = mxge_load_firmware(sc, 1); 584 if (status != 0) { 585 return status; 586 } 587 588 /* 589 * Enable ECRC if possible 590 */ 591 mxge_enable_nvidia_ecrc(sc); 592 593 /* 594 * Run a DMA test which watches for unaligned completions and 595 * aborts on the first one seen. Not required on Z8ES or newer. 596 */ 597 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES) 598 return 0; 599 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 600 if (status == 0) 601 return 0; /* keep the aligned firmware */ 602 603 if (status != E2BIG) 604 device_printf(dev, "DMA test failed: %d\n", status); 605 if (status == ENOSYS) 606 device_printf(dev, "Falling back to ethp! " 607 "Please install up to date fw\n"); 608 return status; 609 } 610 611 static int 612 mxge_select_firmware(mxge_softc_t *sc) 613 { 614 int aligned = 0; 615 int force_firmware = mxge_force_firmware; 616 617 if (sc->throttle) 618 force_firmware = sc->throttle; 619 620 if (force_firmware != 0) { 621 if (force_firmware == 1) 622 aligned = 1; 623 else 624 aligned = 0; 625 if (mxge_verbose) 626 device_printf(sc->dev, 627 "Assuming %s completions (forced)\n", 628 aligned ? "aligned" : "unaligned"); 629 goto abort; 630 } 631 632 /* if the PCIe link width is 4 or less, we can use the aligned 633 firmware and skip any checks */ 634 if (sc->link_width != 0 && sc->link_width <= 4) { 635 device_printf(sc->dev, 636 "PCIe x%d Link, expect reduced performance\n", 637 sc->link_width); 638 aligned = 1; 639 goto abort; 640 } 641 642 if (0 == mxge_firmware_probe(sc)) 643 return 0; 644 645 abort: 646 if (aligned) { 647 sc->fw_name = mxge_fw_aligned; 648 sc->tx_boundary = 4096; 649 } else { 650 sc->fw_name = mxge_fw_unaligned; 651 sc->tx_boundary = 2048; 652 } 653 return (mxge_load_firmware(sc, 0)); 654 } 655 656 static int 657 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 658 { 659 660 661 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 662 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 663 be32toh(hdr->mcp_type)); 664 return EIO; 665 } 666 667 /* save firmware version for sysctl */ 668 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version)); 669 if (mxge_verbose) 670 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 671 672 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 673 &sc->fw_ver_minor, &sc->fw_ver_tiny); 674 675 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 676 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 677 device_printf(sc->dev, "Found firmware version %s\n", 678 sc->fw_version); 679 device_printf(sc->dev, "Driver needs %d.%d\n", 680 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 681 return EINVAL; 682 } 683 return 0; 684 685 } 686 687 static int 688 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 689 { 690 z_stream zs; 691 char *inflate_buffer; 692 const struct firmware *fw; 693 const mcp_gen_header_t *hdr; 694 unsigned hdr_offset; 695 int status; 696 unsigned int i; 697 char dummy; 698 size_t fw_len; 699 700 fw = firmware_get(sc->fw_name); 701 if (fw == NULL) { 702 device_printf(sc->dev, "Could not find firmware image %s\n", 703 sc->fw_name); 704 return ENOENT; 705 } 706 707 708 709 /* setup zlib and decompress f/w */ 710 bzero(&zs, sizeof (zs)); 711 zs.zalloc = zcalloc_nowait; 712 zs.zfree = zcfree; 713 status = inflateInit(&zs); 714 if (status != Z_OK) { 715 status = EIO; 716 goto abort_with_fw; 717 } 718 719 /* the uncompressed size is stored as the firmware version, 720 which would otherwise go unused */ 721 fw_len = (size_t) fw->version; 722 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); 723 if (inflate_buffer == NULL) 724 goto abort_with_zs; 725 zs.avail_in = fw->datasize; 726 zs.next_in = __DECONST(char *, fw->data); 727 zs.avail_out = fw_len; 728 zs.next_out = inflate_buffer; 729 status = inflate(&zs, Z_FINISH); 730 if (status != Z_STREAM_END) { 731 device_printf(sc->dev, "zlib %d\n", status); 732 status = EIO; 733 goto abort_with_buffer; 734 } 735 736 /* check id */ 737 hdr_offset = htobe32(*(const uint32_t *) 738 (inflate_buffer + MCP_HEADER_PTR_OFFSET)); 739 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 740 device_printf(sc->dev, "Bad firmware file"); 741 status = EIO; 742 goto abort_with_buffer; 743 } 744 hdr = (const void*)(inflate_buffer + hdr_offset); 745 746 status = mxge_validate_firmware(sc, hdr); 747 if (status != 0) 748 goto abort_with_buffer; 749 750 /* Copy the inflated firmware to NIC SRAM. */ 751 for (i = 0; i < fw_len; i += 256) { 752 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 753 inflate_buffer + i, 754 min(256U, (unsigned)(fw_len - i))); 755 wmb(); 756 dummy = *sc->sram; 757 wmb(); 758 } 759 760 *limit = fw_len; 761 status = 0; 762 abort_with_buffer: 763 free(inflate_buffer, M_TEMP); 764 abort_with_zs: 765 inflateEnd(&zs); 766 abort_with_fw: 767 firmware_put(fw, FIRMWARE_UNLOAD); 768 return status; 769 } 770 771 /* 772 * Enable or disable periodic RDMAs from the host to make certain 773 * chipsets resend dropped PCIe messages 774 */ 775 776 static void 777 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 778 { 779 char buf_bytes[72]; 780 volatile uint32_t *confirm; 781 volatile char *submit; 782 uint32_t *buf, dma_low, dma_high; 783 int i; 784 785 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 786 787 /* clear confirmation addr */ 788 confirm = (volatile uint32_t *)sc->cmd; 789 *confirm = 0; 790 wmb(); 791 792 /* send an rdma command to the PCIe engine, and wait for the 793 response in the confirmation address. The firmware should 794 write a -1 there to indicate it is alive and well 795 */ 796 797 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 798 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 799 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 800 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 801 buf[2] = htobe32(0xffffffff); /* confirm data */ 802 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 803 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 804 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 805 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 806 buf[5] = htobe32(enable); /* enable? */ 807 808 809 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 810 811 mxge_pio_copy(submit, buf, 64); 812 wmb(); 813 DELAY(1000); 814 wmb(); 815 i = 0; 816 while (*confirm != 0xffffffff && i < 20) { 817 DELAY(1000); 818 i++; 819 } 820 if (*confirm != 0xffffffff) { 821 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 822 (enable ? "enable" : "disable"), confirm, 823 *confirm); 824 } 825 return; 826 } 827 828 static int 829 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 830 { 831 mcp_cmd_t *buf; 832 char buf_bytes[sizeof(*buf) + 8]; 833 volatile mcp_cmd_response_t *response = sc->cmd; 834 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 835 uint32_t dma_low, dma_high; 836 int err, sleep_total = 0; 837 838 /* ensure buf is aligned to 8 bytes */ 839 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 840 841 buf->data0 = htobe32(data->data0); 842 buf->data1 = htobe32(data->data1); 843 buf->data2 = htobe32(data->data2); 844 buf->cmd = htobe32(cmd); 845 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 846 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 847 848 buf->response_addr.low = htobe32(dma_low); 849 buf->response_addr.high = htobe32(dma_high); 850 mtx_lock(&sc->cmd_mtx); 851 response->result = 0xffffffff; 852 wmb(); 853 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 854 855 /* wait up to 20ms */ 856 err = EAGAIN; 857 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 858 bus_dmamap_sync(sc->cmd_dma.dmat, 859 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 860 wmb(); 861 switch (be32toh(response->result)) { 862 case 0: 863 data->data0 = be32toh(response->data); 864 err = 0; 865 break; 866 case 0xffffffff: 867 DELAY(1000); 868 break; 869 case MXGEFW_CMD_UNKNOWN: 870 err = ENOSYS; 871 break; 872 case MXGEFW_CMD_ERROR_UNALIGNED: 873 err = E2BIG; 874 break; 875 case MXGEFW_CMD_ERROR_BUSY: 876 err = EBUSY; 877 break; 878 case MXGEFW_CMD_ERROR_I2C_ABSENT: 879 err = ENXIO; 880 break; 881 default: 882 device_printf(sc->dev, 883 "mxge: command %d " 884 "failed, result = %d\n", 885 cmd, be32toh(response->result)); 886 err = ENXIO; 887 break; 888 } 889 if (err != EAGAIN) 890 break; 891 } 892 if (err == EAGAIN) 893 device_printf(sc->dev, "mxge: command %d timed out" 894 "result = %d\n", 895 cmd, be32toh(response->result)); 896 mtx_unlock(&sc->cmd_mtx); 897 return err; 898 } 899 900 static int 901 mxge_adopt_running_firmware(mxge_softc_t *sc) 902 { 903 struct mcp_gen_header *hdr; 904 const size_t bytes = sizeof (struct mcp_gen_header); 905 size_t hdr_offset; 906 int status; 907 908 /* find running firmware header */ 909 hdr_offset = htobe32(*(volatile uint32_t *) 910 (sc->sram + MCP_HEADER_PTR_OFFSET)); 911 912 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 913 device_printf(sc->dev, 914 "Running firmware has bad header offset (%d)\n", 915 (int)hdr_offset); 916 return EIO; 917 } 918 919 /* copy header of running firmware from SRAM to host memory to 920 * validate firmware */ 921 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 922 if (hdr == NULL) { 923 device_printf(sc->dev, "could not malloc firmware hdr\n"); 924 return ENOMEM; 925 } 926 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 927 rman_get_bushandle(sc->mem_res), 928 hdr_offset, (char *)hdr, bytes); 929 status = mxge_validate_firmware(sc, hdr); 930 free(hdr, M_DEVBUF); 931 932 /* 933 * check to see if adopted firmware has bug where adopting 934 * it will cause broadcasts to be filtered unless the NIC 935 * is kept in ALLMULTI mode 936 */ 937 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 938 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 939 sc->adopted_rx_filter_bug = 1; 940 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 941 "working around rx filter bug\n", 942 sc->fw_ver_major, sc->fw_ver_minor, 943 sc->fw_ver_tiny); 944 } 945 946 return status; 947 } 948 949 950 static int 951 mxge_load_firmware(mxge_softc_t *sc, int adopt) 952 { 953 volatile uint32_t *confirm; 954 volatile char *submit; 955 char buf_bytes[72]; 956 uint32_t *buf, size, dma_low, dma_high; 957 int status, i; 958 959 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 960 961 size = sc->sram_size; 962 status = mxge_load_firmware_helper(sc, &size); 963 if (status) { 964 if (!adopt) 965 return status; 966 /* Try to use the currently running firmware, if 967 it is new enough */ 968 status = mxge_adopt_running_firmware(sc); 969 if (status) { 970 device_printf(sc->dev, 971 "failed to adopt running firmware\n"); 972 return status; 973 } 974 device_printf(sc->dev, 975 "Successfully adopted running firmware\n"); 976 if (sc->tx_boundary == 4096) { 977 device_printf(sc->dev, 978 "Using firmware currently running on NIC" 979 ". For optimal\n"); 980 device_printf(sc->dev, 981 "performance consider loading optimized " 982 "firmware\n"); 983 } 984 sc->fw_name = mxge_fw_unaligned; 985 sc->tx_boundary = 2048; 986 return 0; 987 } 988 /* clear confirmation addr */ 989 confirm = (volatile uint32_t *)sc->cmd; 990 *confirm = 0; 991 wmb(); 992 /* send a reload command to the bootstrap MCP, and wait for the 993 response in the confirmation address. The firmware should 994 write a -1 there to indicate it is alive and well 995 */ 996 997 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 998 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 999 1000 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 1001 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 1002 buf[2] = htobe32(0xffffffff); /* confirm data */ 1003 1004 /* FIX: All newest firmware should un-protect the bottom of 1005 the sram before handoff. However, the very first interfaces 1006 do not. Therefore the handoff copy must skip the first 8 bytes 1007 */ 1008 /* where the code starts*/ 1009 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 1010 buf[4] = htobe32(size - 8); /* length of code */ 1011 buf[5] = htobe32(8); /* where to copy to */ 1012 buf[6] = htobe32(0); /* where to jump to */ 1013 1014 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 1015 mxge_pio_copy(submit, buf, 64); 1016 wmb(); 1017 DELAY(1000); 1018 wmb(); 1019 i = 0; 1020 while (*confirm != 0xffffffff && i < 20) { 1021 DELAY(1000*10); 1022 i++; 1023 bus_dmamap_sync(sc->cmd_dma.dmat, 1024 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 1025 } 1026 if (*confirm != 0xffffffff) { 1027 device_printf(sc->dev,"handoff failed (%p = 0x%x)", 1028 confirm, *confirm); 1029 1030 return ENXIO; 1031 } 1032 return 0; 1033 } 1034 1035 static int 1036 mxge_update_mac_address(mxge_softc_t *sc) 1037 { 1038 mxge_cmd_t cmd; 1039 uint8_t *addr = sc->mac_addr; 1040 int status; 1041 1042 1043 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 1044 | (addr[2] << 8) | addr[3]); 1045 1046 cmd.data1 = ((addr[4] << 8) | (addr[5])); 1047 1048 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 1049 return status; 1050 } 1051 1052 static int 1053 mxge_change_pause(mxge_softc_t *sc, int pause) 1054 { 1055 mxge_cmd_t cmd; 1056 int status; 1057 1058 if (pause) 1059 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, 1060 &cmd); 1061 else 1062 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, 1063 &cmd); 1064 1065 if (status) { 1066 device_printf(sc->dev, "Failed to set flow control mode\n"); 1067 return ENXIO; 1068 } 1069 sc->pause = pause; 1070 return 0; 1071 } 1072 1073 static void 1074 mxge_change_promisc(mxge_softc_t *sc, int promisc) 1075 { 1076 mxge_cmd_t cmd; 1077 int status; 1078 1079 if (mxge_always_promisc) 1080 promisc = 1; 1081 1082 if (promisc) 1083 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, 1084 &cmd); 1085 else 1086 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, 1087 &cmd); 1088 1089 if (status) { 1090 device_printf(sc->dev, "Failed to set promisc mode\n"); 1091 } 1092 } 1093 1094 struct mxge_add_maddr_ctx { 1095 mxge_softc_t *sc; 1096 int error; 1097 }; 1098 1099 static u_int 1100 mxge_add_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt) 1101 { 1102 struct mxge_add_maddr_ctx *ctx = arg; 1103 mxge_cmd_t cmd; 1104 1105 if (ctx->error != 0) 1106 return (0); 1107 bcopy(LLADDR(sdl), &cmd.data0, 4); 1108 bcopy(LLADDR(sdl) + 4, &cmd.data1, 2); 1109 cmd.data0 = htonl(cmd.data0); 1110 cmd.data1 = htonl(cmd.data1); 1111 1112 ctx->error = mxge_send_cmd(ctx->sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1113 1114 return (1); 1115 } 1116 1117 static void 1118 mxge_set_multicast_list(mxge_softc_t *sc) 1119 { 1120 struct mxge_add_maddr_ctx ctx; 1121 struct ifnet *ifp = sc->ifp; 1122 mxge_cmd_t cmd; 1123 int err; 1124 1125 /* This firmware is known to not support multicast */ 1126 if (!sc->fw_multicast_support) 1127 return; 1128 1129 /* Disable multicast filtering while we play with the lists*/ 1130 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1131 if (err != 0) { 1132 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," 1133 " error status: %d\n", err); 1134 return; 1135 } 1136 1137 if (sc->adopted_rx_filter_bug) 1138 return; 1139 1140 if (ifp->if_flags & IFF_ALLMULTI) 1141 /* request to disable multicast filtering, so quit here */ 1142 return; 1143 1144 /* Flush all the filters */ 1145 1146 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1147 if (err != 0) { 1148 device_printf(sc->dev, 1149 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" 1150 ", error status: %d\n", err); 1151 return; 1152 } 1153 1154 /* Walk the multicast list, and add each address */ 1155 ctx.sc = sc; 1156 ctx.error = 0; 1157 if_foreach_llmaddr(ifp, mxge_add_maddr, &ctx); 1158 if (ctx.error != 0) { 1159 device_printf(sc->dev, "Failed MXGEFW_JOIN_MULTICAST_GROUP, " 1160 "error status:" "%d\t", ctx.error); 1161 /* abort, leaving multicast filtering off */ 1162 return; 1163 } 1164 1165 /* Enable multicast filtering */ 1166 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1167 if (err != 0) { 1168 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI" 1169 ", error status: %d\n", err); 1170 } 1171 } 1172 1173 static int 1174 mxge_max_mtu(mxge_softc_t *sc) 1175 { 1176 mxge_cmd_t cmd; 1177 int status; 1178 1179 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) 1180 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1181 1182 /* try to set nbufs to see if it we can 1183 use virtually contiguous jumbos */ 1184 cmd.data0 = 0; 1185 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 1186 &cmd); 1187 if (status == 0) 1188 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1189 1190 /* otherwise, we're limited to MJUMPAGESIZE */ 1191 return MJUMPAGESIZE - MXGEFW_PAD; 1192 } 1193 1194 static int 1195 mxge_reset(mxge_softc_t *sc, int interrupts_setup) 1196 { 1197 struct mxge_slice_state *ss; 1198 mxge_rx_done_t *rx_done; 1199 volatile uint32_t *irq_claim; 1200 mxge_cmd_t cmd; 1201 int slice, status; 1202 1203 /* try to send a reset command to the card to see if it 1204 is alive */ 1205 memset(&cmd, 0, sizeof (cmd)); 1206 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1207 if (status != 0) { 1208 device_printf(sc->dev, "failed reset\n"); 1209 return ENXIO; 1210 } 1211 1212 mxge_dummy_rdma(sc, 1); 1213 1214 1215 /* set the intrq size */ 1216 cmd.data0 = sc->rx_ring_size; 1217 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1218 1219 /* 1220 * Even though we already know how many slices are supported 1221 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES 1222 * has magic side effects, and must be called after a reset. 1223 * It must be called prior to calling any RSS related cmds, 1224 * including assigning an interrupt queue for anything but 1225 * slice 0. It must also be called *after* 1226 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 1227 * the firmware to compute offsets. 1228 */ 1229 1230 if (sc->num_slices > 1) { 1231 /* ask the maximum number of slices it supports */ 1232 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 1233 &cmd); 1234 if (status != 0) { 1235 device_printf(sc->dev, 1236 "failed to get number of slices\n"); 1237 return status; 1238 } 1239 /* 1240 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 1241 * to setting up the interrupt queue DMA 1242 */ 1243 cmd.data0 = sc->num_slices; 1244 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 1245 #ifdef IFNET_BUF_RING 1246 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 1247 #endif 1248 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, 1249 &cmd); 1250 if (status != 0) { 1251 device_printf(sc->dev, 1252 "failed to set number of slices\n"); 1253 return status; 1254 } 1255 } 1256 1257 1258 if (interrupts_setup) { 1259 /* Now exchange information about interrupts */ 1260 for (slice = 0; slice < sc->num_slices; slice++) { 1261 rx_done = &sc->ss[slice].rx_done; 1262 memset(rx_done->entry, 0, sc->rx_ring_size); 1263 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr); 1264 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr); 1265 cmd.data2 = slice; 1266 status |= mxge_send_cmd(sc, 1267 MXGEFW_CMD_SET_INTRQ_DMA, 1268 &cmd); 1269 } 1270 } 1271 1272 status |= mxge_send_cmd(sc, 1273 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); 1274 1275 1276 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1277 1278 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1279 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1280 1281 1282 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1283 &cmd); 1284 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1285 if (status != 0) { 1286 device_printf(sc->dev, "failed set interrupt parameters\n"); 1287 return status; 1288 } 1289 1290 1291 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1292 1293 1294 /* run a DMA benchmark */ 1295 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST); 1296 1297 for (slice = 0; slice < sc->num_slices; slice++) { 1298 ss = &sc->ss[slice]; 1299 1300 ss->irq_claim = irq_claim + (2 * slice); 1301 /* reset mcp/driver shared state back to 0 */ 1302 ss->rx_done.idx = 0; 1303 ss->rx_done.cnt = 0; 1304 ss->tx.req = 0; 1305 ss->tx.done = 0; 1306 ss->tx.pkt_done = 0; 1307 ss->tx.queue_active = 0; 1308 ss->tx.activate = 0; 1309 ss->tx.deactivate = 0; 1310 ss->tx.wake = 0; 1311 ss->tx.defrag = 0; 1312 ss->tx.stall = 0; 1313 ss->rx_big.cnt = 0; 1314 ss->rx_small.cnt = 0; 1315 ss->lc.lro_bad_csum = 0; 1316 ss->lc.lro_queued = 0; 1317 ss->lc.lro_flushed = 0; 1318 if (ss->fw_stats != NULL) { 1319 bzero(ss->fw_stats, sizeof *ss->fw_stats); 1320 } 1321 } 1322 sc->rdma_tags_available = 15; 1323 status = mxge_update_mac_address(sc); 1324 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC); 1325 mxge_change_pause(sc, sc->pause); 1326 mxge_set_multicast_list(sc); 1327 if (sc->throttle) { 1328 cmd.data0 = sc->throttle; 1329 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, 1330 &cmd)) { 1331 device_printf(sc->dev, 1332 "can't enable throttle\n"); 1333 } 1334 } 1335 return status; 1336 } 1337 1338 static int 1339 mxge_change_throttle(SYSCTL_HANDLER_ARGS) 1340 { 1341 mxge_cmd_t cmd; 1342 mxge_softc_t *sc; 1343 int err; 1344 unsigned int throttle; 1345 1346 sc = arg1; 1347 throttle = sc->throttle; 1348 err = sysctl_handle_int(oidp, &throttle, arg2, req); 1349 if (err != 0) { 1350 return err; 1351 } 1352 1353 if (throttle == sc->throttle) 1354 return 0; 1355 1356 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE) 1357 return EINVAL; 1358 1359 mtx_lock(&sc->driver_mtx); 1360 cmd.data0 = throttle; 1361 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd); 1362 if (err == 0) 1363 sc->throttle = throttle; 1364 mtx_unlock(&sc->driver_mtx); 1365 return err; 1366 } 1367 1368 static int 1369 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1370 { 1371 mxge_softc_t *sc; 1372 unsigned int intr_coal_delay; 1373 int err; 1374 1375 sc = arg1; 1376 intr_coal_delay = sc->intr_coal_delay; 1377 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1378 if (err != 0) { 1379 return err; 1380 } 1381 if (intr_coal_delay == sc->intr_coal_delay) 1382 return 0; 1383 1384 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1385 return EINVAL; 1386 1387 mtx_lock(&sc->driver_mtx); 1388 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1389 sc->intr_coal_delay = intr_coal_delay; 1390 1391 mtx_unlock(&sc->driver_mtx); 1392 return err; 1393 } 1394 1395 static int 1396 mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1397 { 1398 mxge_softc_t *sc; 1399 unsigned int enabled; 1400 int err; 1401 1402 sc = arg1; 1403 enabled = sc->pause; 1404 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1405 if (err != 0) { 1406 return err; 1407 } 1408 if (enabled == sc->pause) 1409 return 0; 1410 1411 mtx_lock(&sc->driver_mtx); 1412 err = mxge_change_pause(sc, enabled); 1413 mtx_unlock(&sc->driver_mtx); 1414 return err; 1415 } 1416 1417 static int 1418 mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1419 { 1420 int err; 1421 1422 if (arg1 == NULL) 1423 return EFAULT; 1424 arg2 = be32toh(*(int *)arg1); 1425 arg1 = NULL; 1426 err = sysctl_handle_int(oidp, arg1, arg2, req); 1427 1428 return err; 1429 } 1430 1431 static void 1432 mxge_rem_sysctls(mxge_softc_t *sc) 1433 { 1434 struct mxge_slice_state *ss; 1435 int slice; 1436 1437 if (sc->slice_sysctl_tree == NULL) 1438 return; 1439 1440 for (slice = 0; slice < sc->num_slices; slice++) { 1441 ss = &sc->ss[slice]; 1442 if (ss == NULL || ss->sysctl_tree == NULL) 1443 continue; 1444 sysctl_ctx_free(&ss->sysctl_ctx); 1445 ss->sysctl_tree = NULL; 1446 } 1447 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1448 sc->slice_sysctl_tree = NULL; 1449 } 1450 1451 static void 1452 mxge_add_sysctls(mxge_softc_t *sc) 1453 { 1454 struct sysctl_ctx_list *ctx; 1455 struct sysctl_oid_list *children; 1456 mcp_irq_data_t *fw; 1457 struct mxge_slice_state *ss; 1458 int slice; 1459 char slice_num[8]; 1460 1461 ctx = device_get_sysctl_ctx(sc->dev); 1462 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1463 fw = sc->ss[0].fw_stats; 1464 1465 /* random information */ 1466 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1467 "firmware_version", 1468 CTLFLAG_RD, sc->fw_version, 1469 0, "firmware version"); 1470 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1471 "serial_number", 1472 CTLFLAG_RD, sc->serial_number_string, 1473 0, "serial number"); 1474 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1475 "product_code", 1476 CTLFLAG_RD, sc->product_code_string, 1477 0, "product_code"); 1478 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1479 "pcie_link_width", 1480 CTLFLAG_RD, &sc->link_width, 1481 0, "tx_boundary"); 1482 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1483 "tx_boundary", 1484 CTLFLAG_RD, &sc->tx_boundary, 1485 0, "tx_boundary"); 1486 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1487 "write_combine", 1488 CTLFLAG_RD, &sc->wc, 1489 0, "write combining PIO?"); 1490 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1491 "read_dma_MBs", 1492 CTLFLAG_RD, &sc->read_dma, 1493 0, "DMA Read speed in MB/s"); 1494 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1495 "write_dma_MBs", 1496 CTLFLAG_RD, &sc->write_dma, 1497 0, "DMA Write speed in MB/s"); 1498 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1499 "read_write_dma_MBs", 1500 CTLFLAG_RD, &sc->read_write_dma, 1501 0, "DMA concurrent Read/Write speed in MB/s"); 1502 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1503 "watchdog_resets", 1504 CTLFLAG_RD, &sc->watchdog_resets, 1505 0, "Number of times NIC was reset"); 1506 1507 1508 /* performance related tunables */ 1509 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1510 "intr_coal_delay", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 1511 sc, 0, mxge_change_intr_coal, "I", 1512 "interrupt coalescing delay in usecs"); 1513 1514 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1515 "throttle", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 1516 mxge_change_throttle, "I", "transmit throttling"); 1517 1518 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1519 "flow_control_enabled", 1520 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 1521 mxge_change_flow_control, "I", 1522 "interrupt coalescing delay in usecs"); 1523 1524 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1525 "deassert_wait", 1526 CTLFLAG_RW, &mxge_deassert_wait, 1527 0, "Wait for IRQ line to go low in ihandler"); 1528 1529 /* stats block from firmware is in network byte order. 1530 Need to swap it */ 1531 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1532 "link_up", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1533 &fw->link_up, 0, mxge_handle_be32, "I", "link up"); 1534 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1535 "rdma_tags_available", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1536 &fw->rdma_tags_available, 0, mxge_handle_be32, "I", 1537 "rdma_tags_available"); 1538 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1539 "dropped_bad_crc32", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1540 &fw->dropped_bad_crc32, 0, mxge_handle_be32, "I", 1541 "dropped_bad_crc32"); 1542 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1543 "dropped_bad_phy", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1544 &fw->dropped_bad_phy, 0, mxge_handle_be32, "I", "dropped_bad_phy"); 1545 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1546 "dropped_link_error_or_filtered", 1547 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1548 &fw->dropped_link_error_or_filtered, 0, mxge_handle_be32, "I", 1549 "dropped_link_error_or_filtered"); 1550 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1551 "dropped_link_overflow", 1552 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1553 &fw->dropped_link_overflow, 0, mxge_handle_be32, "I", 1554 "dropped_link_overflow"); 1555 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1556 "dropped_multicast_filtered", 1557 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1558 &fw->dropped_multicast_filtered, 0, mxge_handle_be32, "I", 1559 "dropped_multicast_filtered"); 1560 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1561 "dropped_no_big_buffer", 1562 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1563 &fw->dropped_no_big_buffer, 0, mxge_handle_be32, "I", 1564 "dropped_no_big_buffer"); 1565 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1566 "dropped_no_small_buffer", 1567 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1568 &fw->dropped_no_small_buffer, 0, mxge_handle_be32, "I", 1569 "dropped_no_small_buffer"); 1570 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1571 "dropped_overrun", 1572 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1573 &fw->dropped_overrun, 0, mxge_handle_be32, "I", 1574 "dropped_overrun"); 1575 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1576 "dropped_pause", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1577 &fw->dropped_pause, 0, mxge_handle_be32, "I", "dropped_pause"); 1578 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1579 "dropped_runt", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1580 &fw->dropped_runt, 0, mxge_handle_be32, "I", "dropped_runt"); 1581 1582 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1583 "dropped_unicast_filtered", 1584 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 1585 &fw->dropped_unicast_filtered, 0, mxge_handle_be32, "I", 1586 "dropped_unicast_filtered"); 1587 1588 /* verbose printing? */ 1589 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1590 "verbose", 1591 CTLFLAG_RW, &mxge_verbose, 1592 0, "verbose printing"); 1593 1594 /* add counters exported for debugging from all slices */ 1595 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1596 sc->slice_sysctl_tree = 1597 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO, 1598 "slice", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 1599 1600 for (slice = 0; slice < sc->num_slices; slice++) { 1601 ss = &sc->ss[slice]; 1602 sysctl_ctx_init(&ss->sysctl_ctx); 1603 ctx = &ss->sysctl_ctx; 1604 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1605 sprintf(slice_num, "%d", slice); 1606 ss->sysctl_tree = 1607 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num, 1608 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 1609 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1610 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1611 "rx_small_cnt", 1612 CTLFLAG_RD, &ss->rx_small.cnt, 1613 0, "rx_small_cnt"); 1614 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1615 "rx_big_cnt", 1616 CTLFLAG_RD, &ss->rx_big.cnt, 1617 0, "rx_small_cnt"); 1618 SYSCTL_ADD_U64(ctx, children, OID_AUTO, 1619 "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed, 1620 0, "number of lro merge queues flushed"); 1621 1622 SYSCTL_ADD_U64(ctx, children, OID_AUTO, 1623 "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum, 1624 0, "number of bad csums preventing LRO"); 1625 1626 SYSCTL_ADD_U64(ctx, children, OID_AUTO, 1627 "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued, 1628 0, "number of frames appended to lro merge" 1629 "queues"); 1630 1631 #ifndef IFNET_BUF_RING 1632 /* only transmit from slice 0 for now */ 1633 if (slice > 0) 1634 continue; 1635 #endif 1636 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1637 "tx_req", 1638 CTLFLAG_RD, &ss->tx.req, 1639 0, "tx_req"); 1640 1641 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1642 "tx_done", 1643 CTLFLAG_RD, &ss->tx.done, 1644 0, "tx_done"); 1645 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1646 "tx_pkt_done", 1647 CTLFLAG_RD, &ss->tx.pkt_done, 1648 0, "tx_done"); 1649 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1650 "tx_stall", 1651 CTLFLAG_RD, &ss->tx.stall, 1652 0, "tx_stall"); 1653 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1654 "tx_wake", 1655 CTLFLAG_RD, &ss->tx.wake, 1656 0, "tx_wake"); 1657 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1658 "tx_defrag", 1659 CTLFLAG_RD, &ss->tx.defrag, 1660 0, "tx_defrag"); 1661 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1662 "tx_queue_active", 1663 CTLFLAG_RD, &ss->tx.queue_active, 1664 0, "tx_queue_active"); 1665 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1666 "tx_activate", 1667 CTLFLAG_RD, &ss->tx.activate, 1668 0, "tx_activate"); 1669 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1670 "tx_deactivate", 1671 CTLFLAG_RD, &ss->tx.deactivate, 1672 0, "tx_deactivate"); 1673 } 1674 } 1675 1676 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1677 backwards one at a time and handle ring wraps */ 1678 1679 static inline void 1680 mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1681 mcp_kreq_ether_send_t *src, int cnt) 1682 { 1683 int idx, starting_slot; 1684 starting_slot = tx->req; 1685 while (cnt > 1) { 1686 cnt--; 1687 idx = (starting_slot + cnt) & tx->mask; 1688 mxge_pio_copy(&tx->lanai[idx], 1689 &src[cnt], sizeof(*src)); 1690 wmb(); 1691 } 1692 } 1693 1694 /* 1695 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1696 * at most 32 bytes at a time, so as to avoid involving the software 1697 * pio handler in the nic. We re-write the first segment's flags 1698 * to mark them valid only after writing the entire chain 1699 */ 1700 1701 static inline void 1702 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 1703 int cnt) 1704 { 1705 int idx, i; 1706 uint32_t *src_ints; 1707 volatile uint32_t *dst_ints; 1708 mcp_kreq_ether_send_t *srcp; 1709 volatile mcp_kreq_ether_send_t *dstp, *dst; 1710 uint8_t last_flags; 1711 1712 idx = tx->req & tx->mask; 1713 1714 last_flags = src->flags; 1715 src->flags = 0; 1716 wmb(); 1717 dst = dstp = &tx->lanai[idx]; 1718 srcp = src; 1719 1720 if ((idx + cnt) < tx->mask) { 1721 for (i = 0; i < (cnt - 1); i += 2) { 1722 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1723 wmb(); /* force write every 32 bytes */ 1724 srcp += 2; 1725 dstp += 2; 1726 } 1727 } else { 1728 /* submit all but the first request, and ensure 1729 that it is submitted below */ 1730 mxge_submit_req_backwards(tx, src, cnt); 1731 i = 0; 1732 } 1733 if (i < cnt) { 1734 /* submit the first request */ 1735 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1736 wmb(); /* barrier before setting valid flag */ 1737 } 1738 1739 /* re-write the last 32-bits with the valid flags */ 1740 src->flags = last_flags; 1741 src_ints = (uint32_t *)src; 1742 src_ints+=3; 1743 dst_ints = (volatile uint32_t *)dst; 1744 dst_ints+=3; 1745 *dst_ints = *src_ints; 1746 tx->req += cnt; 1747 wmb(); 1748 } 1749 1750 static int 1751 mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m, 1752 struct mxge_pkt_info *pi) 1753 { 1754 struct ether_vlan_header *eh; 1755 uint16_t etype; 1756 int tso = m->m_pkthdr.csum_flags & (CSUM_TSO); 1757 #if IFCAP_TSO6 && defined(INET6) 1758 int nxt; 1759 #endif 1760 1761 eh = mtod(m, struct ether_vlan_header *); 1762 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 1763 etype = ntohs(eh->evl_proto); 1764 pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 1765 } else { 1766 etype = ntohs(eh->evl_encap_proto); 1767 pi->ip_off = ETHER_HDR_LEN; 1768 } 1769 1770 switch (etype) { 1771 case ETHERTYPE_IP: 1772 /* 1773 * ensure ip header is in first mbuf, copy it to a 1774 * scratch buffer if not 1775 */ 1776 pi->ip = (struct ip *)(m->m_data + pi->ip_off); 1777 pi->ip6 = NULL; 1778 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) { 1779 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip), 1780 ss->scratch); 1781 pi->ip = (struct ip *)(ss->scratch + pi->ip_off); 1782 } 1783 pi->ip_hlen = pi->ip->ip_hl << 2; 1784 if (!tso) 1785 return 0; 1786 1787 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen + 1788 sizeof(struct tcphdr))) { 1789 m_copydata(m, 0, pi->ip_off + pi->ip_hlen + 1790 sizeof(struct tcphdr), ss->scratch); 1791 pi->ip = (struct ip *)(ss->scratch + pi->ip_off); 1792 } 1793 pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen); 1794 break; 1795 #if IFCAP_TSO6 && defined(INET6) 1796 case ETHERTYPE_IPV6: 1797 pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off); 1798 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) { 1799 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6), 1800 ss->scratch); 1801 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off); 1802 } 1803 nxt = 0; 1804 pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt); 1805 pi->ip_hlen -= pi->ip_off; 1806 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) 1807 return EINVAL; 1808 1809 if (!tso) 1810 return 0; 1811 1812 if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen) 1813 return EINVAL; 1814 1815 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen + 1816 sizeof(struct tcphdr))) { 1817 m_copydata(m, 0, pi->ip_off + pi->ip_hlen + 1818 sizeof(struct tcphdr), ss->scratch); 1819 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off); 1820 } 1821 pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen); 1822 break; 1823 #endif 1824 default: 1825 return EINVAL; 1826 } 1827 return 0; 1828 } 1829 1830 #if IFCAP_TSO4 1831 1832 static void 1833 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m, 1834 int busdma_seg_cnt, struct mxge_pkt_info *pi) 1835 { 1836 mxge_tx_ring_t *tx; 1837 mcp_kreq_ether_send_t *req; 1838 bus_dma_segment_t *seg; 1839 uint32_t low, high_swapped; 1840 int len, seglen, cum_len, cum_len_next; 1841 int next_is_first, chop, cnt, rdma_count, small; 1842 uint16_t pseudo_hdr_offset, cksum_offset, mss, sum; 1843 uint8_t flags, flags_next; 1844 static int once; 1845 1846 mss = m->m_pkthdr.tso_segsz; 1847 1848 /* negative cum_len signifies to the 1849 * send loop that we are still in the 1850 * header portion of the TSO packet. 1851 */ 1852 1853 cksum_offset = pi->ip_off + pi->ip_hlen; 1854 cum_len = -(cksum_offset + (pi->tcp->th_off << 2)); 1855 1856 /* TSO implies checksum offload on this hardware */ 1857 if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) { 1858 /* 1859 * If packet has full TCP csum, replace it with pseudo hdr 1860 * sum that the NIC expects, otherwise the NIC will emit 1861 * packets with bad TCP checksums. 1862 */ 1863 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); 1864 if (pi->ip6) { 1865 #if (CSUM_TCP_IPV6 != 0) && defined(INET6) 1866 m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6; 1867 sum = in6_cksum_pseudo(pi->ip6, 1868 m->m_pkthdr.len - cksum_offset, 1869 IPPROTO_TCP, 0); 1870 #endif 1871 } else { 1872 #ifdef INET 1873 m->m_pkthdr.csum_flags |= CSUM_TCP; 1874 sum = in_pseudo(pi->ip->ip_src.s_addr, 1875 pi->ip->ip_dst.s_addr, 1876 htons(IPPROTO_TCP + (m->m_pkthdr.len - 1877 cksum_offset))); 1878 #endif 1879 } 1880 m_copyback(m, offsetof(struct tcphdr, th_sum) + 1881 cksum_offset, sizeof(sum), (caddr_t)&sum); 1882 } 1883 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1884 1885 1886 /* for TSO, pseudo_hdr_offset holds mss. 1887 * The firmware figures out where to put 1888 * the checksum by parsing the header. */ 1889 pseudo_hdr_offset = htobe16(mss); 1890 1891 if (pi->ip6) { 1892 /* 1893 * for IPv6 TSO, the "checksum offset" is re-purposed 1894 * to store the TCP header len 1895 */ 1896 cksum_offset = (pi->tcp->th_off << 2); 1897 } 1898 1899 tx = &ss->tx; 1900 req = tx->req_list; 1901 seg = tx->seg_list; 1902 cnt = 0; 1903 rdma_count = 0; 1904 /* "rdma_count" is the number of RDMAs belonging to the 1905 * current packet BEFORE the current send request. For 1906 * non-TSO packets, this is equal to "count". 1907 * For TSO packets, rdma_count needs to be reset 1908 * to 0 after a segment cut. 1909 * 1910 * The rdma_count field of the send request is 1911 * the number of RDMAs of the packet starting at 1912 * that request. For TSO send requests with one ore more cuts 1913 * in the middle, this is the number of RDMAs starting 1914 * after the last cut in the request. All previous 1915 * segments before the last cut implicitly have 1 RDMA. 1916 * 1917 * Since the number of RDMAs is not known beforehand, 1918 * it must be filled-in retroactively - after each 1919 * segmentation cut or at the end of the entire packet. 1920 */ 1921 1922 while (busdma_seg_cnt) { 1923 /* Break the busdma segment up into pieces*/ 1924 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1925 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1926 len = seg->ds_len; 1927 1928 while (len) { 1929 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1930 seglen = len; 1931 cum_len_next = cum_len + seglen; 1932 (req-rdma_count)->rdma_count = rdma_count + 1; 1933 if (__predict_true(cum_len >= 0)) { 1934 /* payload */ 1935 chop = (cum_len_next > mss); 1936 cum_len_next = cum_len_next % mss; 1937 next_is_first = (cum_len_next == 0); 1938 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1939 flags_next |= next_is_first * 1940 MXGEFW_FLAGS_FIRST; 1941 rdma_count |= -(chop | next_is_first); 1942 rdma_count += chop & !next_is_first; 1943 } else if (cum_len_next >= 0) { 1944 /* header ends */ 1945 rdma_count = -1; 1946 cum_len_next = 0; 1947 seglen = -cum_len; 1948 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1949 flags_next = MXGEFW_FLAGS_TSO_PLD | 1950 MXGEFW_FLAGS_FIRST | 1951 (small * MXGEFW_FLAGS_SMALL); 1952 } 1953 1954 req->addr_high = high_swapped; 1955 req->addr_low = htobe32(low); 1956 req->pseudo_hdr_offset = pseudo_hdr_offset; 1957 req->pad = 0; 1958 req->rdma_count = 1; 1959 req->length = htobe16(seglen); 1960 req->cksum_offset = cksum_offset; 1961 req->flags = flags | ((cum_len & 1) * 1962 MXGEFW_FLAGS_ALIGN_ODD); 1963 low += seglen; 1964 len -= seglen; 1965 cum_len = cum_len_next; 1966 flags = flags_next; 1967 req++; 1968 cnt++; 1969 rdma_count++; 1970 if (cksum_offset != 0 && !pi->ip6) { 1971 if (__predict_false(cksum_offset > seglen)) 1972 cksum_offset -= seglen; 1973 else 1974 cksum_offset = 0; 1975 } 1976 if (__predict_false(cnt > tx->max_desc)) 1977 goto drop; 1978 } 1979 busdma_seg_cnt--; 1980 seg++; 1981 } 1982 (req-rdma_count)->rdma_count = rdma_count; 1983 1984 do { 1985 req--; 1986 req->flags |= MXGEFW_FLAGS_TSO_LAST; 1987 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 1988 1989 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1990 mxge_submit_req(tx, tx->req_list, cnt); 1991 #ifdef IFNET_BUF_RING 1992 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 1993 /* tell the NIC to start polling this slice */ 1994 *tx->send_go = 1; 1995 tx->queue_active = 1; 1996 tx->activate++; 1997 wmb(); 1998 } 1999 #endif 2000 return; 2001 2002 drop: 2003 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 2004 m_freem(m); 2005 ss->oerrors++; 2006 if (!once) { 2007 printf("tx->max_desc exceeded via TSO!\n"); 2008 printf("mss = %d, %ld, %d!\n", mss, 2009 (long)seg - (long)tx->seg_list, tx->max_desc); 2010 once = 1; 2011 } 2012 return; 2013 2014 } 2015 2016 #endif /* IFCAP_TSO4 */ 2017 2018 #ifdef MXGE_NEW_VLAN_API 2019 /* 2020 * We reproduce the software vlan tag insertion from 2021 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" 2022 * vlan tag insertion. We need to advertise this in order to have the 2023 * vlan interface respect our csum offload flags. 2024 */ 2025 static struct mbuf * 2026 mxge_vlan_tag_insert(struct mbuf *m) 2027 { 2028 struct ether_vlan_header *evl; 2029 2030 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT); 2031 if (__predict_false(m == NULL)) 2032 return NULL; 2033 if (m->m_len < sizeof(*evl)) { 2034 m = m_pullup(m, sizeof(*evl)); 2035 if (__predict_false(m == NULL)) 2036 return NULL; 2037 } 2038 /* 2039 * Transform the Ethernet header into an Ethernet header 2040 * with 802.1Q encapsulation. 2041 */ 2042 evl = mtod(m, struct ether_vlan_header *); 2043 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 2044 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 2045 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 2046 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 2047 m->m_flags &= ~M_VLANTAG; 2048 return m; 2049 } 2050 #endif /* MXGE_NEW_VLAN_API */ 2051 2052 static void 2053 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 2054 { 2055 struct mxge_pkt_info pi = {0,0,0,0}; 2056 mxge_softc_t *sc; 2057 mcp_kreq_ether_send_t *req; 2058 bus_dma_segment_t *seg; 2059 struct mbuf *m_tmp; 2060 struct ifnet *ifp; 2061 mxge_tx_ring_t *tx; 2062 int cnt, cum_len, err, i, idx, odd_flag; 2063 uint16_t pseudo_hdr_offset; 2064 uint8_t flags, cksum_offset; 2065 2066 2067 sc = ss->sc; 2068 ifp = sc->ifp; 2069 tx = &ss->tx; 2070 2071 #ifdef MXGE_NEW_VLAN_API 2072 if (m->m_flags & M_VLANTAG) { 2073 m = mxge_vlan_tag_insert(m); 2074 if (__predict_false(m == NULL)) 2075 goto drop_without_m; 2076 } 2077 #endif 2078 if (m->m_pkthdr.csum_flags & 2079 (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { 2080 if (mxge_parse_tx(ss, m, &pi)) 2081 goto drop; 2082 } 2083 2084 /* (try to) map the frame for DMA */ 2085 idx = tx->req & tx->mask; 2086 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 2087 m, tx->seg_list, &cnt, 2088 BUS_DMA_NOWAIT); 2089 if (__predict_false(err == EFBIG)) { 2090 /* Too many segments in the chain. Try 2091 to defrag */ 2092 m_tmp = m_defrag(m, M_NOWAIT); 2093 if (m_tmp == NULL) { 2094 goto drop; 2095 } 2096 ss->tx.defrag++; 2097 m = m_tmp; 2098 err = bus_dmamap_load_mbuf_sg(tx->dmat, 2099 tx->info[idx].map, 2100 m, tx->seg_list, &cnt, 2101 BUS_DMA_NOWAIT); 2102 } 2103 if (__predict_false(err != 0)) { 2104 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 2105 " packet len = %d\n", err, m->m_pkthdr.len); 2106 goto drop; 2107 } 2108 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 2109 BUS_DMASYNC_PREWRITE); 2110 tx->info[idx].m = m; 2111 2112 #if IFCAP_TSO4 2113 /* TSO is different enough, we handle it in another routine */ 2114 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 2115 mxge_encap_tso(ss, m, cnt, &pi); 2116 return; 2117 } 2118 #endif 2119 2120 req = tx->req_list; 2121 cksum_offset = 0; 2122 pseudo_hdr_offset = 0; 2123 flags = MXGEFW_FLAGS_NO_TSO; 2124 2125 /* checksum offloading? */ 2126 if (m->m_pkthdr.csum_flags & 2127 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { 2128 /* ensure ip header is in first mbuf, copy 2129 it to a scratch buffer if not */ 2130 cksum_offset = pi.ip_off + pi.ip_hlen; 2131 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2132 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2133 req->cksum_offset = cksum_offset; 2134 flags |= MXGEFW_FLAGS_CKSUM; 2135 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2136 } else { 2137 odd_flag = 0; 2138 } 2139 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2140 flags |= MXGEFW_FLAGS_SMALL; 2141 2142 /* convert segments into a request list */ 2143 cum_len = 0; 2144 seg = tx->seg_list; 2145 req->flags = MXGEFW_FLAGS_FIRST; 2146 for (i = 0; i < cnt; i++) { 2147 req->addr_low = 2148 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2149 req->addr_high = 2150 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2151 req->length = htobe16(seg->ds_len); 2152 req->cksum_offset = cksum_offset; 2153 if (cksum_offset > seg->ds_len) 2154 cksum_offset -= seg->ds_len; 2155 else 2156 cksum_offset = 0; 2157 req->pseudo_hdr_offset = pseudo_hdr_offset; 2158 req->pad = 0; /* complete solid 16-byte block */ 2159 req->rdma_count = 1; 2160 req->flags |= flags | ((cum_len & 1) * odd_flag); 2161 cum_len += seg->ds_len; 2162 seg++; 2163 req++; 2164 req->flags = 0; 2165 } 2166 req--; 2167 /* pad runts to 60 bytes */ 2168 if (cum_len < 60) { 2169 req++; 2170 req->addr_low = 2171 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2172 req->addr_high = 2173 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2174 req->length = htobe16(60 - cum_len); 2175 req->cksum_offset = 0; 2176 req->pseudo_hdr_offset = pseudo_hdr_offset; 2177 req->pad = 0; /* complete solid 16-byte block */ 2178 req->rdma_count = 1; 2179 req->flags |= flags | ((cum_len & 1) * odd_flag); 2180 cnt++; 2181 } 2182 2183 tx->req_list[0].rdma_count = cnt; 2184 #if 0 2185 /* print what the firmware will see */ 2186 for (i = 0; i < cnt; i++) { 2187 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2188 "cso:%d, flags:0x%x, rdma:%d\n", 2189 i, (int)ntohl(tx->req_list[i].addr_high), 2190 (int)ntohl(tx->req_list[i].addr_low), 2191 (int)ntohs(tx->req_list[i].length), 2192 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2193 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2194 tx->req_list[i].rdma_count); 2195 } 2196 printf("--------------\n"); 2197 #endif 2198 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2199 mxge_submit_req(tx, tx->req_list, cnt); 2200 #ifdef IFNET_BUF_RING 2201 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2202 /* tell the NIC to start polling this slice */ 2203 *tx->send_go = 1; 2204 tx->queue_active = 1; 2205 tx->activate++; 2206 wmb(); 2207 } 2208 #endif 2209 return; 2210 2211 drop: 2212 m_freem(m); 2213 drop_without_m: 2214 ss->oerrors++; 2215 return; 2216 } 2217 2218 #ifdef IFNET_BUF_RING 2219 static void 2220 mxge_qflush(struct ifnet *ifp) 2221 { 2222 mxge_softc_t *sc = ifp->if_softc; 2223 mxge_tx_ring_t *tx; 2224 struct mbuf *m; 2225 int slice; 2226 2227 for (slice = 0; slice < sc->num_slices; slice++) { 2228 tx = &sc->ss[slice].tx; 2229 mtx_lock(&tx->mtx); 2230 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2231 m_freem(m); 2232 mtx_unlock(&tx->mtx); 2233 } 2234 if_qflush(ifp); 2235 } 2236 2237 static inline void 2238 mxge_start_locked(struct mxge_slice_state *ss) 2239 { 2240 mxge_softc_t *sc; 2241 struct mbuf *m; 2242 struct ifnet *ifp; 2243 mxge_tx_ring_t *tx; 2244 2245 sc = ss->sc; 2246 ifp = sc->ifp; 2247 tx = &ss->tx; 2248 2249 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2250 m = drbr_dequeue(ifp, tx->br); 2251 if (m == NULL) { 2252 return; 2253 } 2254 /* let BPF see it */ 2255 BPF_MTAP(ifp, m); 2256 2257 /* give it to the nic */ 2258 mxge_encap(ss, m); 2259 } 2260 /* ran out of transmit slots */ 2261 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) 2262 && (!drbr_empty(ifp, tx->br))) { 2263 ss->if_drv_flags |= IFF_DRV_OACTIVE; 2264 tx->stall++; 2265 } 2266 } 2267 2268 static int 2269 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2270 { 2271 mxge_softc_t *sc; 2272 struct ifnet *ifp; 2273 mxge_tx_ring_t *tx; 2274 int err; 2275 2276 sc = ss->sc; 2277 ifp = sc->ifp; 2278 tx = &ss->tx; 2279 2280 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 2281 IFF_DRV_RUNNING) { 2282 err = drbr_enqueue(ifp, tx->br, m); 2283 return (err); 2284 } 2285 2286 if (!drbr_needs_enqueue(ifp, tx->br) && 2287 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2288 /* let BPF see it */ 2289 BPF_MTAP(ifp, m); 2290 /* give it to the nic */ 2291 mxge_encap(ss, m); 2292 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2293 return (err); 2294 } 2295 if (!drbr_empty(ifp, tx->br)) 2296 mxge_start_locked(ss); 2297 return (0); 2298 } 2299 2300 static int 2301 mxge_transmit(struct ifnet *ifp, struct mbuf *m) 2302 { 2303 mxge_softc_t *sc = ifp->if_softc; 2304 struct mxge_slice_state *ss; 2305 mxge_tx_ring_t *tx; 2306 int err = 0; 2307 int slice; 2308 2309 slice = m->m_pkthdr.flowid; 2310 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ 2311 2312 ss = &sc->ss[slice]; 2313 tx = &ss->tx; 2314 2315 if (mtx_trylock(&tx->mtx)) { 2316 err = mxge_transmit_locked(ss, m); 2317 mtx_unlock(&tx->mtx); 2318 } else { 2319 err = drbr_enqueue(ifp, tx->br, m); 2320 } 2321 2322 return (err); 2323 } 2324 2325 #else 2326 2327 static inline void 2328 mxge_start_locked(struct mxge_slice_state *ss) 2329 { 2330 mxge_softc_t *sc; 2331 struct mbuf *m; 2332 struct ifnet *ifp; 2333 mxge_tx_ring_t *tx; 2334 2335 sc = ss->sc; 2336 ifp = sc->ifp; 2337 tx = &ss->tx; 2338 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2339 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 2340 if (m == NULL) { 2341 return; 2342 } 2343 /* let BPF see it */ 2344 BPF_MTAP(ifp, m); 2345 2346 /* give it to the nic */ 2347 mxge_encap(ss, m); 2348 } 2349 /* ran out of transmit slots */ 2350 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 2351 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2352 tx->stall++; 2353 } 2354 } 2355 #endif 2356 static void 2357 mxge_start(struct ifnet *ifp) 2358 { 2359 mxge_softc_t *sc = ifp->if_softc; 2360 struct mxge_slice_state *ss; 2361 2362 /* only use the first slice for now */ 2363 ss = &sc->ss[0]; 2364 mtx_lock(&ss->tx.mtx); 2365 mxge_start_locked(ss); 2366 mtx_unlock(&ss->tx.mtx); 2367 } 2368 2369 /* 2370 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2371 * at most 32 bytes at a time, so as to avoid involving the software 2372 * pio handler in the nic. We re-write the first segment's low 2373 * DMA address to mark it valid only after we write the entire chunk 2374 * in a burst 2375 */ 2376 static inline void 2377 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2378 mcp_kreq_ether_recv_t *src) 2379 { 2380 uint32_t low; 2381 2382 low = src->addr_low; 2383 src->addr_low = 0xffffffff; 2384 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2385 wmb(); 2386 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2387 wmb(); 2388 src->addr_low = low; 2389 dst->addr_low = low; 2390 wmb(); 2391 } 2392 2393 static int 2394 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2395 { 2396 bus_dma_segment_t seg; 2397 struct mbuf *m; 2398 mxge_rx_ring_t *rx = &ss->rx_small; 2399 int cnt, err; 2400 2401 m = m_gethdr(M_NOWAIT, MT_DATA); 2402 if (m == NULL) { 2403 rx->alloc_fail++; 2404 err = ENOBUFS; 2405 goto done; 2406 } 2407 m->m_len = MHLEN; 2408 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2409 &seg, &cnt, BUS_DMA_NOWAIT); 2410 if (err != 0) { 2411 m_free(m); 2412 goto done; 2413 } 2414 rx->info[idx].m = m; 2415 rx->shadow[idx].addr_low = 2416 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2417 rx->shadow[idx].addr_high = 2418 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2419 2420 done: 2421 if ((idx & 7) == 7) 2422 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2423 return err; 2424 } 2425 2426 static int 2427 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2428 { 2429 bus_dma_segment_t seg[3]; 2430 struct mbuf *m; 2431 mxge_rx_ring_t *rx = &ss->rx_big; 2432 int cnt, err, i; 2433 2434 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2435 if (m == NULL) { 2436 rx->alloc_fail++; 2437 err = ENOBUFS; 2438 goto done; 2439 } 2440 m->m_len = rx->mlen; 2441 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2442 seg, &cnt, BUS_DMA_NOWAIT); 2443 if (err != 0) { 2444 m_free(m); 2445 goto done; 2446 } 2447 rx->info[idx].m = m; 2448 rx->shadow[idx].addr_low = 2449 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2450 rx->shadow[idx].addr_high = 2451 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2452 2453 #if MXGE_VIRT_JUMBOS 2454 for (i = 1; i < cnt; i++) { 2455 rx->shadow[idx + i].addr_low = 2456 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2457 rx->shadow[idx + i].addr_high = 2458 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2459 } 2460 #endif 2461 2462 done: 2463 for (i = 0; i < rx->nbufs; i++) { 2464 if ((idx & 7) == 7) { 2465 mxge_submit_8rx(&rx->lanai[idx - 7], 2466 &rx->shadow[idx - 7]); 2467 } 2468 idx++; 2469 } 2470 return err; 2471 } 2472 2473 #ifdef INET6 2474 2475 static uint16_t 2476 mxge_csum_generic(uint16_t *raw, int len) 2477 { 2478 uint32_t csum; 2479 2480 2481 csum = 0; 2482 while (len > 0) { 2483 csum += *raw; 2484 raw++; 2485 len -= 2; 2486 } 2487 csum = (csum >> 16) + (csum & 0xffff); 2488 csum = (csum >> 16) + (csum & 0xffff); 2489 return (uint16_t)csum; 2490 } 2491 2492 static inline uint16_t 2493 mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum) 2494 { 2495 uint32_t partial; 2496 int nxt, cksum_offset; 2497 struct ip6_hdr *ip6 = p; 2498 uint16_t c; 2499 2500 nxt = ip6->ip6_nxt; 2501 cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN; 2502 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) { 2503 cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN, 2504 IPPROTO_IPV6, &nxt); 2505 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) 2506 return (1); 2507 } 2508 2509 /* 2510 * IPv6 headers do not contain a checksum, and hence 2511 * do not checksum to zero, so they don't "fall out" 2512 * of the partial checksum calculation like IPv4 2513 * headers do. We need to fix the partial checksum by 2514 * subtracting the checksum of the IPv6 header. 2515 */ 2516 2517 partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset - 2518 ETHER_HDR_LEN); 2519 csum += ~partial; 2520 csum += (csum < ~partial); 2521 csum = (csum >> 16) + (csum & 0xFFFF); 2522 csum = (csum >> 16) + (csum & 0xFFFF); 2523 c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt, 2524 csum); 2525 c ^= 0xffff; 2526 return (c); 2527 } 2528 #endif /* INET6 */ 2529 /* 2530 * Myri10GE hardware checksums are not valid if the sender 2531 * padded the frame with non-zero padding. This is because 2532 * the firmware just does a simple 16-bit 1s complement 2533 * checksum across the entire frame, excluding the first 14 2534 * bytes. It is best to simply to check the checksum and 2535 * tell the stack about it only if the checksum is good 2536 */ 2537 2538 static inline uint16_t 2539 mxge_rx_csum(struct mbuf *m, int csum) 2540 { 2541 struct ether_header *eh; 2542 #ifdef INET 2543 struct ip *ip; 2544 #endif 2545 #if defined(INET) || defined(INET6) 2546 int cap = m->m_pkthdr.rcvif->if_capenable; 2547 #endif 2548 uint16_t c, etype; 2549 2550 2551 eh = mtod(m, struct ether_header *); 2552 etype = ntohs(eh->ether_type); 2553 switch (etype) { 2554 #ifdef INET 2555 case ETHERTYPE_IP: 2556 if ((cap & IFCAP_RXCSUM) == 0) 2557 return (1); 2558 ip = (struct ip *)(eh + 1); 2559 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP) 2560 return (1); 2561 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2562 htonl(ntohs(csum) + ntohs(ip->ip_len) - 2563 (ip->ip_hl << 2) + ip->ip_p)); 2564 c ^= 0xffff; 2565 break; 2566 #endif 2567 #ifdef INET6 2568 case ETHERTYPE_IPV6: 2569 if ((cap & IFCAP_RXCSUM_IPV6) == 0) 2570 return (1); 2571 c = mxge_rx_csum6((eh + 1), m, csum); 2572 break; 2573 #endif 2574 default: 2575 c = 1; 2576 } 2577 return (c); 2578 } 2579 2580 static void 2581 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2582 { 2583 struct ether_vlan_header *evl; 2584 struct ether_header *eh; 2585 uint32_t partial; 2586 2587 evl = mtod(m, struct ether_vlan_header *); 2588 eh = mtod(m, struct ether_header *); 2589 2590 /* 2591 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes 2592 * after what the firmware thought was the end of the ethernet 2593 * header. 2594 */ 2595 2596 /* put checksum into host byte order */ 2597 *csum = ntohs(*csum); 2598 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2599 (*csum) += ~partial; 2600 (*csum) += ((*csum) < ~partial); 2601 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2602 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2603 2604 /* restore checksum to network byte order; 2605 later consumers expect this */ 2606 *csum = htons(*csum); 2607 2608 /* save the tag */ 2609 #ifdef MXGE_NEW_VLAN_API 2610 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); 2611 #else 2612 { 2613 struct m_tag *mtag; 2614 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), 2615 M_NOWAIT); 2616 if (mtag == NULL) 2617 return; 2618 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); 2619 m_tag_prepend(m, mtag); 2620 } 2621 2622 #endif 2623 m->m_flags |= M_VLANTAG; 2624 2625 /* 2626 * Remove the 802.1q header by copying the Ethernet 2627 * addresses over it and adjusting the beginning of 2628 * the data in the mbuf. The encapsulated Ethernet 2629 * type field is already in place. 2630 */ 2631 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2632 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2633 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2634 } 2635 2636 2637 static inline void 2638 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, 2639 uint32_t csum, int lro) 2640 { 2641 mxge_softc_t *sc; 2642 struct ifnet *ifp; 2643 struct mbuf *m; 2644 struct ether_header *eh; 2645 mxge_rx_ring_t *rx; 2646 bus_dmamap_t old_map; 2647 int idx; 2648 2649 sc = ss->sc; 2650 ifp = sc->ifp; 2651 rx = &ss->rx_big; 2652 idx = rx->cnt & rx->mask; 2653 rx->cnt += rx->nbufs; 2654 /* save a pointer to the received mbuf */ 2655 m = rx->info[idx].m; 2656 /* try to replace the received mbuf */ 2657 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2658 /* drop the frame -- the old mbuf is re-cycled */ 2659 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 2660 return; 2661 } 2662 2663 /* unmap the received buffer */ 2664 old_map = rx->info[idx].map; 2665 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2666 bus_dmamap_unload(rx->dmat, old_map); 2667 2668 /* swap the bus_dmamap_t's */ 2669 rx->info[idx].map = rx->extra_map; 2670 rx->extra_map = old_map; 2671 2672 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2673 * aligned */ 2674 m->m_data += MXGEFW_PAD; 2675 2676 m->m_pkthdr.rcvif = ifp; 2677 m->m_len = m->m_pkthdr.len = len; 2678 ss->ipackets++; 2679 eh = mtod(m, struct ether_header *); 2680 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2681 mxge_vlan_tag_remove(m, &csum); 2682 } 2683 /* flowid only valid if RSS hashing is enabled */ 2684 if (sc->num_slices > 1) { 2685 m->m_pkthdr.flowid = (ss - sc->ss); 2686 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 2687 } 2688 /* if the checksum is valid, mark it in the mbuf header */ 2689 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && 2690 (0 == mxge_rx_csum(m, csum))) { 2691 /* Tell the stack that the checksum is good */ 2692 m->m_pkthdr.csum_data = 0xffff; 2693 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2694 CSUM_DATA_VALID; 2695 2696 #if defined(INET) || defined (INET6) 2697 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0))) 2698 return; 2699 #endif 2700 } 2701 /* pass the frame up the stack */ 2702 (*ifp->if_input)(ifp, m); 2703 } 2704 2705 static inline void 2706 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, 2707 uint32_t csum, int lro) 2708 { 2709 mxge_softc_t *sc; 2710 struct ifnet *ifp; 2711 struct ether_header *eh; 2712 struct mbuf *m; 2713 mxge_rx_ring_t *rx; 2714 bus_dmamap_t old_map; 2715 int idx; 2716 2717 sc = ss->sc; 2718 ifp = sc->ifp; 2719 rx = &ss->rx_small; 2720 idx = rx->cnt & rx->mask; 2721 rx->cnt++; 2722 /* save a pointer to the received mbuf */ 2723 m = rx->info[idx].m; 2724 /* try to replace the received mbuf */ 2725 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2726 /* drop the frame -- the old mbuf is re-cycled */ 2727 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 2728 return; 2729 } 2730 2731 /* unmap the received buffer */ 2732 old_map = rx->info[idx].map; 2733 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2734 bus_dmamap_unload(rx->dmat, old_map); 2735 2736 /* swap the bus_dmamap_t's */ 2737 rx->info[idx].map = rx->extra_map; 2738 rx->extra_map = old_map; 2739 2740 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2741 * aligned */ 2742 m->m_data += MXGEFW_PAD; 2743 2744 m->m_pkthdr.rcvif = ifp; 2745 m->m_len = m->m_pkthdr.len = len; 2746 ss->ipackets++; 2747 eh = mtod(m, struct ether_header *); 2748 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2749 mxge_vlan_tag_remove(m, &csum); 2750 } 2751 /* flowid only valid if RSS hashing is enabled */ 2752 if (sc->num_slices > 1) { 2753 m->m_pkthdr.flowid = (ss - sc->ss); 2754 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 2755 } 2756 /* if the checksum is valid, mark it in the mbuf header */ 2757 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && 2758 (0 == mxge_rx_csum(m, csum))) { 2759 /* Tell the stack that the checksum is good */ 2760 m->m_pkthdr.csum_data = 0xffff; 2761 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2762 CSUM_DATA_VALID; 2763 2764 #if defined(INET) || defined (INET6) 2765 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum))) 2766 return; 2767 #endif 2768 } 2769 /* pass the frame up the stack */ 2770 (*ifp->if_input)(ifp, m); 2771 } 2772 2773 static inline void 2774 mxge_clean_rx_done(struct mxge_slice_state *ss) 2775 { 2776 mxge_rx_done_t *rx_done = &ss->rx_done; 2777 int limit = 0; 2778 uint16_t length; 2779 uint16_t checksum; 2780 int lro; 2781 2782 lro = ss->sc->ifp->if_capenable & IFCAP_LRO; 2783 while (rx_done->entry[rx_done->idx].length != 0) { 2784 length = ntohs(rx_done->entry[rx_done->idx].length); 2785 rx_done->entry[rx_done->idx].length = 0; 2786 checksum = rx_done->entry[rx_done->idx].checksum; 2787 if (length <= (MHLEN - MXGEFW_PAD)) 2788 mxge_rx_done_small(ss, length, checksum, lro); 2789 else 2790 mxge_rx_done_big(ss, length, checksum, lro); 2791 rx_done->cnt++; 2792 rx_done->idx = rx_done->cnt & rx_done->mask; 2793 2794 /* limit potential for livelock */ 2795 if (__predict_false(++limit > rx_done->mask / 2)) 2796 break; 2797 } 2798 #if defined(INET) || defined (INET6) 2799 tcp_lro_flush_all(&ss->lc); 2800 #endif 2801 } 2802 2803 2804 static inline void 2805 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2806 { 2807 struct ifnet *ifp; 2808 mxge_tx_ring_t *tx; 2809 struct mbuf *m; 2810 bus_dmamap_t map; 2811 int idx; 2812 int *flags; 2813 2814 tx = &ss->tx; 2815 ifp = ss->sc->ifp; 2816 while (tx->pkt_done != mcp_idx) { 2817 idx = tx->done & tx->mask; 2818 tx->done++; 2819 m = tx->info[idx].m; 2820 /* mbuf and DMA map only attached to the first 2821 segment per-mbuf */ 2822 if (m != NULL) { 2823 ss->obytes += m->m_pkthdr.len; 2824 if (m->m_flags & M_MCAST) 2825 ss->omcasts++; 2826 ss->opackets++; 2827 tx->info[idx].m = NULL; 2828 map = tx->info[idx].map; 2829 bus_dmamap_unload(tx->dmat, map); 2830 m_freem(m); 2831 } 2832 if (tx->info[idx].flag) { 2833 tx->info[idx].flag = 0; 2834 tx->pkt_done++; 2835 } 2836 } 2837 2838 /* If we have space, clear IFF_OACTIVE to tell the stack that 2839 its OK to send packets */ 2840 #ifdef IFNET_BUF_RING 2841 flags = &ss->if_drv_flags; 2842 #else 2843 flags = &ifp->if_drv_flags; 2844 #endif 2845 mtx_lock(&ss->tx.mtx); 2846 if ((*flags) & IFF_DRV_OACTIVE && 2847 tx->req - tx->done < (tx->mask + 1)/4) { 2848 *(flags) &= ~IFF_DRV_OACTIVE; 2849 ss->tx.wake++; 2850 mxge_start_locked(ss); 2851 } 2852 #ifdef IFNET_BUF_RING 2853 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { 2854 /* let the NIC stop polling this queue, since there 2855 * are no more transmits pending */ 2856 if (tx->req == tx->done) { 2857 *tx->send_stop = 1; 2858 tx->queue_active = 0; 2859 tx->deactivate++; 2860 wmb(); 2861 } 2862 } 2863 #endif 2864 mtx_unlock(&ss->tx.mtx); 2865 2866 } 2867 2868 static struct mxge_media_type mxge_xfp_media_types[] = 2869 { 2870 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2871 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2872 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2873 {0, (1 << 5), "10GBASE-ER"}, 2874 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2875 {0, (1 << 3), "10GBASE-SW"}, 2876 {0, (1 << 2), "10GBASE-LW"}, 2877 {0, (1 << 1), "10GBASE-EW"}, 2878 {0, (1 << 0), "Reserved"} 2879 }; 2880 static struct mxge_media_type mxge_sfp_media_types[] = 2881 { 2882 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"}, 2883 {0, (1 << 7), "Reserved"}, 2884 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2885 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2886 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}, 2887 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"} 2888 }; 2889 2890 static void 2891 mxge_media_set(mxge_softc_t *sc, int media_type) 2892 { 2893 2894 2895 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type, 2896 0, NULL); 2897 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type); 2898 sc->current_media = media_type; 2899 sc->media.ifm_media = sc->media.ifm_cur->ifm_media; 2900 } 2901 2902 static void 2903 mxge_media_init(mxge_softc_t *sc) 2904 { 2905 char *ptr; 2906 int i; 2907 2908 ifmedia_removeall(&sc->media); 2909 mxge_media_set(sc, IFM_AUTO); 2910 2911 /* 2912 * parse the product code to deterimine the interface type 2913 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2914 * after the 3rd dash in the driver's cached copy of the 2915 * EEPROM's product code string. 2916 */ 2917 ptr = sc->product_code_string; 2918 if (ptr == NULL) { 2919 device_printf(sc->dev, "Missing product code\n"); 2920 return; 2921 } 2922 2923 for (i = 0; i < 3; i++, ptr++) { 2924 ptr = strchr(ptr, '-'); 2925 if (ptr == NULL) { 2926 device_printf(sc->dev, 2927 "only %d dashes in PC?!?\n", i); 2928 return; 2929 } 2930 } 2931 if (*ptr == 'C' || *(ptr +1) == 'C') { 2932 /* -C is CX4 */ 2933 sc->connector = MXGE_CX4; 2934 mxge_media_set(sc, IFM_10G_CX4); 2935 } else if (*ptr == 'Q') { 2936 /* -Q is Quad Ribbon Fiber */ 2937 sc->connector = MXGE_QRF; 2938 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2939 /* FreeBSD has no media type for Quad ribbon fiber */ 2940 } else if (*ptr == 'R') { 2941 /* -R is XFP */ 2942 sc->connector = MXGE_XFP; 2943 } else if (*ptr == 'S' || *(ptr +1) == 'S') { 2944 /* -S or -2S is SFP+ */ 2945 sc->connector = MXGE_SFP; 2946 } else { 2947 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2948 } 2949 } 2950 2951 /* 2952 * Determine the media type for a NIC. Some XFPs will identify 2953 * themselves only when their link is up, so this is initiated via a 2954 * link up interrupt. However, this can potentially take up to 2955 * several milliseconds, so it is run via the watchdog routine, rather 2956 * than in the interrupt handler itself. 2957 */ 2958 static void 2959 mxge_media_probe(mxge_softc_t *sc) 2960 { 2961 mxge_cmd_t cmd; 2962 char *cage_type; 2963 2964 struct mxge_media_type *mxge_media_types = NULL; 2965 int i, err, ms, mxge_media_type_entries; 2966 uint32_t byte; 2967 2968 sc->need_media_probe = 0; 2969 2970 if (sc->connector == MXGE_XFP) { 2971 /* -R is XFP */ 2972 mxge_media_types = mxge_xfp_media_types; 2973 mxge_media_type_entries = 2974 nitems(mxge_xfp_media_types); 2975 byte = MXGE_XFP_COMPLIANCE_BYTE; 2976 cage_type = "XFP"; 2977 } else if (sc->connector == MXGE_SFP) { 2978 /* -S or -2S is SFP+ */ 2979 mxge_media_types = mxge_sfp_media_types; 2980 mxge_media_type_entries = 2981 nitems(mxge_sfp_media_types); 2982 cage_type = "SFP+"; 2983 byte = 3; 2984 } else { 2985 /* nothing to do; media type cannot change */ 2986 return; 2987 } 2988 2989 /* 2990 * At this point we know the NIC has an XFP cage, so now we 2991 * try to determine what is in the cage by using the 2992 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2993 * register. We read just one byte, which may take over 2994 * a millisecond 2995 */ 2996 2997 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2998 cmd.data1 = byte; 2999 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 3000 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 3001 device_printf(sc->dev, "failed to read XFP\n"); 3002 } 3003 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 3004 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 3005 } 3006 if (err != MXGEFW_CMD_OK) { 3007 return; 3008 } 3009 3010 /* now we wait for the data to be cached */ 3011 cmd.data0 = byte; 3012 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 3013 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 3014 DELAY(1000); 3015 cmd.data0 = byte; 3016 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 3017 } 3018 if (err != MXGEFW_CMD_OK) { 3019 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 3020 cage_type, err, ms); 3021 return; 3022 } 3023 3024 if (cmd.data0 == mxge_media_types[0].bitmask) { 3025 if (mxge_verbose) 3026 device_printf(sc->dev, "%s:%s\n", cage_type, 3027 mxge_media_types[0].name); 3028 if (sc->current_media != mxge_media_types[0].flag) { 3029 mxge_media_init(sc); 3030 mxge_media_set(sc, mxge_media_types[0].flag); 3031 } 3032 return; 3033 } 3034 for (i = 1; i < mxge_media_type_entries; i++) { 3035 if (cmd.data0 & mxge_media_types[i].bitmask) { 3036 if (mxge_verbose) 3037 device_printf(sc->dev, "%s:%s\n", 3038 cage_type, 3039 mxge_media_types[i].name); 3040 3041 if (sc->current_media != mxge_media_types[i].flag) { 3042 mxge_media_init(sc); 3043 mxge_media_set(sc, mxge_media_types[i].flag); 3044 } 3045 return; 3046 } 3047 } 3048 if (mxge_verbose) 3049 device_printf(sc->dev, "%s media 0x%x unknown\n", 3050 cage_type, cmd.data0); 3051 3052 return; 3053 } 3054 3055 static void 3056 mxge_intr(void *arg) 3057 { 3058 struct mxge_slice_state *ss = arg; 3059 mxge_softc_t *sc = ss->sc; 3060 mcp_irq_data_t *stats = ss->fw_stats; 3061 mxge_tx_ring_t *tx = &ss->tx; 3062 mxge_rx_done_t *rx_done = &ss->rx_done; 3063 uint32_t send_done_count; 3064 uint8_t valid; 3065 3066 3067 #ifndef IFNET_BUF_RING 3068 /* an interrupt on a non-zero slice is implicitly valid 3069 since MSI-X irqs are not shared */ 3070 if (ss != sc->ss) { 3071 mxge_clean_rx_done(ss); 3072 *ss->irq_claim = be32toh(3); 3073 return; 3074 } 3075 #endif 3076 3077 /* make sure the DMA has finished */ 3078 if (!stats->valid) { 3079 return; 3080 } 3081 valid = stats->valid; 3082 3083 if (sc->legacy_irq) { 3084 /* lower legacy IRQ */ 3085 *sc->irq_deassert = 0; 3086 if (!mxge_deassert_wait) 3087 /* don't wait for conf. that irq is low */ 3088 stats->valid = 0; 3089 } else { 3090 stats->valid = 0; 3091 } 3092 3093 /* loop while waiting for legacy irq deassertion */ 3094 do { 3095 /* check for transmit completes and receives */ 3096 send_done_count = be32toh(stats->send_done_count); 3097 while ((send_done_count != tx->pkt_done) || 3098 (rx_done->entry[rx_done->idx].length != 0)) { 3099 if (send_done_count != tx->pkt_done) 3100 mxge_tx_done(ss, (int)send_done_count); 3101 mxge_clean_rx_done(ss); 3102 send_done_count = be32toh(stats->send_done_count); 3103 } 3104 if (sc->legacy_irq && mxge_deassert_wait) 3105 wmb(); 3106 } while (*((volatile uint8_t *) &stats->valid)); 3107 3108 /* fw link & error stats meaningful only on the first slice */ 3109 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 3110 if (sc->link_state != stats->link_up) { 3111 sc->link_state = stats->link_up; 3112 if (sc->link_state) { 3113 if_link_state_change(sc->ifp, LINK_STATE_UP); 3114 if (mxge_verbose) 3115 device_printf(sc->dev, "link up\n"); 3116 } else { 3117 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3118 if (mxge_verbose) 3119 device_printf(sc->dev, "link down\n"); 3120 } 3121 sc->need_media_probe = 1; 3122 } 3123 if (sc->rdma_tags_available != 3124 be32toh(stats->rdma_tags_available)) { 3125 sc->rdma_tags_available = 3126 be32toh(stats->rdma_tags_available); 3127 device_printf(sc->dev, "RDMA timed out! %d tags " 3128 "left\n", sc->rdma_tags_available); 3129 } 3130 3131 if (stats->link_down) { 3132 sc->down_cnt += stats->link_down; 3133 sc->link_state = 0; 3134 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3135 } 3136 } 3137 3138 /* check to see if we have rx token to pass back */ 3139 if (valid & 0x1) 3140 *ss->irq_claim = be32toh(3); 3141 *(ss->irq_claim + 1) = be32toh(3); 3142 } 3143 3144 static void 3145 mxge_init(void *arg) 3146 { 3147 mxge_softc_t *sc = arg; 3148 struct ifnet *ifp = sc->ifp; 3149 3150 3151 mtx_lock(&sc->driver_mtx); 3152 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 3153 (void) mxge_open(sc); 3154 mtx_unlock(&sc->driver_mtx); 3155 } 3156 3157 3158 3159 static void 3160 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 3161 { 3162 int i; 3163 3164 #if defined(INET) || defined(INET6) 3165 tcp_lro_free(&ss->lc); 3166 #endif 3167 for (i = 0; i <= ss->rx_big.mask; i++) { 3168 if (ss->rx_big.info[i].m == NULL) 3169 continue; 3170 bus_dmamap_unload(ss->rx_big.dmat, 3171 ss->rx_big.info[i].map); 3172 m_freem(ss->rx_big.info[i].m); 3173 ss->rx_big.info[i].m = NULL; 3174 } 3175 3176 for (i = 0; i <= ss->rx_small.mask; i++) { 3177 if (ss->rx_small.info[i].m == NULL) 3178 continue; 3179 bus_dmamap_unload(ss->rx_small.dmat, 3180 ss->rx_small.info[i].map); 3181 m_freem(ss->rx_small.info[i].m); 3182 ss->rx_small.info[i].m = NULL; 3183 } 3184 3185 /* transmit ring used only on the first slice */ 3186 if (ss->tx.info == NULL) 3187 return; 3188 3189 for (i = 0; i <= ss->tx.mask; i++) { 3190 ss->tx.info[i].flag = 0; 3191 if (ss->tx.info[i].m == NULL) 3192 continue; 3193 bus_dmamap_unload(ss->tx.dmat, 3194 ss->tx.info[i].map); 3195 m_freem(ss->tx.info[i].m); 3196 ss->tx.info[i].m = NULL; 3197 } 3198 } 3199 3200 static void 3201 mxge_free_mbufs(mxge_softc_t *sc) 3202 { 3203 int slice; 3204 3205 for (slice = 0; slice < sc->num_slices; slice++) 3206 mxge_free_slice_mbufs(&sc->ss[slice]); 3207 } 3208 3209 static void 3210 mxge_free_slice_rings(struct mxge_slice_state *ss) 3211 { 3212 int i; 3213 3214 3215 if (ss->rx_done.entry != NULL) 3216 mxge_dma_free(&ss->rx_done.dma); 3217 ss->rx_done.entry = NULL; 3218 3219 if (ss->tx.req_bytes != NULL) 3220 free(ss->tx.req_bytes, M_DEVBUF); 3221 ss->tx.req_bytes = NULL; 3222 3223 if (ss->tx.seg_list != NULL) 3224 free(ss->tx.seg_list, M_DEVBUF); 3225 ss->tx.seg_list = NULL; 3226 3227 if (ss->rx_small.shadow != NULL) 3228 free(ss->rx_small.shadow, M_DEVBUF); 3229 ss->rx_small.shadow = NULL; 3230 3231 if (ss->rx_big.shadow != NULL) 3232 free(ss->rx_big.shadow, M_DEVBUF); 3233 ss->rx_big.shadow = NULL; 3234 3235 if (ss->tx.info != NULL) { 3236 if (ss->tx.dmat != NULL) { 3237 for (i = 0; i <= ss->tx.mask; i++) { 3238 bus_dmamap_destroy(ss->tx.dmat, 3239 ss->tx.info[i].map); 3240 } 3241 bus_dma_tag_destroy(ss->tx.dmat); 3242 } 3243 free(ss->tx.info, M_DEVBUF); 3244 } 3245 ss->tx.info = NULL; 3246 3247 if (ss->rx_small.info != NULL) { 3248 if (ss->rx_small.dmat != NULL) { 3249 for (i = 0; i <= ss->rx_small.mask; i++) { 3250 bus_dmamap_destroy(ss->rx_small.dmat, 3251 ss->rx_small.info[i].map); 3252 } 3253 bus_dmamap_destroy(ss->rx_small.dmat, 3254 ss->rx_small.extra_map); 3255 bus_dma_tag_destroy(ss->rx_small.dmat); 3256 } 3257 free(ss->rx_small.info, M_DEVBUF); 3258 } 3259 ss->rx_small.info = NULL; 3260 3261 if (ss->rx_big.info != NULL) { 3262 if (ss->rx_big.dmat != NULL) { 3263 for (i = 0; i <= ss->rx_big.mask; i++) { 3264 bus_dmamap_destroy(ss->rx_big.dmat, 3265 ss->rx_big.info[i].map); 3266 } 3267 bus_dmamap_destroy(ss->rx_big.dmat, 3268 ss->rx_big.extra_map); 3269 bus_dma_tag_destroy(ss->rx_big.dmat); 3270 } 3271 free(ss->rx_big.info, M_DEVBUF); 3272 } 3273 ss->rx_big.info = NULL; 3274 } 3275 3276 static void 3277 mxge_free_rings(mxge_softc_t *sc) 3278 { 3279 int slice; 3280 3281 for (slice = 0; slice < sc->num_slices; slice++) 3282 mxge_free_slice_rings(&sc->ss[slice]); 3283 } 3284 3285 static int 3286 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3287 int tx_ring_entries) 3288 { 3289 mxge_softc_t *sc = ss->sc; 3290 size_t bytes; 3291 int err, i; 3292 3293 /* allocate per-slice receive resources */ 3294 3295 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 3296 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 3297 3298 /* allocate the rx shadow rings */ 3299 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 3300 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3301 3302 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 3303 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3304 3305 /* allocate the rx host info rings */ 3306 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 3307 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3308 3309 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 3310 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3311 3312 /* allocate the rx busdma resources */ 3313 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3314 1, /* alignment */ 3315 4096, /* boundary */ 3316 BUS_SPACE_MAXADDR, /* low */ 3317 BUS_SPACE_MAXADDR, /* high */ 3318 NULL, NULL, /* filter */ 3319 MHLEN, /* maxsize */ 3320 1, /* num segs */ 3321 MHLEN, /* maxsegsize */ 3322 BUS_DMA_ALLOCNOW, /* flags */ 3323 NULL, NULL, /* lock */ 3324 &ss->rx_small.dmat); /* tag */ 3325 if (err != 0) { 3326 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3327 err); 3328 return err; 3329 } 3330 3331 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3332 1, /* alignment */ 3333 #if MXGE_VIRT_JUMBOS 3334 4096, /* boundary */ 3335 #else 3336 0, /* boundary */ 3337 #endif 3338 BUS_SPACE_MAXADDR, /* low */ 3339 BUS_SPACE_MAXADDR, /* high */ 3340 NULL, NULL, /* filter */ 3341 3*4096, /* maxsize */ 3342 #if MXGE_VIRT_JUMBOS 3343 3, /* num segs */ 3344 4096, /* maxsegsize*/ 3345 #else 3346 1, /* num segs */ 3347 MJUM9BYTES, /* maxsegsize*/ 3348 #endif 3349 BUS_DMA_ALLOCNOW, /* flags */ 3350 NULL, NULL, /* lock */ 3351 &ss->rx_big.dmat); /* tag */ 3352 if (err != 0) { 3353 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3354 err); 3355 return err; 3356 } 3357 for (i = 0; i <= ss->rx_small.mask; i++) { 3358 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3359 &ss->rx_small.info[i].map); 3360 if (err != 0) { 3361 device_printf(sc->dev, "Err %d rx_small dmamap\n", 3362 err); 3363 return err; 3364 } 3365 } 3366 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3367 &ss->rx_small.extra_map); 3368 if (err != 0) { 3369 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 3370 err); 3371 return err; 3372 } 3373 3374 for (i = 0; i <= ss->rx_big.mask; i++) { 3375 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3376 &ss->rx_big.info[i].map); 3377 if (err != 0) { 3378 device_printf(sc->dev, "Err %d rx_big dmamap\n", 3379 err); 3380 return err; 3381 } 3382 } 3383 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3384 &ss->rx_big.extra_map); 3385 if (err != 0) { 3386 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 3387 err); 3388 return err; 3389 } 3390 3391 /* now allocate TX resources */ 3392 3393 #ifndef IFNET_BUF_RING 3394 /* only use a single TX ring for now */ 3395 if (ss != ss->sc->ss) 3396 return 0; 3397 #endif 3398 3399 ss->tx.mask = tx_ring_entries - 1; 3400 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3401 3402 3403 /* allocate the tx request copy block */ 3404 bytes = 8 + 3405 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3406 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 3407 /* ensure req_list entries are aligned to 8 bytes */ 3408 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3409 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL); 3410 3411 /* allocate the tx busdma segment list */ 3412 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3413 ss->tx.seg_list = (bus_dma_segment_t *) 3414 malloc(bytes, M_DEVBUF, M_WAITOK); 3415 3416 /* allocate the tx host info ring */ 3417 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3418 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3419 3420 /* allocate the tx busdma resources */ 3421 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3422 1, /* alignment */ 3423 sc->tx_boundary, /* boundary */ 3424 BUS_SPACE_MAXADDR, /* low */ 3425 BUS_SPACE_MAXADDR, /* high */ 3426 NULL, NULL, /* filter */ 3427 65536 + 256, /* maxsize */ 3428 ss->tx.max_desc - 2, /* num segs */ 3429 sc->tx_boundary, /* maxsegsz */ 3430 BUS_DMA_ALLOCNOW, /* flags */ 3431 NULL, NULL, /* lock */ 3432 &ss->tx.dmat); /* tag */ 3433 3434 if (err != 0) { 3435 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3436 err); 3437 return err; 3438 } 3439 3440 /* now use these tags to setup dmamaps for each slot 3441 in the ring */ 3442 for (i = 0; i <= ss->tx.mask; i++) { 3443 err = bus_dmamap_create(ss->tx.dmat, 0, 3444 &ss->tx.info[i].map); 3445 if (err != 0) { 3446 device_printf(sc->dev, "Err %d tx dmamap\n", 3447 err); 3448 return err; 3449 } 3450 } 3451 return 0; 3452 3453 } 3454 3455 static int 3456 mxge_alloc_rings(mxge_softc_t *sc) 3457 { 3458 mxge_cmd_t cmd; 3459 int tx_ring_size; 3460 int tx_ring_entries, rx_ring_entries; 3461 int err, slice; 3462 3463 /* get ring sizes */ 3464 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3465 tx_ring_size = cmd.data0; 3466 if (err != 0) { 3467 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3468 goto abort; 3469 } 3470 3471 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3472 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3473 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 3474 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 3475 IFQ_SET_READY(&sc->ifp->if_snd); 3476 3477 for (slice = 0; slice < sc->num_slices; slice++) { 3478 err = mxge_alloc_slice_rings(&sc->ss[slice], 3479 rx_ring_entries, 3480 tx_ring_entries); 3481 if (err != 0) 3482 goto abort; 3483 } 3484 return 0; 3485 3486 abort: 3487 mxge_free_rings(sc); 3488 return err; 3489 3490 } 3491 3492 3493 static void 3494 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3495 { 3496 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3497 3498 if (bufsize < MCLBYTES) { 3499 /* easy, everything fits in a single buffer */ 3500 *big_buf_size = MCLBYTES; 3501 *cl_size = MCLBYTES; 3502 *nbufs = 1; 3503 return; 3504 } 3505 3506 if (bufsize < MJUMPAGESIZE) { 3507 /* still easy, everything still fits in a single buffer */ 3508 *big_buf_size = MJUMPAGESIZE; 3509 *cl_size = MJUMPAGESIZE; 3510 *nbufs = 1; 3511 return; 3512 } 3513 #if MXGE_VIRT_JUMBOS 3514 /* now we need to use virtually contiguous buffers */ 3515 *cl_size = MJUM9BYTES; 3516 *big_buf_size = 4096; 3517 *nbufs = mtu / 4096 + 1; 3518 /* needs to be a power of two, so round up */ 3519 if (*nbufs == 3) 3520 *nbufs = 4; 3521 #else 3522 *cl_size = MJUM9BYTES; 3523 *big_buf_size = MJUM9BYTES; 3524 *nbufs = 1; 3525 #endif 3526 } 3527 3528 static int 3529 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3530 { 3531 mxge_softc_t *sc; 3532 mxge_cmd_t cmd; 3533 bus_dmamap_t map; 3534 int err, i, slice; 3535 3536 3537 sc = ss->sc; 3538 slice = ss - sc->ss; 3539 3540 #if defined(INET) || defined(INET6) 3541 (void)tcp_lro_init(&ss->lc); 3542 #endif 3543 ss->lc.ifp = sc->ifp; 3544 3545 /* get the lanai pointers to the send and receive rings */ 3546 3547 err = 0; 3548 #ifndef IFNET_BUF_RING 3549 /* We currently only send from the first slice */ 3550 if (slice == 0) { 3551 #endif 3552 cmd.data0 = slice; 3553 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3554 ss->tx.lanai = 3555 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3556 ss->tx.send_go = (volatile uint32_t *) 3557 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3558 ss->tx.send_stop = (volatile uint32_t *) 3559 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3560 #ifndef IFNET_BUF_RING 3561 } 3562 #endif 3563 cmd.data0 = slice; 3564 err |= mxge_send_cmd(sc, 3565 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3566 ss->rx_small.lanai = 3567 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3568 cmd.data0 = slice; 3569 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3570 ss->rx_big.lanai = 3571 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3572 3573 if (err != 0) { 3574 device_printf(sc->dev, 3575 "failed to get ring sizes or locations\n"); 3576 return EIO; 3577 } 3578 3579 /* stock receive rings */ 3580 for (i = 0; i <= ss->rx_small.mask; i++) { 3581 map = ss->rx_small.info[i].map; 3582 err = mxge_get_buf_small(ss, map, i); 3583 if (err) { 3584 device_printf(sc->dev, "alloced %d/%d smalls\n", 3585 i, ss->rx_small.mask + 1); 3586 return ENOMEM; 3587 } 3588 } 3589 for (i = 0; i <= ss->rx_big.mask; i++) { 3590 ss->rx_big.shadow[i].addr_low = 0xffffffff; 3591 ss->rx_big.shadow[i].addr_high = 0xffffffff; 3592 } 3593 ss->rx_big.nbufs = nbufs; 3594 ss->rx_big.cl_size = cl_size; 3595 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN + 3596 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3597 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { 3598 map = ss->rx_big.info[i].map; 3599 err = mxge_get_buf_big(ss, map, i); 3600 if (err) { 3601 device_printf(sc->dev, "alloced %d/%d bigs\n", 3602 i, ss->rx_big.mask + 1); 3603 return ENOMEM; 3604 } 3605 } 3606 return 0; 3607 } 3608 3609 static int 3610 mxge_open(mxge_softc_t *sc) 3611 { 3612 mxge_cmd_t cmd; 3613 int err, big_bytes, nbufs, slice, cl_size, i; 3614 bus_addr_t bus; 3615 volatile uint8_t *itable; 3616 struct mxge_slice_state *ss; 3617 3618 /* Copy the MAC address in case it was overridden */ 3619 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 3620 3621 err = mxge_reset(sc, 1); 3622 if (err != 0) { 3623 device_printf(sc->dev, "failed to reset\n"); 3624 return EIO; 3625 } 3626 3627 if (sc->num_slices > 1) { 3628 /* setup the indirection table */ 3629 cmd.data0 = sc->num_slices; 3630 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 3631 &cmd); 3632 3633 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 3634 &cmd); 3635 if (err != 0) { 3636 device_printf(sc->dev, 3637 "failed to setup rss tables\n"); 3638 return err; 3639 } 3640 3641 /* just enable an identity mapping */ 3642 itable = sc->sram + cmd.data0; 3643 for (i = 0; i < sc->num_slices; i++) 3644 itable[i] = (uint8_t)i; 3645 3646 cmd.data0 = 1; 3647 cmd.data1 = mxge_rss_hash_type; 3648 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3649 if (err != 0) { 3650 device_printf(sc->dev, "failed to enable slices\n"); 3651 return err; 3652 } 3653 } 3654 3655 3656 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); 3657 3658 cmd.data0 = nbufs; 3659 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 3660 &cmd); 3661 /* error is only meaningful if we're trying to set 3662 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 3663 if (err && nbufs > 1) { 3664 device_printf(sc->dev, 3665 "Failed to set alway-use-n to %d\n", 3666 nbufs); 3667 return EIO; 3668 } 3669 /* Give the firmware the mtu and the big and small buffer 3670 sizes. The firmware wants the big buf size to be a power 3671 of two. Luckily, FreeBSD's clusters are powers of two */ 3672 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3673 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3674 cmd.data0 = MHLEN - MXGEFW_PAD; 3675 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3676 &cmd); 3677 cmd.data0 = big_bytes; 3678 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3679 3680 if (err != 0) { 3681 device_printf(sc->dev, "failed to setup params\n"); 3682 goto abort; 3683 } 3684 3685 /* Now give him the pointer to the stats block */ 3686 for (slice = 0; 3687 #ifdef IFNET_BUF_RING 3688 slice < sc->num_slices; 3689 #else 3690 slice < 1; 3691 #endif 3692 slice++) { 3693 ss = &sc->ss[slice]; 3694 cmd.data0 = 3695 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3696 cmd.data1 = 3697 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3698 cmd.data2 = sizeof(struct mcp_irq_data); 3699 cmd.data2 |= (slice << 16); 3700 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3701 } 3702 3703 if (err != 0) { 3704 bus = sc->ss->fw_stats_dma.bus_addr; 3705 bus += offsetof(struct mcp_irq_data, send_done_count); 3706 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3707 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3708 err = mxge_send_cmd(sc, 3709 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3710 &cmd); 3711 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3712 sc->fw_multicast_support = 0; 3713 } else { 3714 sc->fw_multicast_support = 1; 3715 } 3716 3717 if (err != 0) { 3718 device_printf(sc->dev, "failed to setup params\n"); 3719 goto abort; 3720 } 3721 3722 for (slice = 0; slice < sc->num_slices; slice++) { 3723 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3724 if (err != 0) { 3725 device_printf(sc->dev, "couldn't open slice %d\n", 3726 slice); 3727 goto abort; 3728 } 3729 } 3730 3731 /* Finally, start the firmware running */ 3732 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3733 if (err) { 3734 device_printf(sc->dev, "Couldn't bring up link\n"); 3735 goto abort; 3736 } 3737 #ifdef IFNET_BUF_RING 3738 for (slice = 0; slice < sc->num_slices; slice++) { 3739 ss = &sc->ss[slice]; 3740 ss->if_drv_flags |= IFF_DRV_RUNNING; 3741 ss->if_drv_flags &= ~IFF_DRV_OACTIVE; 3742 } 3743 #endif 3744 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3745 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3746 3747 return 0; 3748 3749 3750 abort: 3751 mxge_free_mbufs(sc); 3752 3753 return err; 3754 } 3755 3756 static int 3757 mxge_close(mxge_softc_t *sc, int down) 3758 { 3759 mxge_cmd_t cmd; 3760 int err, old_down_cnt; 3761 #ifdef IFNET_BUF_RING 3762 struct mxge_slice_state *ss; 3763 int slice; 3764 #endif 3765 3766 #ifdef IFNET_BUF_RING 3767 for (slice = 0; slice < sc->num_slices; slice++) { 3768 ss = &sc->ss[slice]; 3769 ss->if_drv_flags &= ~IFF_DRV_RUNNING; 3770 } 3771 #endif 3772 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3773 if (!down) { 3774 old_down_cnt = sc->down_cnt; 3775 wmb(); 3776 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3777 if (err) { 3778 device_printf(sc->dev, 3779 "Couldn't bring down link\n"); 3780 } 3781 if (old_down_cnt == sc->down_cnt) { 3782 /* wait for down irq */ 3783 DELAY(10 * sc->intr_coal_delay); 3784 } 3785 wmb(); 3786 if (old_down_cnt == sc->down_cnt) { 3787 device_printf(sc->dev, "never got down irq\n"); 3788 } 3789 } 3790 mxge_free_mbufs(sc); 3791 3792 return 0; 3793 } 3794 3795 static void 3796 mxge_setup_cfg_space(mxge_softc_t *sc) 3797 { 3798 device_t dev = sc->dev; 3799 int reg; 3800 uint16_t lnk, pectl; 3801 3802 /* find the PCIe link width and set max read request to 4KB*/ 3803 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { 3804 lnk = pci_read_config(dev, reg + 0x12, 2); 3805 sc->link_width = (lnk >> 4) & 0x3f; 3806 3807 if (sc->pectl == 0) { 3808 pectl = pci_read_config(dev, reg + 0x8, 2); 3809 pectl = (pectl & ~0x7000) | (5 << 12); 3810 pci_write_config(dev, reg + 0x8, pectl, 2); 3811 sc->pectl = pectl; 3812 } else { 3813 /* restore saved pectl after watchdog reset */ 3814 pci_write_config(dev, reg + 0x8, sc->pectl, 2); 3815 } 3816 } 3817 3818 /* Enable DMA and Memory space access */ 3819 pci_enable_busmaster(dev); 3820 } 3821 3822 static uint32_t 3823 mxge_read_reboot(mxge_softc_t *sc) 3824 { 3825 device_t dev = sc->dev; 3826 uint32_t vs; 3827 3828 /* find the vendor specific offset */ 3829 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) { 3830 device_printf(sc->dev, 3831 "could not find vendor specific offset\n"); 3832 return (uint32_t)-1; 3833 } 3834 /* enable read32 mode */ 3835 pci_write_config(dev, vs + 0x10, 0x3, 1); 3836 /* tell NIC which register to read */ 3837 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3838 return (pci_read_config(dev, vs + 0x14, 4)); 3839 } 3840 3841 static void 3842 mxge_watchdog_reset(mxge_softc_t *sc) 3843 { 3844 struct pci_devinfo *dinfo; 3845 struct mxge_slice_state *ss; 3846 int err, running, s, num_tx_slices = 1; 3847 uint32_t reboot; 3848 uint16_t cmd; 3849 3850 err = ENXIO; 3851 3852 device_printf(sc->dev, "Watchdog reset!\n"); 3853 3854 /* 3855 * check to see if the NIC rebooted. If it did, then all of 3856 * PCI config space has been reset, and things like the 3857 * busmaster bit will be zero. If this is the case, then we 3858 * must restore PCI config space before the NIC can be used 3859 * again 3860 */ 3861 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3862 if (cmd == 0xffff) { 3863 /* 3864 * maybe the watchdog caught the NIC rebooting; wait 3865 * up to 100ms for it to finish. If it does not come 3866 * back, then give up 3867 */ 3868 DELAY(1000*100); 3869 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3870 if (cmd == 0xffff) { 3871 device_printf(sc->dev, "NIC disappeared!\n"); 3872 } 3873 } 3874 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3875 /* print the reboot status */ 3876 reboot = mxge_read_reboot(sc); 3877 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3878 reboot); 3879 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 3880 if (running) { 3881 3882 /* 3883 * quiesce NIC so that TX routines will not try to 3884 * xmit after restoration of BAR 3885 */ 3886 3887 /* Mark the link as down */ 3888 if (sc->link_state) { 3889 sc->link_state = 0; 3890 if_link_state_change(sc->ifp, 3891 LINK_STATE_DOWN); 3892 } 3893 #ifdef IFNET_BUF_RING 3894 num_tx_slices = sc->num_slices; 3895 #endif 3896 /* grab all TX locks to ensure no tx */ 3897 for (s = 0; s < num_tx_slices; s++) { 3898 ss = &sc->ss[s]; 3899 mtx_lock(&ss->tx.mtx); 3900 } 3901 mxge_close(sc, 1); 3902 } 3903 /* restore PCI configuration space */ 3904 dinfo = device_get_ivars(sc->dev); 3905 pci_cfg_restore(sc->dev, dinfo); 3906 3907 /* and redo any changes we made to our config space */ 3908 mxge_setup_cfg_space(sc); 3909 3910 /* reload f/w */ 3911 err = mxge_load_firmware(sc, 0); 3912 if (err) { 3913 device_printf(sc->dev, 3914 "Unable to re-load f/w\n"); 3915 } 3916 if (running) { 3917 if (!err) 3918 err = mxge_open(sc); 3919 /* release all TX locks */ 3920 for (s = 0; s < num_tx_slices; s++) { 3921 ss = &sc->ss[s]; 3922 #ifdef IFNET_BUF_RING 3923 mxge_start_locked(ss); 3924 #endif 3925 mtx_unlock(&ss->tx.mtx); 3926 } 3927 } 3928 sc->watchdog_resets++; 3929 } else { 3930 device_printf(sc->dev, 3931 "NIC did not reboot, not resetting\n"); 3932 err = 0; 3933 } 3934 if (err) { 3935 device_printf(sc->dev, "watchdog reset failed\n"); 3936 } else { 3937 if (sc->dying == 2) 3938 sc->dying = 0; 3939 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3940 } 3941 } 3942 3943 static void 3944 mxge_watchdog_task(void *arg, int pending) 3945 { 3946 mxge_softc_t *sc = arg; 3947 3948 3949 mtx_lock(&sc->driver_mtx); 3950 mxge_watchdog_reset(sc); 3951 mtx_unlock(&sc->driver_mtx); 3952 } 3953 3954 static void 3955 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice) 3956 { 3957 tx = &sc->ss[slice].tx; 3958 device_printf(sc->dev, "slice %d struck? ring state:\n", slice); 3959 device_printf(sc->dev, 3960 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3961 tx->req, tx->done, tx->queue_active); 3962 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3963 tx->activate, tx->deactivate); 3964 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3965 tx->pkt_done, 3966 be32toh(sc->ss->fw_stats->send_done_count)); 3967 } 3968 3969 static int 3970 mxge_watchdog(mxge_softc_t *sc) 3971 { 3972 mxge_tx_ring_t *tx; 3973 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3974 int i, err = 0; 3975 3976 /* see if we have outstanding transmits, which 3977 have been pending for more than mxge_ticks */ 3978 for (i = 0; 3979 #ifdef IFNET_BUF_RING 3980 (i < sc->num_slices) && (err == 0); 3981 #else 3982 (i < 1) && (err == 0); 3983 #endif 3984 i++) { 3985 tx = &sc->ss[i].tx; 3986 if (tx->req != tx->done && 3987 tx->watchdog_req != tx->watchdog_done && 3988 tx->done == tx->watchdog_done) { 3989 /* check for pause blocking before resetting */ 3990 if (tx->watchdog_rx_pause == rx_pause) { 3991 mxge_warn_stuck(sc, tx, i); 3992 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 3993 return (ENXIO); 3994 } 3995 else 3996 device_printf(sc->dev, "Flow control blocking " 3997 "xmits, check link partner\n"); 3998 } 3999 4000 tx->watchdog_req = tx->req; 4001 tx->watchdog_done = tx->done; 4002 tx->watchdog_rx_pause = rx_pause; 4003 } 4004 4005 if (sc->need_media_probe) 4006 mxge_media_probe(sc); 4007 return (err); 4008 } 4009 4010 static uint64_t 4011 mxge_get_counter(struct ifnet *ifp, ift_counter cnt) 4012 { 4013 struct mxge_softc *sc; 4014 uint64_t rv; 4015 4016 sc = if_getsoftc(ifp); 4017 rv = 0; 4018 4019 switch (cnt) { 4020 case IFCOUNTER_IPACKETS: 4021 for (int s = 0; s < sc->num_slices; s++) 4022 rv += sc->ss[s].ipackets; 4023 return (rv); 4024 case IFCOUNTER_OPACKETS: 4025 for (int s = 0; s < sc->num_slices; s++) 4026 rv += sc->ss[s].opackets; 4027 return (rv); 4028 case IFCOUNTER_OERRORS: 4029 for (int s = 0; s < sc->num_slices; s++) 4030 rv += sc->ss[s].oerrors; 4031 return (rv); 4032 #ifdef IFNET_BUF_RING 4033 case IFCOUNTER_OBYTES: 4034 for (int s = 0; s < sc->num_slices; s++) 4035 rv += sc->ss[s].obytes; 4036 return (rv); 4037 case IFCOUNTER_OMCASTS: 4038 for (int s = 0; s < sc->num_slices; s++) 4039 rv += sc->ss[s].omcasts; 4040 return (rv); 4041 case IFCOUNTER_OQDROPS: 4042 for (int s = 0; s < sc->num_slices; s++) 4043 rv += sc->ss[s].tx.br->br_drops; 4044 return (rv); 4045 #endif 4046 default: 4047 return (if_get_counter_default(ifp, cnt)); 4048 } 4049 } 4050 4051 static void 4052 mxge_tick(void *arg) 4053 { 4054 mxge_softc_t *sc = arg; 4055 u_long pkts = 0; 4056 int err = 0; 4057 int running, ticks; 4058 uint16_t cmd; 4059 4060 ticks = mxge_ticks; 4061 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 4062 if (running) { 4063 if (!sc->watchdog_countdown) { 4064 err = mxge_watchdog(sc); 4065 sc->watchdog_countdown = 4; 4066 } 4067 sc->watchdog_countdown--; 4068 } 4069 if (pkts == 0) { 4070 /* ensure NIC did not suffer h/w fault while idle */ 4071 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 4072 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 4073 sc->dying = 2; 4074 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 4075 err = ENXIO; 4076 } 4077 /* look less often if NIC is idle */ 4078 ticks *= 4; 4079 } 4080 4081 if (err == 0) 4082 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 4083 4084 } 4085 4086 static int 4087 mxge_media_change(struct ifnet *ifp) 4088 { 4089 return EINVAL; 4090 } 4091 4092 static int 4093 mxge_change_mtu(mxge_softc_t *sc, int mtu) 4094 { 4095 struct ifnet *ifp = sc->ifp; 4096 int real_mtu, old_mtu; 4097 int err = 0; 4098 4099 4100 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 4101 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 4102 return EINVAL; 4103 mtx_lock(&sc->driver_mtx); 4104 old_mtu = ifp->if_mtu; 4105 ifp->if_mtu = mtu; 4106 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4107 mxge_close(sc, 0); 4108 err = mxge_open(sc); 4109 if (err != 0) { 4110 ifp->if_mtu = old_mtu; 4111 mxge_close(sc, 0); 4112 (void) mxge_open(sc); 4113 } 4114 } 4115 mtx_unlock(&sc->driver_mtx); 4116 return err; 4117 } 4118 4119 static void 4120 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 4121 { 4122 mxge_softc_t *sc = ifp->if_softc; 4123 4124 4125 if (sc == NULL) 4126 return; 4127 ifmr->ifm_status = IFM_AVALID; 4128 ifmr->ifm_active = IFM_ETHER | IFM_FDX; 4129 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 4130 ifmr->ifm_active |= sc->current_media; 4131 } 4132 4133 static int 4134 mxge_fetch_i2c(mxge_softc_t *sc, struct ifi2creq *i2c) 4135 { 4136 mxge_cmd_t cmd; 4137 uint32_t i2c_args; 4138 int i, ms, err; 4139 4140 4141 if (i2c->dev_addr != 0xA0 && 4142 i2c->dev_addr != 0xA2) 4143 return (EINVAL); 4144 if (i2c->len > sizeof(i2c->data)) 4145 return (EINVAL); 4146 4147 for (i = 0; i < i2c->len; i++) { 4148 i2c_args = i2c->dev_addr << 0x8; 4149 i2c_args |= i2c->offset + i; 4150 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 4151 cmd.data1 = i2c_args; 4152 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 4153 4154 if (err != MXGEFW_CMD_OK) 4155 return (EIO); 4156 /* now we wait for the data to be cached */ 4157 cmd.data0 = i2c_args & 0xff; 4158 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 4159 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 4160 cmd.data0 = i2c_args & 0xff; 4161 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 4162 if (err == EBUSY) 4163 DELAY(1000); 4164 } 4165 if (err != MXGEFW_CMD_OK) 4166 return (EIO); 4167 i2c->data[i] = cmd.data0; 4168 } 4169 return (0); 4170 } 4171 4172 static int 4173 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 4174 { 4175 mxge_softc_t *sc = ifp->if_softc; 4176 struct ifreq *ifr = (struct ifreq *)data; 4177 struct ifi2creq i2c; 4178 int err, mask; 4179 4180 err = 0; 4181 switch (command) { 4182 case SIOCSIFMTU: 4183 err = mxge_change_mtu(sc, ifr->ifr_mtu); 4184 break; 4185 4186 case SIOCSIFFLAGS: 4187 mtx_lock(&sc->driver_mtx); 4188 if (sc->dying) { 4189 mtx_unlock(&sc->driver_mtx); 4190 return EINVAL; 4191 } 4192 if (ifp->if_flags & IFF_UP) { 4193 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 4194 err = mxge_open(sc); 4195 } else { 4196 /* take care of promis can allmulti 4197 flag chages */ 4198 mxge_change_promisc(sc, 4199 ifp->if_flags & IFF_PROMISC); 4200 mxge_set_multicast_list(sc); 4201 } 4202 } else { 4203 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4204 mxge_close(sc, 0); 4205 } 4206 } 4207 mtx_unlock(&sc->driver_mtx); 4208 break; 4209 4210 case SIOCADDMULTI: 4211 case SIOCDELMULTI: 4212 mtx_lock(&sc->driver_mtx); 4213 if (sc->dying) { 4214 mtx_unlock(&sc->driver_mtx); 4215 return (EINVAL); 4216 } 4217 mxge_set_multicast_list(sc); 4218 mtx_unlock(&sc->driver_mtx); 4219 break; 4220 4221 case SIOCSIFCAP: 4222 mtx_lock(&sc->driver_mtx); 4223 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 4224 if (mask & IFCAP_TXCSUM) { 4225 if (IFCAP_TXCSUM & ifp->if_capenable) { 4226 mask &= ~IFCAP_TSO4; 4227 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 4228 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP); 4229 } else { 4230 ifp->if_capenable |= IFCAP_TXCSUM; 4231 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 4232 } 4233 } 4234 if (mask & IFCAP_RXCSUM) { 4235 if (IFCAP_RXCSUM & ifp->if_capenable) { 4236 ifp->if_capenable &= ~IFCAP_RXCSUM; 4237 } else { 4238 ifp->if_capenable |= IFCAP_RXCSUM; 4239 } 4240 } 4241 if (mask & IFCAP_TSO4) { 4242 if (IFCAP_TSO4 & ifp->if_capenable) { 4243 ifp->if_capenable &= ~IFCAP_TSO4; 4244 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 4245 ifp->if_capenable |= IFCAP_TSO4; 4246 ifp->if_hwassist |= CSUM_TSO; 4247 } else { 4248 printf("mxge requires tx checksum offload" 4249 " be enabled to use TSO\n"); 4250 err = EINVAL; 4251 } 4252 } 4253 #if IFCAP_TSO6 4254 if (mask & IFCAP_TXCSUM_IPV6) { 4255 if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) { 4256 mask &= ~IFCAP_TSO6; 4257 ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 4258 | IFCAP_TSO6); 4259 ifp->if_hwassist &= ~(CSUM_TCP_IPV6 4260 | CSUM_UDP); 4261 } else { 4262 ifp->if_capenable |= IFCAP_TXCSUM_IPV6; 4263 ifp->if_hwassist |= (CSUM_TCP_IPV6 4264 | CSUM_UDP_IPV6); 4265 } 4266 } 4267 if (mask & IFCAP_RXCSUM_IPV6) { 4268 if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) { 4269 ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6; 4270 } else { 4271 ifp->if_capenable |= IFCAP_RXCSUM_IPV6; 4272 } 4273 } 4274 if (mask & IFCAP_TSO6) { 4275 if (IFCAP_TSO6 & ifp->if_capenable) { 4276 ifp->if_capenable &= ~IFCAP_TSO6; 4277 } else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) { 4278 ifp->if_capenable |= IFCAP_TSO6; 4279 ifp->if_hwassist |= CSUM_TSO; 4280 } else { 4281 printf("mxge requires tx checksum offload" 4282 " be enabled to use TSO\n"); 4283 err = EINVAL; 4284 } 4285 } 4286 #endif /*IFCAP_TSO6 */ 4287 4288 if (mask & IFCAP_LRO) 4289 ifp->if_capenable ^= IFCAP_LRO; 4290 if (mask & IFCAP_VLAN_HWTAGGING) 4291 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 4292 if (mask & IFCAP_VLAN_HWTSO) 4293 ifp->if_capenable ^= IFCAP_VLAN_HWTSO; 4294 4295 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) || 4296 !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING)) 4297 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO; 4298 4299 mtx_unlock(&sc->driver_mtx); 4300 VLAN_CAPABILITIES(ifp); 4301 4302 break; 4303 4304 case SIOCGIFMEDIA: 4305 mtx_lock(&sc->driver_mtx); 4306 if (sc->dying) { 4307 mtx_unlock(&sc->driver_mtx); 4308 return (EINVAL); 4309 } 4310 mxge_media_probe(sc); 4311 mtx_unlock(&sc->driver_mtx); 4312 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 4313 &sc->media, command); 4314 break; 4315 4316 case SIOCGI2C: 4317 if (sc->connector != MXGE_XFP && 4318 sc->connector != MXGE_SFP) { 4319 err = ENXIO; 4320 break; 4321 } 4322 err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); 4323 if (err != 0) 4324 break; 4325 mtx_lock(&sc->driver_mtx); 4326 if (sc->dying) { 4327 mtx_unlock(&sc->driver_mtx); 4328 return (EINVAL); 4329 } 4330 err = mxge_fetch_i2c(sc, &i2c); 4331 mtx_unlock(&sc->driver_mtx); 4332 if (err == 0) 4333 err = copyout(&i2c, ifr_data_get_ptr(ifr), 4334 sizeof(i2c)); 4335 break; 4336 default: 4337 err = ether_ioctl(ifp, command, data); 4338 break; 4339 } 4340 return err; 4341 } 4342 4343 static void 4344 mxge_fetch_tunables(mxge_softc_t *sc) 4345 { 4346 4347 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); 4348 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 4349 &mxge_flow_control); 4350 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 4351 &mxge_intr_coal_delay); 4352 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 4353 &mxge_nvidia_ecrc_enable); 4354 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 4355 &mxge_force_firmware); 4356 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 4357 &mxge_deassert_wait); 4358 TUNABLE_INT_FETCH("hw.mxge.verbose", 4359 &mxge_verbose); 4360 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 4361 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); 4362 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); 4363 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type); 4364 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu); 4365 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle); 4366 4367 if (bootverbose) 4368 mxge_verbose = 1; 4369 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 4370 mxge_intr_coal_delay = 30; 4371 if (mxge_ticks == 0) 4372 mxge_ticks = hz / 2; 4373 sc->pause = mxge_flow_control; 4374 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 4375 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) { 4376 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 4377 } 4378 if (mxge_initial_mtu > ETHERMTU_JUMBO || 4379 mxge_initial_mtu < ETHER_MIN_LEN) 4380 mxge_initial_mtu = ETHERMTU_JUMBO; 4381 4382 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE) 4383 mxge_throttle = MXGE_MAX_THROTTLE; 4384 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE) 4385 mxge_throttle = MXGE_MIN_THROTTLE; 4386 sc->throttle = mxge_throttle; 4387 } 4388 4389 4390 static void 4391 mxge_free_slices(mxge_softc_t *sc) 4392 { 4393 struct mxge_slice_state *ss; 4394 int i; 4395 4396 4397 if (sc->ss == NULL) 4398 return; 4399 4400 for (i = 0; i < sc->num_slices; i++) { 4401 ss = &sc->ss[i]; 4402 if (ss->fw_stats != NULL) { 4403 mxge_dma_free(&ss->fw_stats_dma); 4404 ss->fw_stats = NULL; 4405 #ifdef IFNET_BUF_RING 4406 if (ss->tx.br != NULL) { 4407 drbr_free(ss->tx.br, M_DEVBUF); 4408 ss->tx.br = NULL; 4409 } 4410 #endif 4411 mtx_destroy(&ss->tx.mtx); 4412 } 4413 if (ss->rx_done.entry != NULL) { 4414 mxge_dma_free(&ss->rx_done.dma); 4415 ss->rx_done.entry = NULL; 4416 } 4417 } 4418 free(sc->ss, M_DEVBUF); 4419 sc->ss = NULL; 4420 } 4421 4422 static int 4423 mxge_alloc_slices(mxge_softc_t *sc) 4424 { 4425 mxge_cmd_t cmd; 4426 struct mxge_slice_state *ss; 4427 size_t bytes; 4428 int err, i, max_intr_slots; 4429 4430 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4431 if (err != 0) { 4432 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4433 return err; 4434 } 4435 sc->rx_ring_size = cmd.data0; 4436 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); 4437 4438 bytes = sizeof (*sc->ss) * sc->num_slices; 4439 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO); 4440 if (sc->ss == NULL) 4441 return (ENOMEM); 4442 for (i = 0; i < sc->num_slices; i++) { 4443 ss = &sc->ss[i]; 4444 4445 ss->sc = sc; 4446 4447 /* allocate per-slice rx interrupt queues */ 4448 4449 bytes = max_intr_slots * sizeof (*ss->rx_done.entry); 4450 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096); 4451 if (err != 0) 4452 goto abort; 4453 ss->rx_done.entry = ss->rx_done.dma.addr; 4454 bzero(ss->rx_done.entry, bytes); 4455 4456 /* 4457 * allocate the per-slice firmware stats; stats 4458 * (including tx) are used used only on the first 4459 * slice for now 4460 */ 4461 #ifndef IFNET_BUF_RING 4462 if (i > 0) 4463 continue; 4464 #endif 4465 4466 bytes = sizeof (*ss->fw_stats); 4467 err = mxge_dma_alloc(sc, &ss->fw_stats_dma, 4468 sizeof (*ss->fw_stats), 64); 4469 if (err != 0) 4470 goto abort; 4471 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr; 4472 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name), 4473 "%s:tx(%d)", device_get_nameunit(sc->dev), i); 4474 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF); 4475 #ifdef IFNET_BUF_RING 4476 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK, 4477 &ss->tx.mtx); 4478 #endif 4479 } 4480 4481 return (0); 4482 4483 abort: 4484 mxge_free_slices(sc); 4485 return (ENOMEM); 4486 } 4487 4488 static void 4489 mxge_slice_probe(mxge_softc_t *sc) 4490 { 4491 mxge_cmd_t cmd; 4492 char *old_fw; 4493 int msix_cnt, status, max_intr_slots; 4494 4495 sc->num_slices = 1; 4496 /* 4497 * don't enable multiple slices if they are not enabled, 4498 * or if this is not an SMP system 4499 */ 4500 4501 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2) 4502 return; 4503 4504 /* see how many MSI-X interrupts are available */ 4505 msix_cnt = pci_msix_count(sc->dev); 4506 if (msix_cnt < 2) 4507 return; 4508 4509 /* now load the slice aware firmware see what it supports */ 4510 old_fw = sc->fw_name; 4511 if (old_fw == mxge_fw_aligned) 4512 sc->fw_name = mxge_fw_rss_aligned; 4513 else 4514 sc->fw_name = mxge_fw_rss_unaligned; 4515 status = mxge_load_firmware(sc, 0); 4516 if (status != 0) { 4517 device_printf(sc->dev, "Falling back to a single slice\n"); 4518 return; 4519 } 4520 4521 /* try to send a reset command to the card to see if it 4522 is alive */ 4523 memset(&cmd, 0, sizeof (cmd)); 4524 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 4525 if (status != 0) { 4526 device_printf(sc->dev, "failed reset\n"); 4527 goto abort_with_fw; 4528 } 4529 4530 /* get rx ring size */ 4531 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4532 if (status != 0) { 4533 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4534 goto abort_with_fw; 4535 } 4536 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t)); 4537 4538 /* tell it the size of the interrupt queues */ 4539 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot); 4540 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 4541 if (status != 0) { 4542 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 4543 goto abort_with_fw; 4544 } 4545 4546 /* ask the maximum number of slices it supports */ 4547 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 4548 if (status != 0) { 4549 device_printf(sc->dev, 4550 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); 4551 goto abort_with_fw; 4552 } 4553 sc->num_slices = cmd.data0; 4554 if (sc->num_slices > msix_cnt) 4555 sc->num_slices = msix_cnt; 4556 4557 if (mxge_max_slices == -1) { 4558 /* cap to number of CPUs in system */ 4559 if (sc->num_slices > mp_ncpus) 4560 sc->num_slices = mp_ncpus; 4561 } else { 4562 if (sc->num_slices > mxge_max_slices) 4563 sc->num_slices = mxge_max_slices; 4564 } 4565 /* make sure it is a power of two */ 4566 while (sc->num_slices & (sc->num_slices - 1)) 4567 sc->num_slices--; 4568 4569 if (mxge_verbose) 4570 device_printf(sc->dev, "using %d slices\n", 4571 sc->num_slices); 4572 4573 return; 4574 4575 abort_with_fw: 4576 sc->fw_name = old_fw; 4577 (void) mxge_load_firmware(sc, 0); 4578 } 4579 4580 static int 4581 mxge_add_msix_irqs(mxge_softc_t *sc) 4582 { 4583 size_t bytes; 4584 int count, err, i, rid; 4585 4586 rid = PCIR_BAR(2); 4587 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 4588 &rid, RF_ACTIVE); 4589 4590 if (sc->msix_table_res == NULL) { 4591 device_printf(sc->dev, "couldn't alloc MSIX table res\n"); 4592 return ENXIO; 4593 } 4594 4595 count = sc->num_slices; 4596 err = pci_alloc_msix(sc->dev, &count); 4597 if (err != 0) { 4598 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d" 4599 "err = %d \n", sc->num_slices, err); 4600 goto abort_with_msix_table; 4601 } 4602 if (count < sc->num_slices) { 4603 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n", 4604 count, sc->num_slices); 4605 device_printf(sc->dev, 4606 "Try setting hw.mxge.max_slices to %d\n", 4607 count); 4608 err = ENOSPC; 4609 goto abort_with_msix; 4610 } 4611 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices; 4612 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4613 if (sc->msix_irq_res == NULL) { 4614 err = ENOMEM; 4615 goto abort_with_msix; 4616 } 4617 4618 for (i = 0; i < sc->num_slices; i++) { 4619 rid = i + 1; 4620 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev, 4621 SYS_RES_IRQ, 4622 &rid, RF_ACTIVE); 4623 if (sc->msix_irq_res[i] == NULL) { 4624 device_printf(sc->dev, "couldn't allocate IRQ res" 4625 " for message %d\n", i); 4626 err = ENXIO; 4627 goto abort_with_res; 4628 } 4629 } 4630 4631 bytes = sizeof (*sc->msix_ih) * sc->num_slices; 4632 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4633 4634 for (i = 0; i < sc->num_slices; i++) { 4635 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i], 4636 INTR_TYPE_NET | INTR_MPSAFE, 4637 #if __FreeBSD_version > 700030 4638 NULL, 4639 #endif 4640 mxge_intr, &sc->ss[i], &sc->msix_ih[i]); 4641 if (err != 0) { 4642 device_printf(sc->dev, "couldn't setup intr for " 4643 "message %d\n", i); 4644 goto abort_with_intr; 4645 } 4646 bus_describe_intr(sc->dev, sc->msix_irq_res[i], 4647 sc->msix_ih[i], "s%d", i); 4648 } 4649 4650 if (mxge_verbose) { 4651 device_printf(sc->dev, "using %d msix IRQs:", 4652 sc->num_slices); 4653 for (i = 0; i < sc->num_slices; i++) 4654 printf(" %jd", rman_get_start(sc->msix_irq_res[i])); 4655 printf("\n"); 4656 } 4657 return (0); 4658 4659 abort_with_intr: 4660 for (i = 0; i < sc->num_slices; i++) { 4661 if (sc->msix_ih[i] != NULL) { 4662 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4663 sc->msix_ih[i]); 4664 sc->msix_ih[i] = NULL; 4665 } 4666 } 4667 free(sc->msix_ih, M_DEVBUF); 4668 4669 4670 abort_with_res: 4671 for (i = 0; i < sc->num_slices; i++) { 4672 rid = i + 1; 4673 if (sc->msix_irq_res[i] != NULL) 4674 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4675 sc->msix_irq_res[i]); 4676 sc->msix_irq_res[i] = NULL; 4677 } 4678 free(sc->msix_irq_res, M_DEVBUF); 4679 4680 4681 abort_with_msix: 4682 pci_release_msi(sc->dev); 4683 4684 abort_with_msix_table: 4685 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4686 sc->msix_table_res); 4687 4688 return err; 4689 } 4690 4691 static int 4692 mxge_add_single_irq(mxge_softc_t *sc) 4693 { 4694 int count, err, rid; 4695 4696 count = pci_msi_count(sc->dev); 4697 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) { 4698 rid = 1; 4699 } else { 4700 rid = 0; 4701 sc->legacy_irq = 1; 4702 } 4703 sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid, 4704 RF_SHAREABLE | RF_ACTIVE); 4705 if (sc->irq_res == NULL) { 4706 device_printf(sc->dev, "could not alloc interrupt\n"); 4707 return ENXIO; 4708 } 4709 if (mxge_verbose) 4710 device_printf(sc->dev, "using %s irq %jd\n", 4711 sc->legacy_irq ? "INTx" : "MSI", 4712 rman_get_start(sc->irq_res)); 4713 err = bus_setup_intr(sc->dev, sc->irq_res, 4714 INTR_TYPE_NET | INTR_MPSAFE, 4715 #if __FreeBSD_version > 700030 4716 NULL, 4717 #endif 4718 mxge_intr, &sc->ss[0], &sc->ih); 4719 if (err != 0) { 4720 bus_release_resource(sc->dev, SYS_RES_IRQ, 4721 sc->legacy_irq ? 0 : 1, sc->irq_res); 4722 if (!sc->legacy_irq) 4723 pci_release_msi(sc->dev); 4724 } 4725 return err; 4726 } 4727 4728 static void 4729 mxge_rem_msix_irqs(mxge_softc_t *sc) 4730 { 4731 int i, rid; 4732 4733 for (i = 0; i < sc->num_slices; i++) { 4734 if (sc->msix_ih[i] != NULL) { 4735 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4736 sc->msix_ih[i]); 4737 sc->msix_ih[i] = NULL; 4738 } 4739 } 4740 free(sc->msix_ih, M_DEVBUF); 4741 4742 for (i = 0; i < sc->num_slices; i++) { 4743 rid = i + 1; 4744 if (sc->msix_irq_res[i] != NULL) 4745 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4746 sc->msix_irq_res[i]); 4747 sc->msix_irq_res[i] = NULL; 4748 } 4749 free(sc->msix_irq_res, M_DEVBUF); 4750 4751 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4752 sc->msix_table_res); 4753 4754 pci_release_msi(sc->dev); 4755 return; 4756 } 4757 4758 static void 4759 mxge_rem_single_irq(mxge_softc_t *sc) 4760 { 4761 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 4762 bus_release_resource(sc->dev, SYS_RES_IRQ, 4763 sc->legacy_irq ? 0 : 1, sc->irq_res); 4764 if (!sc->legacy_irq) 4765 pci_release_msi(sc->dev); 4766 } 4767 4768 static void 4769 mxge_rem_irq(mxge_softc_t *sc) 4770 { 4771 if (sc->num_slices > 1) 4772 mxge_rem_msix_irqs(sc); 4773 else 4774 mxge_rem_single_irq(sc); 4775 } 4776 4777 static int 4778 mxge_add_irq(mxge_softc_t *sc) 4779 { 4780 int err; 4781 4782 if (sc->num_slices > 1) 4783 err = mxge_add_msix_irqs(sc); 4784 else 4785 err = mxge_add_single_irq(sc); 4786 4787 if (0 && err == 0 && sc->num_slices > 1) { 4788 mxge_rem_msix_irqs(sc); 4789 err = mxge_add_msix_irqs(sc); 4790 } 4791 return err; 4792 } 4793 4794 4795 static int 4796 mxge_attach(device_t dev) 4797 { 4798 mxge_cmd_t cmd; 4799 mxge_softc_t *sc = device_get_softc(dev); 4800 struct ifnet *ifp; 4801 int err, rid; 4802 4803 sc->dev = dev; 4804 mxge_fetch_tunables(sc); 4805 4806 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc); 4807 sc->tq = taskqueue_create("mxge_taskq", M_WAITOK, 4808 taskqueue_thread_enqueue, &sc->tq); 4809 if (sc->tq == NULL) { 4810 err = ENOMEM; 4811 goto abort_with_nothing; 4812 } 4813 4814 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 4815 1, /* alignment */ 4816 0, /* boundary */ 4817 BUS_SPACE_MAXADDR, /* low */ 4818 BUS_SPACE_MAXADDR, /* high */ 4819 NULL, NULL, /* filter */ 4820 65536 + 256, /* maxsize */ 4821 MXGE_MAX_SEND_DESC, /* num segs */ 4822 65536, /* maxsegsize */ 4823 0, /* flags */ 4824 NULL, NULL, /* lock */ 4825 &sc->parent_dmat); /* tag */ 4826 4827 if (err != 0) { 4828 device_printf(sc->dev, "Err %d allocating parent dmat\n", 4829 err); 4830 goto abort_with_tq; 4831 } 4832 4833 ifp = sc->ifp = if_alloc(IFT_ETHER); 4834 if (ifp == NULL) { 4835 device_printf(dev, "can not if_alloc()\n"); 4836 err = ENOSPC; 4837 goto abort_with_parent_dmat; 4838 } 4839 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4840 4841 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", 4842 device_get_nameunit(dev)); 4843 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF); 4844 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), 4845 "%s:drv", device_get_nameunit(dev)); 4846 mtx_init(&sc->driver_mtx, sc->driver_mtx_name, 4847 MTX_NETWORK_LOCK, MTX_DEF); 4848 4849 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); 4850 4851 mxge_setup_cfg_space(sc); 4852 4853 /* Map the board into the kernel */ 4854 rid = PCIR_BARS; 4855 sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, 4856 RF_ACTIVE); 4857 if (sc->mem_res == NULL) { 4858 device_printf(dev, "could not map memory\n"); 4859 err = ENXIO; 4860 goto abort_with_lock; 4861 } 4862 sc->sram = rman_get_virtual(sc->mem_res); 4863 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4864 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4865 device_printf(dev, "impossible memory region size %jd\n", 4866 rman_get_size(sc->mem_res)); 4867 err = ENXIO; 4868 goto abort_with_mem_res; 4869 } 4870 4871 /* make NULL terminated copy of the EEPROM strings section of 4872 lanai SRAM */ 4873 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4874 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4875 rman_get_bushandle(sc->mem_res), 4876 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4877 sc->eeprom_strings, 4878 MXGE_EEPROM_STRINGS_SIZE - 2); 4879 err = mxge_parse_strings(sc); 4880 if (err != 0) 4881 goto abort_with_mem_res; 4882 4883 /* Enable write combining for efficient use of PCIe bus */ 4884 mxge_enable_wc(sc); 4885 4886 /* Allocate the out of band dma memory */ 4887 err = mxge_dma_alloc(sc, &sc->cmd_dma, 4888 sizeof (mxge_cmd_t), 64); 4889 if (err != 0) 4890 goto abort_with_mem_res; 4891 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 4892 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4893 if (err != 0) 4894 goto abort_with_cmd_dma; 4895 4896 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4897 if (err != 0) 4898 goto abort_with_zeropad_dma; 4899 4900 /* select & load the firmware */ 4901 err = mxge_select_firmware(sc); 4902 if (err != 0) 4903 goto abort_with_dmabench; 4904 sc->intr_coal_delay = mxge_intr_coal_delay; 4905 4906 mxge_slice_probe(sc); 4907 err = mxge_alloc_slices(sc); 4908 if (err != 0) 4909 goto abort_with_dmabench; 4910 4911 err = mxge_reset(sc, 0); 4912 if (err != 0) 4913 goto abort_with_slices; 4914 4915 err = mxge_alloc_rings(sc); 4916 if (err != 0) { 4917 device_printf(sc->dev, "failed to allocate rings\n"); 4918 goto abort_with_slices; 4919 } 4920 4921 err = mxge_add_irq(sc); 4922 if (err != 0) { 4923 device_printf(sc->dev, "failed to add irq\n"); 4924 goto abort_with_rings; 4925 } 4926 4927 ifp->if_baudrate = IF_Gbps(10); 4928 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 4929 IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 | 4930 IFCAP_RXCSUM_IPV6; 4931 #if defined(INET) || defined(INET6) 4932 ifp->if_capabilities |= IFCAP_LRO; 4933 #endif 4934 4935 #ifdef MXGE_NEW_VLAN_API 4936 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; 4937 4938 /* Only FW 1.4.32 and newer can do TSO over vlans */ 4939 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 4940 sc->fw_ver_tiny >= 32) 4941 ifp->if_capabilities |= IFCAP_VLAN_HWTSO; 4942 #endif 4943 sc->max_mtu = mxge_max_mtu(sc); 4944 if (sc->max_mtu >= 9000) 4945 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 4946 else 4947 device_printf(dev, "MTU limited to %d. Install " 4948 "latest firmware for 9000 byte jumbo support\n", 4949 sc->max_mtu - ETHER_HDR_LEN); 4950 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4951 ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6; 4952 /* check to see if f/w supports TSO for IPv6 */ 4953 if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) { 4954 if (CSUM_TCP_IPV6) 4955 ifp->if_capabilities |= IFCAP_TSO6; 4956 sc->max_tso6_hlen = min(cmd.data0, 4957 sizeof (sc->ss[0].scratch)); 4958 } 4959 ifp->if_capenable = ifp->if_capabilities; 4960 if (sc->lro_cnt == 0) 4961 ifp->if_capenable &= ~IFCAP_LRO; 4962 ifp->if_init = mxge_init; 4963 ifp->if_softc = sc; 4964 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4965 ifp->if_ioctl = mxge_ioctl; 4966 ifp->if_start = mxge_start; 4967 ifp->if_get_counter = mxge_get_counter; 4968 ifp->if_hw_tsomax = IP_MAXPACKET - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); 4969 ifp->if_hw_tsomaxsegcount = sc->ss[0].tx.max_desc; 4970 ifp->if_hw_tsomaxsegsize = IP_MAXPACKET; 4971 /* Initialise the ifmedia structure */ 4972 ifmedia_init(&sc->media, 0, mxge_media_change, 4973 mxge_media_status); 4974 mxge_media_init(sc); 4975 mxge_media_probe(sc); 4976 sc->dying = 0; 4977 ether_ifattach(ifp, sc->mac_addr); 4978 /* ether_ifattach sets mtu to ETHERMTU */ 4979 if (mxge_initial_mtu != ETHERMTU) 4980 mxge_change_mtu(sc, mxge_initial_mtu); 4981 4982 mxge_add_sysctls(sc); 4983 #ifdef IFNET_BUF_RING 4984 ifp->if_transmit = mxge_transmit; 4985 ifp->if_qflush = mxge_qflush; 4986 #endif 4987 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq", 4988 device_get_nameunit(sc->dev)); 4989 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 4990 return 0; 4991 4992 abort_with_rings: 4993 mxge_free_rings(sc); 4994 abort_with_slices: 4995 mxge_free_slices(sc); 4996 abort_with_dmabench: 4997 mxge_dma_free(&sc->dmabench_dma); 4998 abort_with_zeropad_dma: 4999 mxge_dma_free(&sc->zeropad_dma); 5000 abort_with_cmd_dma: 5001 mxge_dma_free(&sc->cmd_dma); 5002 abort_with_mem_res: 5003 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 5004 abort_with_lock: 5005 pci_disable_busmaster(dev); 5006 mtx_destroy(&sc->cmd_mtx); 5007 mtx_destroy(&sc->driver_mtx); 5008 if_free(ifp); 5009 abort_with_parent_dmat: 5010 bus_dma_tag_destroy(sc->parent_dmat); 5011 abort_with_tq: 5012 if (sc->tq != NULL) { 5013 taskqueue_drain(sc->tq, &sc->watchdog_task); 5014 taskqueue_free(sc->tq); 5015 sc->tq = NULL; 5016 } 5017 abort_with_nothing: 5018 return err; 5019 } 5020 5021 static int 5022 mxge_detach(device_t dev) 5023 { 5024 mxge_softc_t *sc = device_get_softc(dev); 5025 5026 if (mxge_vlans_active(sc)) { 5027 device_printf(sc->dev, 5028 "Detach vlans before removing module\n"); 5029 return EBUSY; 5030 } 5031 mtx_lock(&sc->driver_mtx); 5032 sc->dying = 1; 5033 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 5034 mxge_close(sc, 0); 5035 mtx_unlock(&sc->driver_mtx); 5036 ether_ifdetach(sc->ifp); 5037 if (sc->tq != NULL) { 5038 taskqueue_drain(sc->tq, &sc->watchdog_task); 5039 taskqueue_free(sc->tq); 5040 sc->tq = NULL; 5041 } 5042 callout_drain(&sc->co_hdl); 5043 ifmedia_removeall(&sc->media); 5044 mxge_dummy_rdma(sc, 0); 5045 mxge_rem_sysctls(sc); 5046 mxge_rem_irq(sc); 5047 mxge_free_rings(sc); 5048 mxge_free_slices(sc); 5049 mxge_dma_free(&sc->dmabench_dma); 5050 mxge_dma_free(&sc->zeropad_dma); 5051 mxge_dma_free(&sc->cmd_dma); 5052 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 5053 pci_disable_busmaster(dev); 5054 mtx_destroy(&sc->cmd_mtx); 5055 mtx_destroy(&sc->driver_mtx); 5056 if_free(sc->ifp); 5057 bus_dma_tag_destroy(sc->parent_dmat); 5058 return 0; 5059 } 5060 5061 static int 5062 mxge_shutdown(device_t dev) 5063 { 5064 return 0; 5065 } 5066 5067 /* 5068 This file uses Myri10GE driver indentation. 5069 5070 Local Variables: 5071 c-file-style:"linux" 5072 tab-width:8 5073 End: 5074 */ 5075