1 /****************************************************************************** 2 3 Copyright (c) 2006-2009, Myricom Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Myricom Inc, nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 27 28 ***************************************************************************/ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/linker.h> 36 #include <sys/firmware.h> 37 #include <sys/endian.h> 38 #include <sys/sockio.h> 39 #include <sys/mbuf.h> 40 #include <sys/malloc.h> 41 #include <sys/kdb.h> 42 #include <sys/kernel.h> 43 #include <sys/lock.h> 44 #include <sys/module.h> 45 #include <sys/socket.h> 46 #include <sys/sysctl.h> 47 #include <sys/sx.h> 48 49 #include <net/if.h> 50 #include <net/if_arp.h> 51 #include <net/ethernet.h> 52 #include <net/if_dl.h> 53 #include <net/if_media.h> 54 55 #include <net/bpf.h> 56 57 #include <net/if_types.h> 58 #include <net/if_vlan_var.h> 59 #include <net/zlib.h> 60 61 #include <netinet/in_systm.h> 62 #include <netinet/in.h> 63 #include <netinet/ip.h> 64 #include <netinet/tcp.h> 65 66 #include <machine/bus.h> 67 #include <machine/in_cksum.h> 68 #include <machine/resource.h> 69 #include <sys/bus.h> 70 #include <sys/rman.h> 71 #include <sys/smp.h> 72 73 #include <dev/pci/pcireg.h> 74 #include <dev/pci/pcivar.h> 75 #include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */ 76 77 #include <vm/vm.h> /* for pmap_mapdev() */ 78 #include <vm/pmap.h> 79 80 #if defined(__i386) || defined(__amd64) 81 #include <machine/specialreg.h> 82 #endif 83 84 #include <dev/mxge/mxge_mcp.h> 85 #include <dev/mxge/mcp_gen_header.h> 86 /*#define MXGE_FAKE_IFP*/ 87 #include <dev/mxge/if_mxge_var.h> 88 #ifdef IFNET_BUF_RING 89 #include <sys/buf_ring.h> 90 #endif 91 92 /* tunable params */ 93 static int mxge_nvidia_ecrc_enable = 1; 94 static int mxge_force_firmware = 0; 95 static int mxge_intr_coal_delay = 30; 96 static int mxge_deassert_wait = 1; 97 static int mxge_flow_control = 1; 98 static int mxge_verbose = 0; 99 static int mxge_lro_cnt = 8; 100 static int mxge_ticks; 101 static int mxge_max_slices = 1; 102 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT; 103 static int mxge_always_promisc = 0; 104 static char *mxge_fw_unaligned = "mxge_ethp_z8e"; 105 static char *mxge_fw_aligned = "mxge_eth_z8e"; 106 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; 107 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; 108 109 static int mxge_probe(device_t dev); 110 static int mxge_attach(device_t dev); 111 static int mxge_detach(device_t dev); 112 static int mxge_shutdown(device_t dev); 113 static void mxge_intr(void *arg); 114 115 static device_method_t mxge_methods[] = 116 { 117 /* Device interface */ 118 DEVMETHOD(device_probe, mxge_probe), 119 DEVMETHOD(device_attach, mxge_attach), 120 DEVMETHOD(device_detach, mxge_detach), 121 DEVMETHOD(device_shutdown, mxge_shutdown), 122 {0, 0} 123 }; 124 125 static driver_t mxge_driver = 126 { 127 "mxge", 128 mxge_methods, 129 sizeof(mxge_softc_t), 130 }; 131 132 static devclass_t mxge_devclass; 133 134 /* Declare ourselves to be a child of the PCI bus.*/ 135 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0); 136 MODULE_DEPEND(mxge, firmware, 1, 1, 1); 137 MODULE_DEPEND(mxge, zlib, 1, 1, 1); 138 139 static int mxge_load_firmware(mxge_softc_t *sc, int adopt); 140 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 141 static int mxge_close(mxge_softc_t *sc); 142 static int mxge_open(mxge_softc_t *sc); 143 static void mxge_tick(void *arg); 144 145 static int 146 mxge_probe(device_t dev) 147 { 148 int rev; 149 150 151 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && 152 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) || 153 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) { 154 rev = pci_get_revid(dev); 155 switch (rev) { 156 case MXGE_PCI_REV_Z8E: 157 device_set_desc(dev, "Myri10G-PCIE-8A"); 158 break; 159 case MXGE_PCI_REV_Z8ES: 160 device_set_desc(dev, "Myri10G-PCIE-8B"); 161 break; 162 default: 163 device_set_desc(dev, "Myri10G-PCIE-8??"); 164 device_printf(dev, "Unrecognized rev %d NIC\n", 165 rev); 166 break; 167 } 168 return 0; 169 } 170 return ENXIO; 171 } 172 173 static void 174 mxge_enable_wc(mxge_softc_t *sc) 175 { 176 #if defined(__i386) || defined(__amd64) 177 vm_offset_t len; 178 int err; 179 180 sc->wc = 1; 181 len = rman_get_size(sc->mem_res); 182 err = pmap_change_attr((vm_offset_t) sc->sram, 183 len, PAT_WRITE_COMBINING); 184 if (err != 0) { 185 device_printf(sc->dev, "pmap_change_attr failed, %d\n", 186 err); 187 sc->wc = 0; 188 } 189 #endif 190 } 191 192 193 /* callback to get our DMA address */ 194 static void 195 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, 196 int error) 197 { 198 if (error == 0) { 199 *(bus_addr_t *) arg = segs->ds_addr; 200 } 201 } 202 203 static int 204 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, 205 bus_size_t alignment) 206 { 207 int err; 208 device_t dev = sc->dev; 209 bus_size_t boundary, maxsegsize; 210 211 if (bytes > 4096 && alignment == 4096) { 212 boundary = 0; 213 maxsegsize = bytes; 214 } else { 215 boundary = 4096; 216 maxsegsize = 4096; 217 } 218 219 /* allocate DMAable memory tags */ 220 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 221 alignment, /* alignment */ 222 boundary, /* boundary */ 223 BUS_SPACE_MAXADDR, /* low */ 224 BUS_SPACE_MAXADDR, /* high */ 225 NULL, NULL, /* filter */ 226 bytes, /* maxsize */ 227 1, /* num segs */ 228 maxsegsize, /* maxsegsize */ 229 BUS_DMA_COHERENT, /* flags */ 230 NULL, NULL, /* lock */ 231 &dma->dmat); /* tag */ 232 if (err != 0) { 233 device_printf(dev, "couldn't alloc tag (err = %d)\n", err); 234 return err; 235 } 236 237 /* allocate DMAable memory & map */ 238 err = bus_dmamem_alloc(dma->dmat, &dma->addr, 239 (BUS_DMA_WAITOK | BUS_DMA_COHERENT 240 | BUS_DMA_ZERO), &dma->map); 241 if (err != 0) { 242 device_printf(dev, "couldn't alloc mem (err = %d)\n", err); 243 goto abort_with_dmat; 244 } 245 246 /* load the memory */ 247 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, 248 mxge_dmamap_callback, 249 (void *)&dma->bus_addr, 0); 250 if (err != 0) { 251 device_printf(dev, "couldn't load map (err = %d)\n", err); 252 goto abort_with_mem; 253 } 254 return 0; 255 256 abort_with_mem: 257 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 258 abort_with_dmat: 259 (void)bus_dma_tag_destroy(dma->dmat); 260 return err; 261 } 262 263 264 static void 265 mxge_dma_free(mxge_dma_t *dma) 266 { 267 bus_dmamap_unload(dma->dmat, dma->map); 268 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 269 (void)bus_dma_tag_destroy(dma->dmat); 270 } 271 272 /* 273 * The eeprom strings on the lanaiX have the format 274 * SN=x\0 275 * MAC=x:x:x:x:x:x\0 276 * PC=text\0 277 */ 278 279 static int 280 mxge_parse_strings(mxge_softc_t *sc) 281 { 282 #define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++) 283 284 char *ptr, *limit; 285 int i, found_mac; 286 287 ptr = sc->eeprom_strings; 288 limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE; 289 found_mac = 0; 290 while (ptr < limit && *ptr != '\0') { 291 if (memcmp(ptr, "MAC=", 4) == 0) { 292 ptr += 1; 293 sc->mac_addr_string = ptr; 294 for (i = 0; i < 6; i++) { 295 ptr += 3; 296 if ((ptr + 2) > limit) 297 goto abort; 298 sc->mac_addr[i] = strtoul(ptr, NULL, 16); 299 found_mac = 1; 300 } 301 } else if (memcmp(ptr, "PC=", 3) == 0) { 302 ptr += 3; 303 strncpy(sc->product_code_string, ptr, 304 sizeof (sc->product_code_string) - 1); 305 } else if (memcmp(ptr, "SN=", 3) == 0) { 306 ptr += 3; 307 strncpy(sc->serial_number_string, ptr, 308 sizeof (sc->serial_number_string) - 1); 309 } 310 MXGE_NEXT_STRING(ptr); 311 } 312 313 if (found_mac) 314 return 0; 315 316 abort: 317 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 318 319 return ENXIO; 320 } 321 322 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 323 static void 324 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 325 { 326 uint32_t val; 327 unsigned long base, off; 328 char *va, *cfgptr; 329 device_t pdev, mcp55; 330 uint16_t vendor_id, device_id, word; 331 uintptr_t bus, slot, func, ivend, idev; 332 uint32_t *ptr32; 333 334 335 if (!mxge_nvidia_ecrc_enable) 336 return; 337 338 pdev = device_get_parent(device_get_parent(sc->dev)); 339 if (pdev == NULL) { 340 device_printf(sc->dev, "could not find parent?\n"); 341 return; 342 } 343 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 344 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 345 346 if (vendor_id != 0x10de) 347 return; 348 349 base = 0; 350 351 if (device_id == 0x005d) { 352 /* ck804, base address is magic */ 353 base = 0xe0000000UL; 354 } else if (device_id >= 0x0374 && device_id <= 0x378) { 355 /* mcp55, base address stored in chipset */ 356 mcp55 = pci_find_bsf(0, 0, 0); 357 if (mcp55 && 358 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 359 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 360 word = pci_read_config(mcp55, 0x90, 2); 361 base = ((unsigned long)word & 0x7ffeU) << 25; 362 } 363 } 364 if (!base) 365 return; 366 367 /* XXXX 368 Test below is commented because it is believed that doing 369 config read/write beyond 0xff will access the config space 370 for the next larger function. Uncomment this and remove 371 the hacky pmap_mapdev() way of accessing config space when 372 FreeBSD grows support for extended pcie config space access 373 */ 374 #if 0 375 /* See if we can, by some miracle, access the extended 376 config space */ 377 val = pci_read_config(pdev, 0x178, 4); 378 if (val != 0xffffffff) { 379 val |= 0x40; 380 pci_write_config(pdev, 0x178, val, 4); 381 return; 382 } 383 #endif 384 /* Rather than using normal pci config space writes, we must 385 * map the Nvidia config space ourselves. This is because on 386 * opteron/nvidia class machine the 0xe000000 mapping is 387 * handled by the nvidia chipset, that means the internal PCI 388 * device (the on-chip northbridge), or the amd-8131 bridge 389 * and things behind them are not visible by this method. 390 */ 391 392 BUS_READ_IVAR(device_get_parent(pdev), pdev, 393 PCI_IVAR_BUS, &bus); 394 BUS_READ_IVAR(device_get_parent(pdev), pdev, 395 PCI_IVAR_SLOT, &slot); 396 BUS_READ_IVAR(device_get_parent(pdev), pdev, 397 PCI_IVAR_FUNCTION, &func); 398 BUS_READ_IVAR(device_get_parent(pdev), pdev, 399 PCI_IVAR_VENDOR, &ivend); 400 BUS_READ_IVAR(device_get_parent(pdev), pdev, 401 PCI_IVAR_DEVICE, &idev); 402 403 off = base 404 + 0x00100000UL * (unsigned long)bus 405 + 0x00001000UL * (unsigned long)(func 406 + 8 * slot); 407 408 /* map it into the kernel */ 409 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 410 411 412 if (va == NULL) { 413 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 414 return; 415 } 416 /* get a pointer to the config space mapped into the kernel */ 417 cfgptr = va + (off & PAGE_MASK); 418 419 /* make sure that we can really access it */ 420 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 421 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 422 if (! (vendor_id == ivend && device_id == idev)) { 423 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 424 vendor_id, device_id); 425 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 426 return; 427 } 428 429 ptr32 = (uint32_t*)(cfgptr + 0x178); 430 val = *ptr32; 431 432 if (val == 0xffffffff) { 433 device_printf(sc->dev, "extended mapping failed\n"); 434 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 435 return; 436 } 437 *ptr32 = val | 0x40; 438 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 439 if (mxge_verbose) 440 device_printf(sc->dev, 441 "Enabled ECRC on upstream Nvidia bridge " 442 "at %d:%d:%d\n", 443 (int)bus, (int)slot, (int)func); 444 return; 445 } 446 #else 447 static void 448 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 449 { 450 device_printf(sc->dev, 451 "Nforce 4 chipset on non-x86/amd64!?!?!\n"); 452 return; 453 } 454 #endif 455 456 457 static int 458 mxge_dma_test(mxge_softc_t *sc, int test_type) 459 { 460 mxge_cmd_t cmd; 461 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr; 462 int status; 463 uint32_t len; 464 char *test = " "; 465 466 467 /* Run a small DMA test. 468 * The magic multipliers to the length tell the firmware 469 * to do DMA read, write, or read+write tests. The 470 * results are returned in cmd.data0. The upper 16 471 * bits of the return is the number of transfers completed. 472 * The lower 16 bits is the time in 0.5us ticks that the 473 * transfers took to complete. 474 */ 475 476 len = sc->tx_boundary; 477 478 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 479 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 480 cmd.data2 = len * 0x10000; 481 status = mxge_send_cmd(sc, test_type, &cmd); 482 if (status != 0) { 483 test = "read"; 484 goto abort; 485 } 486 sc->read_dma = ((cmd.data0>>16) * len * 2) / 487 (cmd.data0 & 0xffff); 488 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 489 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 490 cmd.data2 = len * 0x1; 491 status = mxge_send_cmd(sc, test_type, &cmd); 492 if (status != 0) { 493 test = "write"; 494 goto abort; 495 } 496 sc->write_dma = ((cmd.data0>>16) * len * 2) / 497 (cmd.data0 & 0xffff); 498 499 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 500 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 501 cmd.data2 = len * 0x10001; 502 status = mxge_send_cmd(sc, test_type, &cmd); 503 if (status != 0) { 504 test = "read/write"; 505 goto abort; 506 } 507 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 508 (cmd.data0 & 0xffff); 509 510 abort: 511 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 512 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 513 test, status); 514 515 return status; 516 } 517 518 /* 519 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 520 * when the PCI-E Completion packets are aligned on an 8-byte 521 * boundary. Some PCI-E chip sets always align Completion packets; on 522 * the ones that do not, the alignment can be enforced by enabling 523 * ECRC generation (if supported). 524 * 525 * When PCI-E Completion packets are not aligned, it is actually more 526 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 527 * 528 * If the driver can neither enable ECRC nor verify that it has 529 * already been enabled, then it must use a firmware image which works 530 * around unaligned completion packets (ethp_z8e.dat), and it should 531 * also ensure that it never gives the device a Read-DMA which is 532 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 533 * enabled, then the driver should use the aligned (eth_z8e.dat) 534 * firmware image, and set tx_boundary to 4KB. 535 */ 536 537 static int 538 mxge_firmware_probe(mxge_softc_t *sc) 539 { 540 device_t dev = sc->dev; 541 int reg, status; 542 uint16_t pectl; 543 544 sc->tx_boundary = 4096; 545 /* 546 * Verify the max read request size was set to 4KB 547 * before trying the test with 4KB. 548 */ 549 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 550 pectl = pci_read_config(dev, reg + 0x8, 2); 551 if ((pectl & (5 << 12)) != (5 << 12)) { 552 device_printf(dev, "Max Read Req. size != 4k (0x%x\n", 553 pectl); 554 sc->tx_boundary = 2048; 555 } 556 } 557 558 /* 559 * load the optimized firmware (which assumes aligned PCIe 560 * completions) in order to see if it works on this host. 561 */ 562 sc->fw_name = mxge_fw_aligned; 563 status = mxge_load_firmware(sc, 1); 564 if (status != 0) { 565 return status; 566 } 567 568 /* 569 * Enable ECRC if possible 570 */ 571 mxge_enable_nvidia_ecrc(sc); 572 573 /* 574 * Run a DMA test which watches for unaligned completions and 575 * aborts on the first one seen. 576 */ 577 578 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 579 if (status == 0) 580 return 0; /* keep the aligned firmware */ 581 582 if (status != E2BIG) 583 device_printf(dev, "DMA test failed: %d\n", status); 584 if (status == ENOSYS) 585 device_printf(dev, "Falling back to ethp! " 586 "Please install up to date fw\n"); 587 return status; 588 } 589 590 static int 591 mxge_select_firmware(mxge_softc_t *sc) 592 { 593 int aligned = 0; 594 595 596 if (mxge_force_firmware != 0) { 597 if (mxge_force_firmware == 1) 598 aligned = 1; 599 else 600 aligned = 0; 601 if (mxge_verbose) 602 device_printf(sc->dev, 603 "Assuming %s completions (forced)\n", 604 aligned ? "aligned" : "unaligned"); 605 goto abort; 606 } 607 608 /* if the PCIe link width is 4 or less, we can use the aligned 609 firmware and skip any checks */ 610 if (sc->link_width != 0 && sc->link_width <= 4) { 611 device_printf(sc->dev, 612 "PCIe x%d Link, expect reduced performance\n", 613 sc->link_width); 614 aligned = 1; 615 goto abort; 616 } 617 618 if (0 == mxge_firmware_probe(sc)) 619 return 0; 620 621 abort: 622 if (aligned) { 623 sc->fw_name = mxge_fw_aligned; 624 sc->tx_boundary = 4096; 625 } else { 626 sc->fw_name = mxge_fw_unaligned; 627 sc->tx_boundary = 2048; 628 } 629 return (mxge_load_firmware(sc, 0)); 630 } 631 632 union qualhack 633 { 634 const char *ro_char; 635 char *rw_char; 636 }; 637 638 static int 639 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 640 { 641 642 643 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 644 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 645 be32toh(hdr->mcp_type)); 646 return EIO; 647 } 648 649 /* save firmware version for sysctl */ 650 strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version)); 651 if (mxge_verbose) 652 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 653 654 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 655 &sc->fw_ver_minor, &sc->fw_ver_tiny); 656 657 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 658 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 659 device_printf(sc->dev, "Found firmware version %s\n", 660 sc->fw_version); 661 device_printf(sc->dev, "Driver needs %d.%d\n", 662 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 663 return EINVAL; 664 } 665 return 0; 666 667 } 668 669 static void * 670 z_alloc(void *nil, u_int items, u_int size) 671 { 672 void *ptr; 673 674 ptr = malloc(items * size, M_TEMP, M_NOWAIT); 675 return ptr; 676 } 677 678 static void 679 z_free(void *nil, void *ptr) 680 { 681 free(ptr, M_TEMP); 682 } 683 684 685 static int 686 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 687 { 688 z_stream zs; 689 char *inflate_buffer; 690 const struct firmware *fw; 691 const mcp_gen_header_t *hdr; 692 unsigned hdr_offset; 693 int status; 694 unsigned int i; 695 char dummy; 696 size_t fw_len; 697 698 fw = firmware_get(sc->fw_name); 699 if (fw == NULL) { 700 device_printf(sc->dev, "Could not find firmware image %s\n", 701 sc->fw_name); 702 return ENOENT; 703 } 704 705 706 707 /* setup zlib and decompress f/w */ 708 bzero(&zs, sizeof (zs)); 709 zs.zalloc = z_alloc; 710 zs.zfree = z_free; 711 status = inflateInit(&zs); 712 if (status != Z_OK) { 713 status = EIO; 714 goto abort_with_fw; 715 } 716 717 /* the uncompressed size is stored as the firmware version, 718 which would otherwise go unused */ 719 fw_len = (size_t) fw->version; 720 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); 721 if (inflate_buffer == NULL) 722 goto abort_with_zs; 723 zs.avail_in = fw->datasize; 724 zs.next_in = __DECONST(char *, fw->data); 725 zs.avail_out = fw_len; 726 zs.next_out = inflate_buffer; 727 status = inflate(&zs, Z_FINISH); 728 if (status != Z_STREAM_END) { 729 device_printf(sc->dev, "zlib %d\n", status); 730 status = EIO; 731 goto abort_with_buffer; 732 } 733 734 /* check id */ 735 hdr_offset = htobe32(*(const uint32_t *) 736 (inflate_buffer + MCP_HEADER_PTR_OFFSET)); 737 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 738 device_printf(sc->dev, "Bad firmware file"); 739 status = EIO; 740 goto abort_with_buffer; 741 } 742 hdr = (const void*)(inflate_buffer + hdr_offset); 743 744 status = mxge_validate_firmware(sc, hdr); 745 if (status != 0) 746 goto abort_with_buffer; 747 748 /* Copy the inflated firmware to NIC SRAM. */ 749 for (i = 0; i < fw_len; i += 256) { 750 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 751 inflate_buffer + i, 752 min(256U, (unsigned)(fw_len - i))); 753 wmb(); 754 dummy = *sc->sram; 755 wmb(); 756 } 757 758 *limit = fw_len; 759 status = 0; 760 abort_with_buffer: 761 free(inflate_buffer, M_TEMP); 762 abort_with_zs: 763 inflateEnd(&zs); 764 abort_with_fw: 765 firmware_put(fw, FIRMWARE_UNLOAD); 766 return status; 767 } 768 769 /* 770 * Enable or disable periodic RDMAs from the host to make certain 771 * chipsets resend dropped PCIe messages 772 */ 773 774 static void 775 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 776 { 777 char buf_bytes[72]; 778 volatile uint32_t *confirm; 779 volatile char *submit; 780 uint32_t *buf, dma_low, dma_high; 781 int i; 782 783 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 784 785 /* clear confirmation addr */ 786 confirm = (volatile uint32_t *)sc->cmd; 787 *confirm = 0; 788 wmb(); 789 790 /* send an rdma command to the PCIe engine, and wait for the 791 response in the confirmation address. The firmware should 792 write a -1 there to indicate it is alive and well 793 */ 794 795 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 796 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 797 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 798 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 799 buf[2] = htobe32(0xffffffff); /* confirm data */ 800 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 801 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 802 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 803 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 804 buf[5] = htobe32(enable); /* enable? */ 805 806 807 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 808 809 mxge_pio_copy(submit, buf, 64); 810 wmb(); 811 DELAY(1000); 812 wmb(); 813 i = 0; 814 while (*confirm != 0xffffffff && i < 20) { 815 DELAY(1000); 816 i++; 817 } 818 if (*confirm != 0xffffffff) { 819 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 820 (enable ? "enable" : "disable"), confirm, 821 *confirm); 822 } 823 return; 824 } 825 826 static int 827 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 828 { 829 mcp_cmd_t *buf; 830 char buf_bytes[sizeof(*buf) + 8]; 831 volatile mcp_cmd_response_t *response = sc->cmd; 832 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 833 uint32_t dma_low, dma_high; 834 int err, sleep_total = 0; 835 836 /* ensure buf is aligned to 8 bytes */ 837 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 838 839 buf->data0 = htobe32(data->data0); 840 buf->data1 = htobe32(data->data1); 841 buf->data2 = htobe32(data->data2); 842 buf->cmd = htobe32(cmd); 843 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 844 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 845 846 buf->response_addr.low = htobe32(dma_low); 847 buf->response_addr.high = htobe32(dma_high); 848 mtx_lock(&sc->cmd_mtx); 849 response->result = 0xffffffff; 850 wmb(); 851 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 852 853 /* wait up to 20ms */ 854 err = EAGAIN; 855 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 856 bus_dmamap_sync(sc->cmd_dma.dmat, 857 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 858 wmb(); 859 switch (be32toh(response->result)) { 860 case 0: 861 data->data0 = be32toh(response->data); 862 err = 0; 863 break; 864 case 0xffffffff: 865 DELAY(1000); 866 break; 867 case MXGEFW_CMD_UNKNOWN: 868 err = ENOSYS; 869 break; 870 case MXGEFW_CMD_ERROR_UNALIGNED: 871 err = E2BIG; 872 break; 873 case MXGEFW_CMD_ERROR_BUSY: 874 err = EBUSY; 875 break; 876 default: 877 device_printf(sc->dev, 878 "mxge: command %d " 879 "failed, result = %d\n", 880 cmd, be32toh(response->result)); 881 err = ENXIO; 882 break; 883 } 884 if (err != EAGAIN) 885 break; 886 } 887 if (err == EAGAIN) 888 device_printf(sc->dev, "mxge: command %d timed out" 889 "result = %d\n", 890 cmd, be32toh(response->result)); 891 mtx_unlock(&sc->cmd_mtx); 892 return err; 893 } 894 895 static int 896 mxge_adopt_running_firmware(mxge_softc_t *sc) 897 { 898 struct mcp_gen_header *hdr; 899 const size_t bytes = sizeof (struct mcp_gen_header); 900 size_t hdr_offset; 901 int status; 902 903 /* find running firmware header */ 904 hdr_offset = htobe32(*(volatile uint32_t *) 905 (sc->sram + MCP_HEADER_PTR_OFFSET)); 906 907 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 908 device_printf(sc->dev, 909 "Running firmware has bad header offset (%d)\n", 910 (int)hdr_offset); 911 return EIO; 912 } 913 914 /* copy header of running firmware from SRAM to host memory to 915 * validate firmware */ 916 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 917 if (hdr == NULL) { 918 device_printf(sc->dev, "could not malloc firmware hdr\n"); 919 return ENOMEM; 920 } 921 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 922 rman_get_bushandle(sc->mem_res), 923 hdr_offset, (char *)hdr, bytes); 924 status = mxge_validate_firmware(sc, hdr); 925 free(hdr, M_DEVBUF); 926 927 /* 928 * check to see if adopted firmware has bug where adopting 929 * it will cause broadcasts to be filtered unless the NIC 930 * is kept in ALLMULTI mode 931 */ 932 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 933 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 934 sc->adopted_rx_filter_bug = 1; 935 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 936 "working around rx filter bug\n", 937 sc->fw_ver_major, sc->fw_ver_minor, 938 sc->fw_ver_tiny); 939 } 940 941 return status; 942 } 943 944 945 static int 946 mxge_load_firmware(mxge_softc_t *sc, int adopt) 947 { 948 volatile uint32_t *confirm; 949 volatile char *submit; 950 char buf_bytes[72]; 951 uint32_t *buf, size, dma_low, dma_high; 952 int status, i; 953 954 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 955 956 size = sc->sram_size; 957 status = mxge_load_firmware_helper(sc, &size); 958 if (status) { 959 if (!adopt) 960 return status; 961 /* Try to use the currently running firmware, if 962 it is new enough */ 963 status = mxge_adopt_running_firmware(sc); 964 if (status) { 965 device_printf(sc->dev, 966 "failed to adopt running firmware\n"); 967 return status; 968 } 969 device_printf(sc->dev, 970 "Successfully adopted running firmware\n"); 971 if (sc->tx_boundary == 4096) { 972 device_printf(sc->dev, 973 "Using firmware currently running on NIC" 974 ". For optimal\n"); 975 device_printf(sc->dev, 976 "performance consider loading optimized " 977 "firmware\n"); 978 } 979 sc->fw_name = mxge_fw_unaligned; 980 sc->tx_boundary = 2048; 981 return 0; 982 } 983 /* clear confirmation addr */ 984 confirm = (volatile uint32_t *)sc->cmd; 985 *confirm = 0; 986 wmb(); 987 /* send a reload command to the bootstrap MCP, and wait for the 988 response in the confirmation address. The firmware should 989 write a -1 there to indicate it is alive and well 990 */ 991 992 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 993 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 994 995 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 996 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 997 buf[2] = htobe32(0xffffffff); /* confirm data */ 998 999 /* FIX: All newest firmware should un-protect the bottom of 1000 the sram before handoff. However, the very first interfaces 1001 do not. Therefore the handoff copy must skip the first 8 bytes 1002 */ 1003 /* where the code starts*/ 1004 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 1005 buf[4] = htobe32(size - 8); /* length of code */ 1006 buf[5] = htobe32(8); /* where to copy to */ 1007 buf[6] = htobe32(0); /* where to jump to */ 1008 1009 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 1010 mxge_pio_copy(submit, buf, 64); 1011 wmb(); 1012 DELAY(1000); 1013 wmb(); 1014 i = 0; 1015 while (*confirm != 0xffffffff && i < 20) { 1016 DELAY(1000*10); 1017 i++; 1018 bus_dmamap_sync(sc->cmd_dma.dmat, 1019 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 1020 } 1021 if (*confirm != 0xffffffff) { 1022 device_printf(sc->dev,"handoff failed (%p = 0x%x)", 1023 confirm, *confirm); 1024 1025 return ENXIO; 1026 } 1027 return 0; 1028 } 1029 1030 static int 1031 mxge_update_mac_address(mxge_softc_t *sc) 1032 { 1033 mxge_cmd_t cmd; 1034 uint8_t *addr = sc->mac_addr; 1035 int status; 1036 1037 1038 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 1039 | (addr[2] << 8) | addr[3]); 1040 1041 cmd.data1 = ((addr[4] << 8) | (addr[5])); 1042 1043 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 1044 return status; 1045 } 1046 1047 static int 1048 mxge_change_pause(mxge_softc_t *sc, int pause) 1049 { 1050 mxge_cmd_t cmd; 1051 int status; 1052 1053 if (pause) 1054 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, 1055 &cmd); 1056 else 1057 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, 1058 &cmd); 1059 1060 if (status) { 1061 device_printf(sc->dev, "Failed to set flow control mode\n"); 1062 return ENXIO; 1063 } 1064 sc->pause = pause; 1065 return 0; 1066 } 1067 1068 static void 1069 mxge_change_promisc(mxge_softc_t *sc, int promisc) 1070 { 1071 mxge_cmd_t cmd; 1072 int status; 1073 1074 if (mxge_always_promisc) 1075 promisc = 1; 1076 1077 if (promisc) 1078 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, 1079 &cmd); 1080 else 1081 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, 1082 &cmd); 1083 1084 if (status) { 1085 device_printf(sc->dev, "Failed to set promisc mode\n"); 1086 } 1087 } 1088 1089 static void 1090 mxge_set_multicast_list(mxge_softc_t *sc) 1091 { 1092 mxge_cmd_t cmd; 1093 struct ifmultiaddr *ifma; 1094 struct ifnet *ifp = sc->ifp; 1095 int err; 1096 1097 /* This firmware is known to not support multicast */ 1098 if (!sc->fw_multicast_support) 1099 return; 1100 1101 /* Disable multicast filtering while we play with the lists*/ 1102 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1103 if (err != 0) { 1104 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," 1105 " error status: %d\n", err); 1106 return; 1107 } 1108 1109 if (sc->adopted_rx_filter_bug) 1110 return; 1111 1112 if (ifp->if_flags & IFF_ALLMULTI) 1113 /* request to disable multicast filtering, so quit here */ 1114 return; 1115 1116 /* Flush all the filters */ 1117 1118 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1119 if (err != 0) { 1120 device_printf(sc->dev, 1121 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" 1122 ", error status: %d\n", err); 1123 return; 1124 } 1125 1126 /* Walk the multicast list, and add each address */ 1127 1128 IF_ADDR_LOCK(ifp); 1129 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1130 if (ifma->ifma_addr->sa_family != AF_LINK) 1131 continue; 1132 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1133 &cmd.data0, 4); 1134 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, 1135 &cmd.data1, 2); 1136 cmd.data0 = htonl(cmd.data0); 1137 cmd.data1 = htonl(cmd.data1); 1138 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1139 if (err != 0) { 1140 device_printf(sc->dev, "Failed " 1141 "MXGEFW_JOIN_MULTICAST_GROUP, error status:" 1142 "%d\t", err); 1143 /* abort, leaving multicast filtering off */ 1144 IF_ADDR_UNLOCK(ifp); 1145 return; 1146 } 1147 } 1148 IF_ADDR_UNLOCK(ifp); 1149 /* Enable multicast filtering */ 1150 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1151 if (err != 0) { 1152 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI" 1153 ", error status: %d\n", err); 1154 } 1155 } 1156 1157 static int 1158 mxge_max_mtu(mxge_softc_t *sc) 1159 { 1160 mxge_cmd_t cmd; 1161 int status; 1162 1163 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) 1164 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1165 1166 /* try to set nbufs to see if it we can 1167 use virtually contiguous jumbos */ 1168 cmd.data0 = 0; 1169 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 1170 &cmd); 1171 if (status == 0) 1172 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1173 1174 /* otherwise, we're limited to MJUMPAGESIZE */ 1175 return MJUMPAGESIZE - MXGEFW_PAD; 1176 } 1177 1178 static int 1179 mxge_reset(mxge_softc_t *sc, int interrupts_setup) 1180 { 1181 struct mxge_slice_state *ss; 1182 mxge_rx_done_t *rx_done; 1183 volatile uint32_t *irq_claim; 1184 mxge_cmd_t cmd; 1185 int slice, status; 1186 1187 /* try to send a reset command to the card to see if it 1188 is alive */ 1189 memset(&cmd, 0, sizeof (cmd)); 1190 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1191 if (status != 0) { 1192 device_printf(sc->dev, "failed reset\n"); 1193 return ENXIO; 1194 } 1195 1196 mxge_dummy_rdma(sc, 1); 1197 1198 1199 /* set the intrq size */ 1200 cmd.data0 = sc->rx_ring_size; 1201 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1202 1203 /* 1204 * Even though we already know how many slices are supported 1205 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES 1206 * has magic side effects, and must be called after a reset. 1207 * It must be called prior to calling any RSS related cmds, 1208 * including assigning an interrupt queue for anything but 1209 * slice 0. It must also be called *after* 1210 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 1211 * the firmware to compute offsets. 1212 */ 1213 1214 if (sc->num_slices > 1) { 1215 /* ask the maximum number of slices it supports */ 1216 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 1217 &cmd); 1218 if (status != 0) { 1219 device_printf(sc->dev, 1220 "failed to get number of slices\n"); 1221 return status; 1222 } 1223 /* 1224 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 1225 * to setting up the interrupt queue DMA 1226 */ 1227 cmd.data0 = sc->num_slices; 1228 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 1229 #ifdef IFNET_BUF_RING 1230 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 1231 #endif 1232 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, 1233 &cmd); 1234 if (status != 0) { 1235 device_printf(sc->dev, 1236 "failed to set number of slices\n"); 1237 return status; 1238 } 1239 } 1240 1241 1242 if (interrupts_setup) { 1243 /* Now exchange information about interrupts */ 1244 for (slice = 0; slice < sc->num_slices; slice++) { 1245 rx_done = &sc->ss[slice].rx_done; 1246 memset(rx_done->entry, 0, sc->rx_ring_size); 1247 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr); 1248 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr); 1249 cmd.data2 = slice; 1250 status |= mxge_send_cmd(sc, 1251 MXGEFW_CMD_SET_INTRQ_DMA, 1252 &cmd); 1253 } 1254 } 1255 1256 status |= mxge_send_cmd(sc, 1257 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); 1258 1259 1260 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1261 1262 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1263 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1264 1265 1266 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1267 &cmd); 1268 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1269 if (status != 0) { 1270 device_printf(sc->dev, "failed set interrupt parameters\n"); 1271 return status; 1272 } 1273 1274 1275 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1276 1277 1278 /* run a DMA benchmark */ 1279 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST); 1280 1281 for (slice = 0; slice < sc->num_slices; slice++) { 1282 ss = &sc->ss[slice]; 1283 1284 ss->irq_claim = irq_claim + (2 * slice); 1285 /* reset mcp/driver shared state back to 0 */ 1286 ss->rx_done.idx = 0; 1287 ss->rx_done.cnt = 0; 1288 ss->tx.req = 0; 1289 ss->tx.done = 0; 1290 ss->tx.pkt_done = 0; 1291 ss->tx.queue_active = 0; 1292 ss->tx.activate = 0; 1293 ss->tx.deactivate = 0; 1294 ss->tx.wake = 0; 1295 ss->tx.defrag = 0; 1296 ss->tx.stall = 0; 1297 ss->rx_big.cnt = 0; 1298 ss->rx_small.cnt = 0; 1299 ss->lro_bad_csum = 0; 1300 ss->lro_queued = 0; 1301 ss->lro_flushed = 0; 1302 if (ss->fw_stats != NULL) { 1303 ss->fw_stats->valid = 0; 1304 ss->fw_stats->send_done_count = 0; 1305 } 1306 } 1307 sc->rdma_tags_available = 15; 1308 status = mxge_update_mac_address(sc); 1309 mxge_change_promisc(sc, 0); 1310 mxge_change_pause(sc, sc->pause); 1311 mxge_set_multicast_list(sc); 1312 return status; 1313 } 1314 1315 static int 1316 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1317 { 1318 mxge_softc_t *sc; 1319 unsigned int intr_coal_delay; 1320 int err; 1321 1322 sc = arg1; 1323 intr_coal_delay = sc->intr_coal_delay; 1324 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1325 if (err != 0) { 1326 return err; 1327 } 1328 if (intr_coal_delay == sc->intr_coal_delay) 1329 return 0; 1330 1331 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1332 return EINVAL; 1333 1334 mtx_lock(&sc->driver_mtx); 1335 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1336 sc->intr_coal_delay = intr_coal_delay; 1337 1338 mtx_unlock(&sc->driver_mtx); 1339 return err; 1340 } 1341 1342 static int 1343 mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1344 { 1345 mxge_softc_t *sc; 1346 unsigned int enabled; 1347 int err; 1348 1349 sc = arg1; 1350 enabled = sc->pause; 1351 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1352 if (err != 0) { 1353 return err; 1354 } 1355 if (enabled == sc->pause) 1356 return 0; 1357 1358 mtx_lock(&sc->driver_mtx); 1359 err = mxge_change_pause(sc, enabled); 1360 mtx_unlock(&sc->driver_mtx); 1361 return err; 1362 } 1363 1364 static int 1365 mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt) 1366 { 1367 struct ifnet *ifp; 1368 int err = 0; 1369 1370 ifp = sc->ifp; 1371 if (lro_cnt == 0) 1372 ifp->if_capenable &= ~IFCAP_LRO; 1373 else 1374 ifp->if_capenable |= IFCAP_LRO; 1375 sc->lro_cnt = lro_cnt; 1376 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1377 mxge_close(sc); 1378 err = mxge_open(sc); 1379 } 1380 return err; 1381 } 1382 1383 static int 1384 mxge_change_lro(SYSCTL_HANDLER_ARGS) 1385 { 1386 mxge_softc_t *sc; 1387 unsigned int lro_cnt; 1388 int err; 1389 1390 sc = arg1; 1391 lro_cnt = sc->lro_cnt; 1392 err = sysctl_handle_int(oidp, &lro_cnt, arg2, req); 1393 if (err != 0) 1394 return err; 1395 1396 if (lro_cnt == sc->lro_cnt) 1397 return 0; 1398 1399 if (lro_cnt > 128) 1400 return EINVAL; 1401 1402 mtx_lock(&sc->driver_mtx); 1403 err = mxge_change_lro_locked(sc, lro_cnt); 1404 mtx_unlock(&sc->driver_mtx); 1405 return err; 1406 } 1407 1408 static int 1409 mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1410 { 1411 int err; 1412 1413 if (arg1 == NULL) 1414 return EFAULT; 1415 arg2 = be32toh(*(int *)arg1); 1416 arg1 = NULL; 1417 err = sysctl_handle_int(oidp, arg1, arg2, req); 1418 1419 return err; 1420 } 1421 1422 static void 1423 mxge_rem_sysctls(mxge_softc_t *sc) 1424 { 1425 struct mxge_slice_state *ss; 1426 int slice; 1427 1428 if (sc->slice_sysctl_tree == NULL) 1429 return; 1430 1431 for (slice = 0; slice < sc->num_slices; slice++) { 1432 ss = &sc->ss[slice]; 1433 if (ss == NULL || ss->sysctl_tree == NULL) 1434 continue; 1435 sysctl_ctx_free(&ss->sysctl_ctx); 1436 ss->sysctl_tree = NULL; 1437 } 1438 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1439 sc->slice_sysctl_tree = NULL; 1440 } 1441 1442 static void 1443 mxge_add_sysctls(mxge_softc_t *sc) 1444 { 1445 struct sysctl_ctx_list *ctx; 1446 struct sysctl_oid_list *children; 1447 mcp_irq_data_t *fw; 1448 struct mxge_slice_state *ss; 1449 int slice; 1450 char slice_num[8]; 1451 1452 ctx = device_get_sysctl_ctx(sc->dev); 1453 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1454 fw = sc->ss[0].fw_stats; 1455 1456 /* random information */ 1457 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1458 "firmware_version", 1459 CTLFLAG_RD, &sc->fw_version, 1460 0, "firmware version"); 1461 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1462 "serial_number", 1463 CTLFLAG_RD, &sc->serial_number_string, 1464 0, "serial number"); 1465 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1466 "product_code", 1467 CTLFLAG_RD, &sc->product_code_string, 1468 0, "product_code"); 1469 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1470 "pcie_link_width", 1471 CTLFLAG_RD, &sc->link_width, 1472 0, "tx_boundary"); 1473 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1474 "tx_boundary", 1475 CTLFLAG_RD, &sc->tx_boundary, 1476 0, "tx_boundary"); 1477 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1478 "write_combine", 1479 CTLFLAG_RD, &sc->wc, 1480 0, "write combining PIO?"); 1481 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1482 "read_dma_MBs", 1483 CTLFLAG_RD, &sc->read_dma, 1484 0, "DMA Read speed in MB/s"); 1485 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1486 "write_dma_MBs", 1487 CTLFLAG_RD, &sc->write_dma, 1488 0, "DMA Write speed in MB/s"); 1489 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1490 "read_write_dma_MBs", 1491 CTLFLAG_RD, &sc->read_write_dma, 1492 0, "DMA concurrent Read/Write speed in MB/s"); 1493 1494 1495 /* performance related tunables */ 1496 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1497 "intr_coal_delay", 1498 CTLTYPE_INT|CTLFLAG_RW, sc, 1499 0, mxge_change_intr_coal, 1500 "I", "interrupt coalescing delay in usecs"); 1501 1502 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1503 "flow_control_enabled", 1504 CTLTYPE_INT|CTLFLAG_RW, sc, 1505 0, mxge_change_flow_control, 1506 "I", "interrupt coalescing delay in usecs"); 1507 1508 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1509 "deassert_wait", 1510 CTLFLAG_RW, &mxge_deassert_wait, 1511 0, "Wait for IRQ line to go low in ihandler"); 1512 1513 /* stats block from firmware is in network byte order. 1514 Need to swap it */ 1515 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1516 "link_up", 1517 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 1518 0, mxge_handle_be32, 1519 "I", "link up"); 1520 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1521 "rdma_tags_available", 1522 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 1523 0, mxge_handle_be32, 1524 "I", "rdma_tags_available"); 1525 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1526 "dropped_bad_crc32", 1527 CTLTYPE_INT|CTLFLAG_RD, 1528 &fw->dropped_bad_crc32, 1529 0, mxge_handle_be32, 1530 "I", "dropped_bad_crc32"); 1531 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1532 "dropped_bad_phy", 1533 CTLTYPE_INT|CTLFLAG_RD, 1534 &fw->dropped_bad_phy, 1535 0, mxge_handle_be32, 1536 "I", "dropped_bad_phy"); 1537 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1538 "dropped_link_error_or_filtered", 1539 CTLTYPE_INT|CTLFLAG_RD, 1540 &fw->dropped_link_error_or_filtered, 1541 0, mxge_handle_be32, 1542 "I", "dropped_link_error_or_filtered"); 1543 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1544 "dropped_link_overflow", 1545 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 1546 0, mxge_handle_be32, 1547 "I", "dropped_link_overflow"); 1548 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1549 "dropped_multicast_filtered", 1550 CTLTYPE_INT|CTLFLAG_RD, 1551 &fw->dropped_multicast_filtered, 1552 0, mxge_handle_be32, 1553 "I", "dropped_multicast_filtered"); 1554 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1555 "dropped_no_big_buffer", 1556 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 1557 0, mxge_handle_be32, 1558 "I", "dropped_no_big_buffer"); 1559 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1560 "dropped_no_small_buffer", 1561 CTLTYPE_INT|CTLFLAG_RD, 1562 &fw->dropped_no_small_buffer, 1563 0, mxge_handle_be32, 1564 "I", "dropped_no_small_buffer"); 1565 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1566 "dropped_overrun", 1567 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 1568 0, mxge_handle_be32, 1569 "I", "dropped_overrun"); 1570 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1571 "dropped_pause", 1572 CTLTYPE_INT|CTLFLAG_RD, 1573 &fw->dropped_pause, 1574 0, mxge_handle_be32, 1575 "I", "dropped_pause"); 1576 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1577 "dropped_runt", 1578 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 1579 0, mxge_handle_be32, 1580 "I", "dropped_runt"); 1581 1582 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1583 "dropped_unicast_filtered", 1584 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 1585 0, mxge_handle_be32, 1586 "I", "dropped_unicast_filtered"); 1587 1588 /* verbose printing? */ 1589 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1590 "verbose", 1591 CTLFLAG_RW, &mxge_verbose, 1592 0, "verbose printing"); 1593 1594 /* lro */ 1595 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1596 "lro_cnt", 1597 CTLTYPE_INT|CTLFLAG_RW, sc, 1598 0, mxge_change_lro, 1599 "I", "number of lro merge queues"); 1600 1601 1602 /* add counters exported for debugging from all slices */ 1603 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1604 sc->slice_sysctl_tree = 1605 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO, 1606 "slice", CTLFLAG_RD, 0, ""); 1607 1608 for (slice = 0; slice < sc->num_slices; slice++) { 1609 ss = &sc->ss[slice]; 1610 sysctl_ctx_init(&ss->sysctl_ctx); 1611 ctx = &ss->sysctl_ctx; 1612 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1613 sprintf(slice_num, "%d", slice); 1614 ss->sysctl_tree = 1615 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num, 1616 CTLFLAG_RD, 0, ""); 1617 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1618 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1619 "rx_small_cnt", 1620 CTLFLAG_RD, &ss->rx_small.cnt, 1621 0, "rx_small_cnt"); 1622 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1623 "rx_big_cnt", 1624 CTLFLAG_RD, &ss->rx_big.cnt, 1625 0, "rx_small_cnt"); 1626 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1627 "lro_flushed", CTLFLAG_RD, &ss->lro_flushed, 1628 0, "number of lro merge queues flushed"); 1629 1630 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1631 "lro_queued", CTLFLAG_RD, &ss->lro_queued, 1632 0, "number of frames appended to lro merge" 1633 "queues"); 1634 1635 #ifndef IFNET_BUF_RING 1636 /* only transmit from slice 0 for now */ 1637 if (slice > 0) 1638 continue; 1639 #endif 1640 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1641 "tx_req", 1642 CTLFLAG_RD, &ss->tx.req, 1643 0, "tx_req"); 1644 1645 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1646 "tx_done", 1647 CTLFLAG_RD, &ss->tx.done, 1648 0, "tx_done"); 1649 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1650 "tx_pkt_done", 1651 CTLFLAG_RD, &ss->tx.pkt_done, 1652 0, "tx_done"); 1653 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1654 "tx_stall", 1655 CTLFLAG_RD, &ss->tx.stall, 1656 0, "tx_stall"); 1657 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1658 "tx_wake", 1659 CTLFLAG_RD, &ss->tx.wake, 1660 0, "tx_wake"); 1661 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1662 "tx_defrag", 1663 CTLFLAG_RD, &ss->tx.defrag, 1664 0, "tx_defrag"); 1665 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1666 "tx_queue_active", 1667 CTLFLAG_RD, &ss->tx.queue_active, 1668 0, "tx_queue_active"); 1669 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1670 "tx_activate", 1671 CTLFLAG_RD, &ss->tx.activate, 1672 0, "tx_activate"); 1673 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1674 "tx_deactivate", 1675 CTLFLAG_RD, &ss->tx.deactivate, 1676 0, "tx_deactivate"); 1677 } 1678 } 1679 1680 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1681 backwards one at a time and handle ring wraps */ 1682 1683 static inline void 1684 mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1685 mcp_kreq_ether_send_t *src, int cnt) 1686 { 1687 int idx, starting_slot; 1688 starting_slot = tx->req; 1689 while (cnt > 1) { 1690 cnt--; 1691 idx = (starting_slot + cnt) & tx->mask; 1692 mxge_pio_copy(&tx->lanai[idx], 1693 &src[cnt], sizeof(*src)); 1694 wmb(); 1695 } 1696 } 1697 1698 /* 1699 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1700 * at most 32 bytes at a time, so as to avoid involving the software 1701 * pio handler in the nic. We re-write the first segment's flags 1702 * to mark them valid only after writing the entire chain 1703 */ 1704 1705 static inline void 1706 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 1707 int cnt) 1708 { 1709 int idx, i; 1710 uint32_t *src_ints; 1711 volatile uint32_t *dst_ints; 1712 mcp_kreq_ether_send_t *srcp; 1713 volatile mcp_kreq_ether_send_t *dstp, *dst; 1714 uint8_t last_flags; 1715 1716 idx = tx->req & tx->mask; 1717 1718 last_flags = src->flags; 1719 src->flags = 0; 1720 wmb(); 1721 dst = dstp = &tx->lanai[idx]; 1722 srcp = src; 1723 1724 if ((idx + cnt) < tx->mask) { 1725 for (i = 0; i < (cnt - 1); i += 2) { 1726 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1727 wmb(); /* force write every 32 bytes */ 1728 srcp += 2; 1729 dstp += 2; 1730 } 1731 } else { 1732 /* submit all but the first request, and ensure 1733 that it is submitted below */ 1734 mxge_submit_req_backwards(tx, src, cnt); 1735 i = 0; 1736 } 1737 if (i < cnt) { 1738 /* submit the first request */ 1739 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1740 wmb(); /* barrier before setting valid flag */ 1741 } 1742 1743 /* re-write the last 32-bits with the valid flags */ 1744 src->flags = last_flags; 1745 src_ints = (uint32_t *)src; 1746 src_ints+=3; 1747 dst_ints = (volatile uint32_t *)dst; 1748 dst_ints+=3; 1749 *dst_ints = *src_ints; 1750 tx->req += cnt; 1751 wmb(); 1752 } 1753 1754 #if IFCAP_TSO4 1755 1756 static void 1757 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m, 1758 int busdma_seg_cnt, int ip_off) 1759 { 1760 mxge_tx_ring_t *tx; 1761 mcp_kreq_ether_send_t *req; 1762 bus_dma_segment_t *seg; 1763 struct ip *ip; 1764 struct tcphdr *tcp; 1765 uint32_t low, high_swapped; 1766 int len, seglen, cum_len, cum_len_next; 1767 int next_is_first, chop, cnt, rdma_count, small; 1768 uint16_t pseudo_hdr_offset, cksum_offset, mss; 1769 uint8_t flags, flags_next; 1770 static int once; 1771 1772 mss = m->m_pkthdr.tso_segsz; 1773 1774 /* negative cum_len signifies to the 1775 * send loop that we are still in the 1776 * header portion of the TSO packet. 1777 */ 1778 1779 /* ensure we have the ethernet, IP and TCP 1780 header together in the first mbuf, copy 1781 it to a scratch buffer if not */ 1782 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 1783 m_copydata(m, 0, ip_off + sizeof (*ip), 1784 ss->scratch); 1785 ip = (struct ip *)(ss->scratch + ip_off); 1786 } else { 1787 ip = (struct ip *)(mtod(m, char *) + ip_off); 1788 } 1789 if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2) 1790 + sizeof (*tcp))) { 1791 m_copydata(m, 0, ip_off + (ip->ip_hl << 2) 1792 + sizeof (*tcp), ss->scratch); 1793 ip = (struct ip *)(mtod(m, char *) + ip_off); 1794 } 1795 1796 tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2)); 1797 cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2)); 1798 1799 /* TSO implies checksum offload on this hardware */ 1800 cksum_offset = ip_off + (ip->ip_hl << 2); 1801 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1802 1803 1804 /* for TSO, pseudo_hdr_offset holds mss. 1805 * The firmware figures out where to put 1806 * the checksum by parsing the header. */ 1807 pseudo_hdr_offset = htobe16(mss); 1808 1809 tx = &ss->tx; 1810 req = tx->req_list; 1811 seg = tx->seg_list; 1812 cnt = 0; 1813 rdma_count = 0; 1814 /* "rdma_count" is the number of RDMAs belonging to the 1815 * current packet BEFORE the current send request. For 1816 * non-TSO packets, this is equal to "count". 1817 * For TSO packets, rdma_count needs to be reset 1818 * to 0 after a segment cut. 1819 * 1820 * The rdma_count field of the send request is 1821 * the number of RDMAs of the packet starting at 1822 * that request. For TSO send requests with one ore more cuts 1823 * in the middle, this is the number of RDMAs starting 1824 * after the last cut in the request. All previous 1825 * segments before the last cut implicitly have 1 RDMA. 1826 * 1827 * Since the number of RDMAs is not known beforehand, 1828 * it must be filled-in retroactively - after each 1829 * segmentation cut or at the end of the entire packet. 1830 */ 1831 1832 while (busdma_seg_cnt) { 1833 /* Break the busdma segment up into pieces*/ 1834 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1835 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1836 len = seg->ds_len; 1837 1838 while (len) { 1839 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1840 seglen = len; 1841 cum_len_next = cum_len + seglen; 1842 (req-rdma_count)->rdma_count = rdma_count + 1; 1843 if (__predict_true(cum_len >= 0)) { 1844 /* payload */ 1845 chop = (cum_len_next > mss); 1846 cum_len_next = cum_len_next % mss; 1847 next_is_first = (cum_len_next == 0); 1848 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1849 flags_next |= next_is_first * 1850 MXGEFW_FLAGS_FIRST; 1851 rdma_count |= -(chop | next_is_first); 1852 rdma_count += chop & !next_is_first; 1853 } else if (cum_len_next >= 0) { 1854 /* header ends */ 1855 rdma_count = -1; 1856 cum_len_next = 0; 1857 seglen = -cum_len; 1858 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1859 flags_next = MXGEFW_FLAGS_TSO_PLD | 1860 MXGEFW_FLAGS_FIRST | 1861 (small * MXGEFW_FLAGS_SMALL); 1862 } 1863 1864 req->addr_high = high_swapped; 1865 req->addr_low = htobe32(low); 1866 req->pseudo_hdr_offset = pseudo_hdr_offset; 1867 req->pad = 0; 1868 req->rdma_count = 1; 1869 req->length = htobe16(seglen); 1870 req->cksum_offset = cksum_offset; 1871 req->flags = flags | ((cum_len & 1) * 1872 MXGEFW_FLAGS_ALIGN_ODD); 1873 low += seglen; 1874 len -= seglen; 1875 cum_len = cum_len_next; 1876 flags = flags_next; 1877 req++; 1878 cnt++; 1879 rdma_count++; 1880 if (__predict_false(cksum_offset > seglen)) 1881 cksum_offset -= seglen; 1882 else 1883 cksum_offset = 0; 1884 if (__predict_false(cnt > tx->max_desc)) 1885 goto drop; 1886 } 1887 busdma_seg_cnt--; 1888 seg++; 1889 } 1890 (req-rdma_count)->rdma_count = rdma_count; 1891 1892 do { 1893 req--; 1894 req->flags |= MXGEFW_FLAGS_TSO_LAST; 1895 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 1896 1897 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1898 mxge_submit_req(tx, tx->req_list, cnt); 1899 #ifdef IFNET_BUF_RING 1900 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 1901 /* tell the NIC to start polling this slice */ 1902 *tx->send_go = 1; 1903 tx->queue_active = 1; 1904 tx->activate++; 1905 wmb(); 1906 } 1907 #endif 1908 return; 1909 1910 drop: 1911 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 1912 m_freem(m); 1913 ss->oerrors++; 1914 if (!once) { 1915 printf("tx->max_desc exceeded via TSO!\n"); 1916 printf("mss = %d, %ld, %d!\n", mss, 1917 (long)seg - (long)tx->seg_list, tx->max_desc); 1918 once = 1; 1919 } 1920 return; 1921 1922 } 1923 1924 #endif /* IFCAP_TSO4 */ 1925 1926 #ifdef MXGE_NEW_VLAN_API 1927 /* 1928 * We reproduce the software vlan tag insertion from 1929 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" 1930 * vlan tag insertion. We need to advertise this in order to have the 1931 * vlan interface respect our csum offload flags. 1932 */ 1933 static struct mbuf * 1934 mxge_vlan_tag_insert(struct mbuf *m) 1935 { 1936 struct ether_vlan_header *evl; 1937 1938 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT); 1939 if (__predict_false(m == NULL)) 1940 return NULL; 1941 if (m->m_len < sizeof(*evl)) { 1942 m = m_pullup(m, sizeof(*evl)); 1943 if (__predict_false(m == NULL)) 1944 return NULL; 1945 } 1946 /* 1947 * Transform the Ethernet header into an Ethernet header 1948 * with 802.1Q encapsulation. 1949 */ 1950 evl = mtod(m, struct ether_vlan_header *); 1951 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 1952 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 1953 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 1954 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 1955 m->m_flags &= ~M_VLANTAG; 1956 return m; 1957 } 1958 #endif /* MXGE_NEW_VLAN_API */ 1959 1960 static void 1961 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 1962 { 1963 mxge_softc_t *sc; 1964 mcp_kreq_ether_send_t *req; 1965 bus_dma_segment_t *seg; 1966 struct mbuf *m_tmp; 1967 struct ifnet *ifp; 1968 mxge_tx_ring_t *tx; 1969 struct ip *ip; 1970 int cnt, cum_len, err, i, idx, odd_flag, ip_off; 1971 uint16_t pseudo_hdr_offset; 1972 uint8_t flags, cksum_offset; 1973 1974 1975 sc = ss->sc; 1976 ifp = sc->ifp; 1977 tx = &ss->tx; 1978 1979 ip_off = sizeof (struct ether_header); 1980 #ifdef MXGE_NEW_VLAN_API 1981 if (m->m_flags & M_VLANTAG) { 1982 m = mxge_vlan_tag_insert(m); 1983 if (__predict_false(m == NULL)) 1984 goto drop; 1985 ip_off += ETHER_VLAN_ENCAP_LEN; 1986 } 1987 #endif 1988 /* (try to) map the frame for DMA */ 1989 idx = tx->req & tx->mask; 1990 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 1991 m, tx->seg_list, &cnt, 1992 BUS_DMA_NOWAIT); 1993 if (__predict_false(err == EFBIG)) { 1994 /* Too many segments in the chain. Try 1995 to defrag */ 1996 m_tmp = m_defrag(m, M_NOWAIT); 1997 if (m_tmp == NULL) { 1998 goto drop; 1999 } 2000 ss->tx.defrag++; 2001 m = m_tmp; 2002 err = bus_dmamap_load_mbuf_sg(tx->dmat, 2003 tx->info[idx].map, 2004 m, tx->seg_list, &cnt, 2005 BUS_DMA_NOWAIT); 2006 } 2007 if (__predict_false(err != 0)) { 2008 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 2009 " packet len = %d\n", err, m->m_pkthdr.len); 2010 goto drop; 2011 } 2012 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 2013 BUS_DMASYNC_PREWRITE); 2014 tx->info[idx].m = m; 2015 2016 #if IFCAP_TSO4 2017 /* TSO is different enough, we handle it in another routine */ 2018 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 2019 mxge_encap_tso(ss, m, cnt, ip_off); 2020 return; 2021 } 2022 #endif 2023 2024 req = tx->req_list; 2025 cksum_offset = 0; 2026 pseudo_hdr_offset = 0; 2027 flags = MXGEFW_FLAGS_NO_TSO; 2028 2029 /* checksum offloading? */ 2030 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) { 2031 /* ensure ip header is in first mbuf, copy 2032 it to a scratch buffer if not */ 2033 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 2034 m_copydata(m, 0, ip_off + sizeof (*ip), 2035 ss->scratch); 2036 ip = (struct ip *)(ss->scratch + ip_off); 2037 } else { 2038 ip = (struct ip *)(mtod(m, char *) + ip_off); 2039 } 2040 cksum_offset = ip_off + (ip->ip_hl << 2); 2041 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2042 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2043 req->cksum_offset = cksum_offset; 2044 flags |= MXGEFW_FLAGS_CKSUM; 2045 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2046 } else { 2047 odd_flag = 0; 2048 } 2049 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2050 flags |= MXGEFW_FLAGS_SMALL; 2051 2052 /* convert segments into a request list */ 2053 cum_len = 0; 2054 seg = tx->seg_list; 2055 req->flags = MXGEFW_FLAGS_FIRST; 2056 for (i = 0; i < cnt; i++) { 2057 req->addr_low = 2058 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2059 req->addr_high = 2060 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2061 req->length = htobe16(seg->ds_len); 2062 req->cksum_offset = cksum_offset; 2063 if (cksum_offset > seg->ds_len) 2064 cksum_offset -= seg->ds_len; 2065 else 2066 cksum_offset = 0; 2067 req->pseudo_hdr_offset = pseudo_hdr_offset; 2068 req->pad = 0; /* complete solid 16-byte block */ 2069 req->rdma_count = 1; 2070 req->flags |= flags | ((cum_len & 1) * odd_flag); 2071 cum_len += seg->ds_len; 2072 seg++; 2073 req++; 2074 req->flags = 0; 2075 } 2076 req--; 2077 /* pad runts to 60 bytes */ 2078 if (cum_len < 60) { 2079 req++; 2080 req->addr_low = 2081 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2082 req->addr_high = 2083 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2084 req->length = htobe16(60 - cum_len); 2085 req->cksum_offset = 0; 2086 req->pseudo_hdr_offset = pseudo_hdr_offset; 2087 req->pad = 0; /* complete solid 16-byte block */ 2088 req->rdma_count = 1; 2089 req->flags |= flags | ((cum_len & 1) * odd_flag); 2090 cnt++; 2091 } 2092 2093 tx->req_list[0].rdma_count = cnt; 2094 #if 0 2095 /* print what the firmware will see */ 2096 for (i = 0; i < cnt; i++) { 2097 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2098 "cso:%d, flags:0x%x, rdma:%d\n", 2099 i, (int)ntohl(tx->req_list[i].addr_high), 2100 (int)ntohl(tx->req_list[i].addr_low), 2101 (int)ntohs(tx->req_list[i].length), 2102 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2103 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2104 tx->req_list[i].rdma_count); 2105 } 2106 printf("--------------\n"); 2107 #endif 2108 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2109 mxge_submit_req(tx, tx->req_list, cnt); 2110 #ifdef IFNET_BUF_RING 2111 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2112 /* tell the NIC to start polling this slice */ 2113 *tx->send_go = 1; 2114 tx->queue_active = 1; 2115 tx->activate++; 2116 wmb(); 2117 } 2118 #endif 2119 return; 2120 2121 drop: 2122 m_freem(m); 2123 ss->oerrors++; 2124 return; 2125 } 2126 2127 #ifdef IFNET_BUF_RING 2128 static void 2129 mxge_qflush(struct ifnet *ifp) 2130 { 2131 mxge_softc_t *sc = ifp->if_softc; 2132 mxge_tx_ring_t *tx; 2133 struct mbuf *m; 2134 int slice; 2135 2136 for (slice = 0; slice < sc->num_slices; slice++) { 2137 tx = &sc->ss[slice].tx; 2138 mtx_lock(&tx->mtx); 2139 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2140 m_freem(m); 2141 mtx_unlock(&tx->mtx); 2142 } 2143 if_qflush(ifp); 2144 } 2145 2146 static inline void 2147 mxge_start_locked(struct mxge_slice_state *ss) 2148 { 2149 mxge_softc_t *sc; 2150 struct mbuf *m; 2151 struct ifnet *ifp; 2152 mxge_tx_ring_t *tx; 2153 2154 sc = ss->sc; 2155 ifp = sc->ifp; 2156 tx = &ss->tx; 2157 2158 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2159 m = drbr_dequeue(ifp, tx->br); 2160 if (m == NULL) { 2161 return; 2162 } 2163 /* let BPF see it */ 2164 BPF_MTAP(ifp, m); 2165 2166 /* give it to the nic */ 2167 mxge_encap(ss, m); 2168 } 2169 /* ran out of transmit slots */ 2170 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) 2171 && (!drbr_empty(ifp, tx->br))) { 2172 ss->if_drv_flags |= IFF_DRV_OACTIVE; 2173 tx->stall++; 2174 } 2175 } 2176 2177 static int 2178 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2179 { 2180 mxge_softc_t *sc; 2181 struct ifnet *ifp; 2182 mxge_tx_ring_t *tx; 2183 int err; 2184 2185 sc = ss->sc; 2186 ifp = sc->ifp; 2187 tx = &ss->tx; 2188 2189 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 2190 IFF_DRV_RUNNING) { 2191 err = drbr_enqueue(ifp, tx->br, m); 2192 return (err); 2193 } 2194 2195 if (drbr_empty(ifp, tx->br) && 2196 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2197 /* let BPF see it */ 2198 BPF_MTAP(ifp, m); 2199 /* give it to the nic */ 2200 mxge_encap(ss, m); 2201 drbr_stats_update(ifp, m->m_pkthdr.len, m->m_flags); 2202 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2203 return (err); 2204 } 2205 if (!drbr_empty(ifp, tx->br)) 2206 mxge_start_locked(ss); 2207 return (0); 2208 } 2209 2210 static int 2211 mxge_transmit(struct ifnet *ifp, struct mbuf *m) 2212 { 2213 mxge_softc_t *sc = ifp->if_softc; 2214 struct mxge_slice_state *ss; 2215 mxge_tx_ring_t *tx; 2216 int err = 0; 2217 int slice; 2218 2219 slice = m->m_pkthdr.flowid; 2220 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ 2221 2222 ss = &sc->ss[slice]; 2223 tx = &ss->tx; 2224 2225 if (mtx_trylock(&tx->mtx)) { 2226 err = mxge_transmit_locked(ss, m); 2227 mtx_unlock(&tx->mtx); 2228 } else { 2229 err = drbr_enqueue(ifp, tx->br, m); 2230 } 2231 2232 return (err); 2233 } 2234 2235 #else 2236 2237 static inline void 2238 mxge_start_locked(struct mxge_slice_state *ss) 2239 { 2240 mxge_softc_t *sc; 2241 struct mbuf *m; 2242 struct ifnet *ifp; 2243 mxge_tx_ring_t *tx; 2244 2245 sc = ss->sc; 2246 ifp = sc->ifp; 2247 tx = &ss->tx; 2248 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2249 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 2250 if (m == NULL) { 2251 return; 2252 } 2253 /* let BPF see it */ 2254 BPF_MTAP(ifp, m); 2255 2256 /* give it to the nic */ 2257 mxge_encap(ss, m); 2258 } 2259 /* ran out of transmit slots */ 2260 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 2261 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2262 tx->stall++; 2263 } 2264 } 2265 #endif 2266 static void 2267 mxge_start(struct ifnet *ifp) 2268 { 2269 mxge_softc_t *sc = ifp->if_softc; 2270 struct mxge_slice_state *ss; 2271 2272 /* only use the first slice for now */ 2273 ss = &sc->ss[0]; 2274 mtx_lock(&ss->tx.mtx); 2275 mxge_start_locked(ss); 2276 mtx_unlock(&ss->tx.mtx); 2277 } 2278 2279 /* 2280 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2281 * at most 32 bytes at a time, so as to avoid involving the software 2282 * pio handler in the nic. We re-write the first segment's low 2283 * DMA address to mark it valid only after we write the entire chunk 2284 * in a burst 2285 */ 2286 static inline void 2287 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2288 mcp_kreq_ether_recv_t *src) 2289 { 2290 uint32_t low; 2291 2292 low = src->addr_low; 2293 src->addr_low = 0xffffffff; 2294 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2295 wmb(); 2296 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2297 wmb(); 2298 src->addr_low = low; 2299 dst->addr_low = low; 2300 wmb(); 2301 } 2302 2303 static int 2304 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2305 { 2306 bus_dma_segment_t seg; 2307 struct mbuf *m; 2308 mxge_rx_ring_t *rx = &ss->rx_small; 2309 int cnt, err; 2310 2311 m = m_gethdr(M_DONTWAIT, MT_DATA); 2312 if (m == NULL) { 2313 rx->alloc_fail++; 2314 err = ENOBUFS; 2315 goto done; 2316 } 2317 m->m_len = MHLEN; 2318 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2319 &seg, &cnt, BUS_DMA_NOWAIT); 2320 if (err != 0) { 2321 m_free(m); 2322 goto done; 2323 } 2324 rx->info[idx].m = m; 2325 rx->shadow[idx].addr_low = 2326 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2327 rx->shadow[idx].addr_high = 2328 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2329 2330 done: 2331 if ((idx & 7) == 7) 2332 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2333 return err; 2334 } 2335 2336 static int 2337 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2338 { 2339 bus_dma_segment_t seg[3]; 2340 struct mbuf *m; 2341 mxge_rx_ring_t *rx = &ss->rx_big; 2342 int cnt, err, i; 2343 2344 if (rx->cl_size == MCLBYTES) 2345 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); 2346 else 2347 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2348 if (m == NULL) { 2349 rx->alloc_fail++; 2350 err = ENOBUFS; 2351 goto done; 2352 } 2353 m->m_len = rx->mlen; 2354 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2355 seg, &cnt, BUS_DMA_NOWAIT); 2356 if (err != 0) { 2357 m_free(m); 2358 goto done; 2359 } 2360 rx->info[idx].m = m; 2361 rx->shadow[idx].addr_low = 2362 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2363 rx->shadow[idx].addr_high = 2364 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2365 2366 #if MXGE_VIRT_JUMBOS 2367 for (i = 1; i < cnt; i++) { 2368 rx->shadow[idx + i].addr_low = 2369 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2370 rx->shadow[idx + i].addr_high = 2371 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2372 } 2373 #endif 2374 2375 done: 2376 for (i = 0; i < rx->nbufs; i++) { 2377 if ((idx & 7) == 7) { 2378 mxge_submit_8rx(&rx->lanai[idx - 7], 2379 &rx->shadow[idx - 7]); 2380 } 2381 idx++; 2382 } 2383 return err; 2384 } 2385 2386 /* 2387 * Myri10GE hardware checksums are not valid if the sender 2388 * padded the frame with non-zero padding. This is because 2389 * the firmware just does a simple 16-bit 1s complement 2390 * checksum across the entire frame, excluding the first 14 2391 * bytes. It is best to simply to check the checksum and 2392 * tell the stack about it only if the checksum is good 2393 */ 2394 2395 static inline uint16_t 2396 mxge_rx_csum(struct mbuf *m, int csum) 2397 { 2398 struct ether_header *eh; 2399 struct ip *ip; 2400 uint16_t c; 2401 2402 eh = mtod(m, struct ether_header *); 2403 2404 /* only deal with IPv4 TCP & UDP for now */ 2405 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP))) 2406 return 1; 2407 ip = (struct ip *)(eh + 1); 2408 if (__predict_false(ip->ip_p != IPPROTO_TCP && 2409 ip->ip_p != IPPROTO_UDP)) 2410 return 1; 2411 2412 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2413 htonl(ntohs(csum) + ntohs(ip->ip_len) + 2414 - (ip->ip_hl << 2) + ip->ip_p)); 2415 c ^= 0xffff; 2416 return (c); 2417 } 2418 2419 static void 2420 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2421 { 2422 struct ether_vlan_header *evl; 2423 struct ether_header *eh; 2424 uint32_t partial; 2425 2426 evl = mtod(m, struct ether_vlan_header *); 2427 eh = mtod(m, struct ether_header *); 2428 2429 /* 2430 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes 2431 * after what the firmware thought was the end of the ethernet 2432 * header. 2433 */ 2434 2435 /* put checksum into host byte order */ 2436 *csum = ntohs(*csum); 2437 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2438 (*csum) += ~partial; 2439 (*csum) += ((*csum) < ~partial); 2440 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2441 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2442 2443 /* restore checksum to network byte order; 2444 later consumers expect this */ 2445 *csum = htons(*csum); 2446 2447 /* save the tag */ 2448 #ifdef MXGE_NEW_VLAN_API 2449 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); 2450 #else 2451 { 2452 struct m_tag *mtag; 2453 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), 2454 M_NOWAIT); 2455 if (mtag == NULL) 2456 return; 2457 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); 2458 m_tag_prepend(m, mtag); 2459 } 2460 2461 #endif 2462 m->m_flags |= M_VLANTAG; 2463 2464 /* 2465 * Remove the 802.1q header by copying the Ethernet 2466 * addresses over it and adjusting the beginning of 2467 * the data in the mbuf. The encapsulated Ethernet 2468 * type field is already in place. 2469 */ 2470 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2471 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2472 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2473 } 2474 2475 2476 static inline void 2477 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2478 { 2479 mxge_softc_t *sc; 2480 struct ifnet *ifp; 2481 struct mbuf *m; 2482 struct ether_header *eh; 2483 mxge_rx_ring_t *rx; 2484 bus_dmamap_t old_map; 2485 int idx; 2486 uint16_t tcpudp_csum; 2487 2488 sc = ss->sc; 2489 ifp = sc->ifp; 2490 rx = &ss->rx_big; 2491 idx = rx->cnt & rx->mask; 2492 rx->cnt += rx->nbufs; 2493 /* save a pointer to the received mbuf */ 2494 m = rx->info[idx].m; 2495 /* try to replace the received mbuf */ 2496 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2497 /* drop the frame -- the old mbuf is re-cycled */ 2498 ifp->if_ierrors++; 2499 return; 2500 } 2501 2502 /* unmap the received buffer */ 2503 old_map = rx->info[idx].map; 2504 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2505 bus_dmamap_unload(rx->dmat, old_map); 2506 2507 /* swap the bus_dmamap_t's */ 2508 rx->info[idx].map = rx->extra_map; 2509 rx->extra_map = old_map; 2510 2511 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2512 * aligned */ 2513 m->m_data += MXGEFW_PAD; 2514 2515 m->m_pkthdr.rcvif = ifp; 2516 m->m_len = m->m_pkthdr.len = len; 2517 ss->ipackets++; 2518 eh = mtod(m, struct ether_header *); 2519 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2520 mxge_vlan_tag_remove(m, &csum); 2521 } 2522 /* if the checksum is valid, mark it in the mbuf header */ 2523 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2524 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2525 return; 2526 /* otherwise, it was a UDP frame, or a TCP frame which 2527 we could not do LRO on. Tell the stack that the 2528 checksum is good */ 2529 m->m_pkthdr.csum_data = 0xffff; 2530 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2531 } 2532 /* flowid only valid if RSS hashing is enabled */ 2533 if (sc->num_slices > 1) { 2534 m->m_pkthdr.flowid = (ss - sc->ss); 2535 m->m_flags |= M_FLOWID; 2536 } 2537 /* pass the frame up the stack */ 2538 (*ifp->if_input)(ifp, m); 2539 } 2540 2541 static inline void 2542 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2543 { 2544 mxge_softc_t *sc; 2545 struct ifnet *ifp; 2546 struct ether_header *eh; 2547 struct mbuf *m; 2548 mxge_rx_ring_t *rx; 2549 bus_dmamap_t old_map; 2550 int idx; 2551 uint16_t tcpudp_csum; 2552 2553 sc = ss->sc; 2554 ifp = sc->ifp; 2555 rx = &ss->rx_small; 2556 idx = rx->cnt & rx->mask; 2557 rx->cnt++; 2558 /* save a pointer to the received mbuf */ 2559 m = rx->info[idx].m; 2560 /* try to replace the received mbuf */ 2561 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2562 /* drop the frame -- the old mbuf is re-cycled */ 2563 ifp->if_ierrors++; 2564 return; 2565 } 2566 2567 /* unmap the received buffer */ 2568 old_map = rx->info[idx].map; 2569 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2570 bus_dmamap_unload(rx->dmat, old_map); 2571 2572 /* swap the bus_dmamap_t's */ 2573 rx->info[idx].map = rx->extra_map; 2574 rx->extra_map = old_map; 2575 2576 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2577 * aligned */ 2578 m->m_data += MXGEFW_PAD; 2579 2580 m->m_pkthdr.rcvif = ifp; 2581 m->m_len = m->m_pkthdr.len = len; 2582 ss->ipackets++; 2583 eh = mtod(m, struct ether_header *); 2584 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2585 mxge_vlan_tag_remove(m, &csum); 2586 } 2587 /* if the checksum is valid, mark it in the mbuf header */ 2588 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2589 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2590 return; 2591 /* otherwise, it was a UDP frame, or a TCP frame which 2592 we could not do LRO on. Tell the stack that the 2593 checksum is good */ 2594 m->m_pkthdr.csum_data = 0xffff; 2595 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2596 } 2597 /* flowid only valid if RSS hashing is enabled */ 2598 if (sc->num_slices > 1) { 2599 m->m_pkthdr.flowid = (ss - sc->ss); 2600 m->m_flags |= M_FLOWID; 2601 } 2602 /* pass the frame up the stack */ 2603 (*ifp->if_input)(ifp, m); 2604 } 2605 2606 static inline void 2607 mxge_clean_rx_done(struct mxge_slice_state *ss) 2608 { 2609 mxge_rx_done_t *rx_done = &ss->rx_done; 2610 struct lro_entry *lro; 2611 int limit = 0; 2612 uint16_t length; 2613 uint16_t checksum; 2614 2615 2616 while (rx_done->entry[rx_done->idx].length != 0) { 2617 length = ntohs(rx_done->entry[rx_done->idx].length); 2618 rx_done->entry[rx_done->idx].length = 0; 2619 checksum = rx_done->entry[rx_done->idx].checksum; 2620 if (length <= (MHLEN - MXGEFW_PAD)) 2621 mxge_rx_done_small(ss, length, checksum); 2622 else 2623 mxge_rx_done_big(ss, length, checksum); 2624 rx_done->cnt++; 2625 rx_done->idx = rx_done->cnt & rx_done->mask; 2626 2627 /* limit potential for livelock */ 2628 if (__predict_false(++limit > rx_done->mask / 2)) 2629 break; 2630 } 2631 while (!SLIST_EMPTY(&ss->lro_active)) { 2632 lro = SLIST_FIRST(&ss->lro_active); 2633 SLIST_REMOVE_HEAD(&ss->lro_active, next); 2634 mxge_lro_flush(ss, lro); 2635 } 2636 } 2637 2638 2639 static inline void 2640 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2641 { 2642 struct ifnet *ifp; 2643 mxge_tx_ring_t *tx; 2644 struct mbuf *m; 2645 bus_dmamap_t map; 2646 int idx; 2647 int *flags; 2648 2649 tx = &ss->tx; 2650 ifp = ss->sc->ifp; 2651 while (tx->pkt_done != mcp_idx) { 2652 idx = tx->done & tx->mask; 2653 tx->done++; 2654 m = tx->info[idx].m; 2655 /* mbuf and DMA map only attached to the first 2656 segment per-mbuf */ 2657 if (m != NULL) { 2658 ss->opackets++; 2659 tx->info[idx].m = NULL; 2660 map = tx->info[idx].map; 2661 bus_dmamap_unload(tx->dmat, map); 2662 m_freem(m); 2663 } 2664 if (tx->info[idx].flag) { 2665 tx->info[idx].flag = 0; 2666 tx->pkt_done++; 2667 } 2668 } 2669 2670 /* If we have space, clear IFF_OACTIVE to tell the stack that 2671 its OK to send packets */ 2672 #ifdef IFNET_BUF_RING 2673 flags = &ss->if_drv_flags; 2674 #else 2675 flags = &ifp->if_drv_flags; 2676 #endif 2677 mtx_lock(&ss->tx.mtx); 2678 if ((*flags) & IFF_DRV_OACTIVE && 2679 tx->req - tx->done < (tx->mask + 1)/4) { 2680 *(flags) &= ~IFF_DRV_OACTIVE; 2681 ss->tx.wake++; 2682 mxge_start_locked(ss); 2683 } 2684 #ifdef IFNET_BUF_RING 2685 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { 2686 /* let the NIC stop polling this queue, since there 2687 * are no more transmits pending */ 2688 if (tx->req == tx->done) { 2689 *tx->send_stop = 1; 2690 tx->queue_active = 0; 2691 tx->deactivate++; 2692 wmb(); 2693 } 2694 } 2695 #endif 2696 mtx_unlock(&ss->tx.mtx); 2697 2698 } 2699 2700 static struct mxge_media_type mxge_xfp_media_types[] = 2701 { 2702 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2703 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2704 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2705 {0, (1 << 5), "10GBASE-ER"}, 2706 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2707 {0, (1 << 3), "10GBASE-SW"}, 2708 {0, (1 << 2), "10GBASE-LW"}, 2709 {0, (1 << 1), "10GBASE-EW"}, 2710 {0, (1 << 0), "Reserved"} 2711 }; 2712 static struct mxge_media_type mxge_sfp_media_types[] = 2713 { 2714 {0, (1 << 7), "Reserved"}, 2715 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2716 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2717 {IFM_10G_SR, (1 << 4), "10GBASE-SR"} 2718 }; 2719 2720 static void 2721 mxge_set_media(mxge_softc_t *sc, int type) 2722 { 2723 sc->media_flags |= type; 2724 ifmedia_add(&sc->media, sc->media_flags, 0, NULL); 2725 ifmedia_set(&sc->media, sc->media_flags); 2726 } 2727 2728 2729 /* 2730 * Determine the media type for a NIC. Some XFPs will identify 2731 * themselves only when their link is up, so this is initiated via a 2732 * link up interrupt. However, this can potentially take up to 2733 * several milliseconds, so it is run via the watchdog routine, rather 2734 * than in the interrupt handler itself. This need only be done 2735 * once, not each time the link is up. 2736 */ 2737 static void 2738 mxge_media_probe(mxge_softc_t *sc) 2739 { 2740 mxge_cmd_t cmd; 2741 char *cage_type; 2742 char *ptr; 2743 struct mxge_media_type *mxge_media_types = NULL; 2744 int i, err, ms, mxge_media_type_entries; 2745 uint32_t byte; 2746 2747 sc->need_media_probe = 0; 2748 2749 /* if we've already set a media type, we're done */ 2750 if (sc->media_flags != (IFM_ETHER | IFM_AUTO)) 2751 return; 2752 2753 /* 2754 * parse the product code to deterimine the interface type 2755 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2756 * after the 3rd dash in the driver's cached copy of the 2757 * EEPROM's product code string. 2758 */ 2759 ptr = sc->product_code_string; 2760 if (ptr == NULL) { 2761 device_printf(sc->dev, "Missing product code\n"); 2762 } 2763 2764 for (i = 0; i < 3; i++, ptr++) { 2765 ptr = index(ptr, '-'); 2766 if (ptr == NULL) { 2767 device_printf(sc->dev, 2768 "only %d dashes in PC?!?\n", i); 2769 return; 2770 } 2771 } 2772 if (*ptr == 'C') { 2773 /* -C is CX4 */ 2774 mxge_set_media(sc, IFM_10G_CX4); 2775 return; 2776 } 2777 else if (*ptr == 'Q') { 2778 /* -Q is Quad Ribbon Fiber */ 2779 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2780 /* FreeBSD has no media type for Quad ribbon fiber */ 2781 return; 2782 } 2783 2784 if (*ptr == 'R') { 2785 /* -R is XFP */ 2786 mxge_media_types = mxge_xfp_media_types; 2787 mxge_media_type_entries = 2788 sizeof (mxge_xfp_media_types) / 2789 sizeof (mxge_xfp_media_types[0]); 2790 byte = MXGE_XFP_COMPLIANCE_BYTE; 2791 cage_type = "XFP"; 2792 } 2793 2794 if (*ptr == 'S' || *(ptr +1) == 'S') { 2795 /* -S or -2S is SFP+ */ 2796 mxge_media_types = mxge_sfp_media_types; 2797 mxge_media_type_entries = 2798 sizeof (mxge_sfp_media_types) / 2799 sizeof (mxge_sfp_media_types[0]); 2800 cage_type = "SFP+"; 2801 byte = 3; 2802 } 2803 2804 if (mxge_media_types == NULL) { 2805 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2806 return; 2807 } 2808 2809 /* 2810 * At this point we know the NIC has an XFP cage, so now we 2811 * try to determine what is in the cage by using the 2812 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2813 * register. We read just one byte, which may take over 2814 * a millisecond 2815 */ 2816 2817 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2818 cmd.data1 = byte; 2819 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2820 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 2821 device_printf(sc->dev, "failed to read XFP\n"); 2822 } 2823 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 2824 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 2825 } 2826 if (err != MXGEFW_CMD_OK) { 2827 return; 2828 } 2829 2830 /* now we wait for the data to be cached */ 2831 cmd.data0 = byte; 2832 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2833 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 2834 DELAY(1000); 2835 cmd.data0 = byte; 2836 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2837 } 2838 if (err != MXGEFW_CMD_OK) { 2839 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 2840 cage_type, err, ms); 2841 return; 2842 } 2843 2844 if (cmd.data0 == mxge_media_types[0].bitmask) { 2845 if (mxge_verbose) 2846 device_printf(sc->dev, "%s:%s\n", cage_type, 2847 mxge_media_types[0].name); 2848 mxge_set_media(sc, IFM_10G_CX4); 2849 return; 2850 } 2851 for (i = 1; i < mxge_media_type_entries; i++) { 2852 if (cmd.data0 & mxge_media_types[i].bitmask) { 2853 if (mxge_verbose) 2854 device_printf(sc->dev, "%s:%s\n", 2855 cage_type, 2856 mxge_media_types[i].name); 2857 2858 mxge_set_media(sc, mxge_media_types[i].flag); 2859 return; 2860 } 2861 } 2862 device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type, 2863 cmd.data0); 2864 2865 return; 2866 } 2867 2868 static void 2869 mxge_intr(void *arg) 2870 { 2871 struct mxge_slice_state *ss = arg; 2872 mxge_softc_t *sc = ss->sc; 2873 mcp_irq_data_t *stats = ss->fw_stats; 2874 mxge_tx_ring_t *tx = &ss->tx; 2875 mxge_rx_done_t *rx_done = &ss->rx_done; 2876 uint32_t send_done_count; 2877 uint8_t valid; 2878 2879 2880 #ifndef IFNET_BUF_RING 2881 /* an interrupt on a non-zero slice is implicitly valid 2882 since MSI-X irqs are not shared */ 2883 if (ss != sc->ss) { 2884 mxge_clean_rx_done(ss); 2885 *ss->irq_claim = be32toh(3); 2886 return; 2887 } 2888 #endif 2889 2890 /* make sure the DMA has finished */ 2891 if (!stats->valid) { 2892 return; 2893 } 2894 valid = stats->valid; 2895 2896 if (sc->legacy_irq) { 2897 /* lower legacy IRQ */ 2898 *sc->irq_deassert = 0; 2899 if (!mxge_deassert_wait) 2900 /* don't wait for conf. that irq is low */ 2901 stats->valid = 0; 2902 } else { 2903 stats->valid = 0; 2904 } 2905 2906 /* loop while waiting for legacy irq deassertion */ 2907 do { 2908 /* check for transmit completes and receives */ 2909 send_done_count = be32toh(stats->send_done_count); 2910 while ((send_done_count != tx->pkt_done) || 2911 (rx_done->entry[rx_done->idx].length != 0)) { 2912 if (send_done_count != tx->pkt_done) 2913 mxge_tx_done(ss, (int)send_done_count); 2914 mxge_clean_rx_done(ss); 2915 send_done_count = be32toh(stats->send_done_count); 2916 } 2917 if (sc->legacy_irq && mxge_deassert_wait) 2918 wmb(); 2919 } while (*((volatile uint8_t *) &stats->valid)); 2920 2921 /* fw link & error stats meaningful only on the first slice */ 2922 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 2923 if (sc->link_state != stats->link_up) { 2924 sc->link_state = stats->link_up; 2925 if (sc->link_state) { 2926 if_link_state_change(sc->ifp, LINK_STATE_UP); 2927 if (mxge_verbose) 2928 device_printf(sc->dev, "link up\n"); 2929 } else { 2930 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 2931 if (mxge_verbose) 2932 device_printf(sc->dev, "link down\n"); 2933 } 2934 sc->need_media_probe = 1; 2935 } 2936 if (sc->rdma_tags_available != 2937 be32toh(stats->rdma_tags_available)) { 2938 sc->rdma_tags_available = 2939 be32toh(stats->rdma_tags_available); 2940 device_printf(sc->dev, "RDMA timed out! %d tags " 2941 "left\n", sc->rdma_tags_available); 2942 } 2943 2944 if (stats->link_down) { 2945 sc->down_cnt += stats->link_down; 2946 sc->link_state = 0; 2947 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 2948 } 2949 } 2950 2951 /* check to see if we have rx token to pass back */ 2952 if (valid & 0x1) 2953 *ss->irq_claim = be32toh(3); 2954 *(ss->irq_claim + 1) = be32toh(3); 2955 } 2956 2957 static void 2958 mxge_init(void *arg) 2959 { 2960 } 2961 2962 2963 2964 static void 2965 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 2966 { 2967 struct lro_entry *lro_entry; 2968 int i; 2969 2970 while (!SLIST_EMPTY(&ss->lro_free)) { 2971 lro_entry = SLIST_FIRST(&ss->lro_free); 2972 SLIST_REMOVE_HEAD(&ss->lro_free, next); 2973 free(lro_entry, M_DEVBUF); 2974 } 2975 2976 for (i = 0; i <= ss->rx_big.mask; i++) { 2977 if (ss->rx_big.info[i].m == NULL) 2978 continue; 2979 bus_dmamap_unload(ss->rx_big.dmat, 2980 ss->rx_big.info[i].map); 2981 m_freem(ss->rx_big.info[i].m); 2982 ss->rx_big.info[i].m = NULL; 2983 } 2984 2985 for (i = 0; i <= ss->rx_small.mask; i++) { 2986 if (ss->rx_small.info[i].m == NULL) 2987 continue; 2988 bus_dmamap_unload(ss->rx_small.dmat, 2989 ss->rx_small.info[i].map); 2990 m_freem(ss->rx_small.info[i].m); 2991 ss->rx_small.info[i].m = NULL; 2992 } 2993 2994 /* transmit ring used only on the first slice */ 2995 if (ss->tx.info == NULL) 2996 return; 2997 2998 for (i = 0; i <= ss->tx.mask; i++) { 2999 ss->tx.info[i].flag = 0; 3000 if (ss->tx.info[i].m == NULL) 3001 continue; 3002 bus_dmamap_unload(ss->tx.dmat, 3003 ss->tx.info[i].map); 3004 m_freem(ss->tx.info[i].m); 3005 ss->tx.info[i].m = NULL; 3006 } 3007 } 3008 3009 static void 3010 mxge_free_mbufs(mxge_softc_t *sc) 3011 { 3012 int slice; 3013 3014 for (slice = 0; slice < sc->num_slices; slice++) 3015 mxge_free_slice_mbufs(&sc->ss[slice]); 3016 } 3017 3018 static void 3019 mxge_free_slice_rings(struct mxge_slice_state *ss) 3020 { 3021 int i; 3022 3023 3024 if (ss->rx_done.entry != NULL) 3025 mxge_dma_free(&ss->rx_done.dma); 3026 ss->rx_done.entry = NULL; 3027 3028 if (ss->tx.req_bytes != NULL) 3029 free(ss->tx.req_bytes, M_DEVBUF); 3030 ss->tx.req_bytes = NULL; 3031 3032 if (ss->tx.seg_list != NULL) 3033 free(ss->tx.seg_list, M_DEVBUF); 3034 ss->tx.seg_list = NULL; 3035 3036 if (ss->rx_small.shadow != NULL) 3037 free(ss->rx_small.shadow, M_DEVBUF); 3038 ss->rx_small.shadow = NULL; 3039 3040 if (ss->rx_big.shadow != NULL) 3041 free(ss->rx_big.shadow, M_DEVBUF); 3042 ss->rx_big.shadow = NULL; 3043 3044 if (ss->tx.info != NULL) { 3045 if (ss->tx.dmat != NULL) { 3046 for (i = 0; i <= ss->tx.mask; i++) { 3047 bus_dmamap_destroy(ss->tx.dmat, 3048 ss->tx.info[i].map); 3049 } 3050 bus_dma_tag_destroy(ss->tx.dmat); 3051 } 3052 free(ss->tx.info, M_DEVBUF); 3053 } 3054 ss->tx.info = NULL; 3055 3056 if (ss->rx_small.info != NULL) { 3057 if (ss->rx_small.dmat != NULL) { 3058 for (i = 0; i <= ss->rx_small.mask; i++) { 3059 bus_dmamap_destroy(ss->rx_small.dmat, 3060 ss->rx_small.info[i].map); 3061 } 3062 bus_dmamap_destroy(ss->rx_small.dmat, 3063 ss->rx_small.extra_map); 3064 bus_dma_tag_destroy(ss->rx_small.dmat); 3065 } 3066 free(ss->rx_small.info, M_DEVBUF); 3067 } 3068 ss->rx_small.info = NULL; 3069 3070 if (ss->rx_big.info != NULL) { 3071 if (ss->rx_big.dmat != NULL) { 3072 for (i = 0; i <= ss->rx_big.mask; i++) { 3073 bus_dmamap_destroy(ss->rx_big.dmat, 3074 ss->rx_big.info[i].map); 3075 } 3076 bus_dmamap_destroy(ss->rx_big.dmat, 3077 ss->rx_big.extra_map); 3078 bus_dma_tag_destroy(ss->rx_big.dmat); 3079 } 3080 free(ss->rx_big.info, M_DEVBUF); 3081 } 3082 ss->rx_big.info = NULL; 3083 } 3084 3085 static void 3086 mxge_free_rings(mxge_softc_t *sc) 3087 { 3088 int slice; 3089 3090 for (slice = 0; slice < sc->num_slices; slice++) 3091 mxge_free_slice_rings(&sc->ss[slice]); 3092 } 3093 3094 static int 3095 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3096 int tx_ring_entries) 3097 { 3098 mxge_softc_t *sc = ss->sc; 3099 size_t bytes; 3100 int err, i; 3101 3102 err = ENOMEM; 3103 3104 /* allocate per-slice receive resources */ 3105 3106 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 3107 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 3108 3109 /* allocate the rx shadow rings */ 3110 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 3111 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3112 if (ss->rx_small.shadow == NULL) 3113 return err;; 3114 3115 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 3116 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3117 if (ss->rx_big.shadow == NULL) 3118 return err;; 3119 3120 /* allocate the rx host info rings */ 3121 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 3122 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3123 if (ss->rx_small.info == NULL) 3124 return err;; 3125 3126 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 3127 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3128 if (ss->rx_big.info == NULL) 3129 return err;; 3130 3131 /* allocate the rx busdma resources */ 3132 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3133 1, /* alignment */ 3134 4096, /* boundary */ 3135 BUS_SPACE_MAXADDR, /* low */ 3136 BUS_SPACE_MAXADDR, /* high */ 3137 NULL, NULL, /* filter */ 3138 MHLEN, /* maxsize */ 3139 1, /* num segs */ 3140 MHLEN, /* maxsegsize */ 3141 BUS_DMA_ALLOCNOW, /* flags */ 3142 NULL, NULL, /* lock */ 3143 &ss->rx_small.dmat); /* tag */ 3144 if (err != 0) { 3145 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3146 err); 3147 return err;; 3148 } 3149 3150 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3151 1, /* alignment */ 3152 #if MXGE_VIRT_JUMBOS 3153 4096, /* boundary */ 3154 #else 3155 0, /* boundary */ 3156 #endif 3157 BUS_SPACE_MAXADDR, /* low */ 3158 BUS_SPACE_MAXADDR, /* high */ 3159 NULL, NULL, /* filter */ 3160 3*4096, /* maxsize */ 3161 #if MXGE_VIRT_JUMBOS 3162 3, /* num segs */ 3163 4096, /* maxsegsize*/ 3164 #else 3165 1, /* num segs */ 3166 MJUM9BYTES, /* maxsegsize*/ 3167 #endif 3168 BUS_DMA_ALLOCNOW, /* flags */ 3169 NULL, NULL, /* lock */ 3170 &ss->rx_big.dmat); /* tag */ 3171 if (err != 0) { 3172 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3173 err); 3174 return err;; 3175 } 3176 for (i = 0; i <= ss->rx_small.mask; i++) { 3177 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3178 &ss->rx_small.info[i].map); 3179 if (err != 0) { 3180 device_printf(sc->dev, "Err %d rx_small dmamap\n", 3181 err); 3182 return err;; 3183 } 3184 } 3185 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3186 &ss->rx_small.extra_map); 3187 if (err != 0) { 3188 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 3189 err); 3190 return err;; 3191 } 3192 3193 for (i = 0; i <= ss->rx_big.mask; i++) { 3194 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3195 &ss->rx_big.info[i].map); 3196 if (err != 0) { 3197 device_printf(sc->dev, "Err %d rx_big dmamap\n", 3198 err); 3199 return err;; 3200 } 3201 } 3202 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3203 &ss->rx_big.extra_map); 3204 if (err != 0) { 3205 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 3206 err); 3207 return err;; 3208 } 3209 3210 /* now allocate TX resouces */ 3211 3212 #ifndef IFNET_BUF_RING 3213 /* only use a single TX ring for now */ 3214 if (ss != ss->sc->ss) 3215 return 0; 3216 #endif 3217 3218 ss->tx.mask = tx_ring_entries - 1; 3219 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3220 3221 3222 /* allocate the tx request copy block */ 3223 bytes = 8 + 3224 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3225 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 3226 if (ss->tx.req_bytes == NULL) 3227 return err;; 3228 /* ensure req_list entries are aligned to 8 bytes */ 3229 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3230 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL); 3231 3232 /* allocate the tx busdma segment list */ 3233 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3234 ss->tx.seg_list = (bus_dma_segment_t *) 3235 malloc(bytes, M_DEVBUF, M_WAITOK); 3236 if (ss->tx.seg_list == NULL) 3237 return err;; 3238 3239 /* allocate the tx host info ring */ 3240 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3241 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3242 if (ss->tx.info == NULL) 3243 return err;; 3244 3245 /* allocate the tx busdma resources */ 3246 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3247 1, /* alignment */ 3248 sc->tx_boundary, /* boundary */ 3249 BUS_SPACE_MAXADDR, /* low */ 3250 BUS_SPACE_MAXADDR, /* high */ 3251 NULL, NULL, /* filter */ 3252 65536 + 256, /* maxsize */ 3253 ss->tx.max_desc - 2, /* num segs */ 3254 sc->tx_boundary, /* maxsegsz */ 3255 BUS_DMA_ALLOCNOW, /* flags */ 3256 NULL, NULL, /* lock */ 3257 &ss->tx.dmat); /* tag */ 3258 3259 if (err != 0) { 3260 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3261 err); 3262 return err;; 3263 } 3264 3265 /* now use these tags to setup dmamaps for each slot 3266 in the ring */ 3267 for (i = 0; i <= ss->tx.mask; i++) { 3268 err = bus_dmamap_create(ss->tx.dmat, 0, 3269 &ss->tx.info[i].map); 3270 if (err != 0) { 3271 device_printf(sc->dev, "Err %d tx dmamap\n", 3272 err); 3273 return err;; 3274 } 3275 } 3276 return 0; 3277 3278 } 3279 3280 static int 3281 mxge_alloc_rings(mxge_softc_t *sc) 3282 { 3283 mxge_cmd_t cmd; 3284 int tx_ring_size; 3285 int tx_ring_entries, rx_ring_entries; 3286 int err, slice; 3287 3288 /* get ring sizes */ 3289 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3290 tx_ring_size = cmd.data0; 3291 if (err != 0) { 3292 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3293 goto abort; 3294 } 3295 3296 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3297 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3298 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 3299 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 3300 IFQ_SET_READY(&sc->ifp->if_snd); 3301 3302 for (slice = 0; slice < sc->num_slices; slice++) { 3303 err = mxge_alloc_slice_rings(&sc->ss[slice], 3304 rx_ring_entries, 3305 tx_ring_entries); 3306 if (err != 0) 3307 goto abort; 3308 } 3309 return 0; 3310 3311 abort: 3312 mxge_free_rings(sc); 3313 return err; 3314 3315 } 3316 3317 3318 static void 3319 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3320 { 3321 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3322 3323 if (bufsize < MCLBYTES) { 3324 /* easy, everything fits in a single buffer */ 3325 *big_buf_size = MCLBYTES; 3326 *cl_size = MCLBYTES; 3327 *nbufs = 1; 3328 return; 3329 } 3330 3331 if (bufsize < MJUMPAGESIZE) { 3332 /* still easy, everything still fits in a single buffer */ 3333 *big_buf_size = MJUMPAGESIZE; 3334 *cl_size = MJUMPAGESIZE; 3335 *nbufs = 1; 3336 return; 3337 } 3338 #if MXGE_VIRT_JUMBOS 3339 /* now we need to use virtually contiguous buffers */ 3340 *cl_size = MJUM9BYTES; 3341 *big_buf_size = 4096; 3342 *nbufs = mtu / 4096 + 1; 3343 /* needs to be a power of two, so round up */ 3344 if (*nbufs == 3) 3345 *nbufs = 4; 3346 #else 3347 *cl_size = MJUM9BYTES; 3348 *big_buf_size = MJUM9BYTES; 3349 *nbufs = 1; 3350 #endif 3351 } 3352 3353 static int 3354 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3355 { 3356 mxge_softc_t *sc; 3357 mxge_cmd_t cmd; 3358 bus_dmamap_t map; 3359 struct lro_entry *lro_entry; 3360 int err, i, slice; 3361 3362 3363 sc = ss->sc; 3364 slice = ss - sc->ss; 3365 3366 SLIST_INIT(&ss->lro_free); 3367 SLIST_INIT(&ss->lro_active); 3368 3369 for (i = 0; i < sc->lro_cnt; i++) { 3370 lro_entry = (struct lro_entry *) 3371 malloc(sizeof (*lro_entry), M_DEVBUF, 3372 M_NOWAIT | M_ZERO); 3373 if (lro_entry == NULL) { 3374 sc->lro_cnt = i; 3375 break; 3376 } 3377 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next); 3378 } 3379 /* get the lanai pointers to the send and receive rings */ 3380 3381 err = 0; 3382 #ifndef IFNET_BUF_RING 3383 /* We currently only send from the first slice */ 3384 if (slice == 0) { 3385 #endif 3386 cmd.data0 = slice; 3387 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3388 ss->tx.lanai = 3389 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3390 ss->tx.send_go = (volatile uint32_t *) 3391 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3392 ss->tx.send_stop = (volatile uint32_t *) 3393 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3394 #ifndef IFNET_BUF_RING 3395 } 3396 #endif 3397 cmd.data0 = slice; 3398 err |= mxge_send_cmd(sc, 3399 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3400 ss->rx_small.lanai = 3401 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3402 cmd.data0 = slice; 3403 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3404 ss->rx_big.lanai = 3405 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3406 3407 if (err != 0) { 3408 device_printf(sc->dev, 3409 "failed to get ring sizes or locations\n"); 3410 return EIO; 3411 } 3412 3413 /* stock receive rings */ 3414 for (i = 0; i <= ss->rx_small.mask; i++) { 3415 map = ss->rx_small.info[i].map; 3416 err = mxge_get_buf_small(ss, map, i); 3417 if (err) { 3418 device_printf(sc->dev, "alloced %d/%d smalls\n", 3419 i, ss->rx_small.mask + 1); 3420 return ENOMEM; 3421 } 3422 } 3423 for (i = 0; i <= ss->rx_big.mask; i++) { 3424 ss->rx_big.shadow[i].addr_low = 0xffffffff; 3425 ss->rx_big.shadow[i].addr_high = 0xffffffff; 3426 } 3427 ss->rx_big.nbufs = nbufs; 3428 ss->rx_big.cl_size = cl_size; 3429 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN + 3430 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3431 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { 3432 map = ss->rx_big.info[i].map; 3433 err = mxge_get_buf_big(ss, map, i); 3434 if (err) { 3435 device_printf(sc->dev, "alloced %d/%d bigs\n", 3436 i, ss->rx_big.mask + 1); 3437 return ENOMEM; 3438 } 3439 } 3440 return 0; 3441 } 3442 3443 static int 3444 mxge_open(mxge_softc_t *sc) 3445 { 3446 mxge_cmd_t cmd; 3447 int err, big_bytes, nbufs, slice, cl_size, i; 3448 bus_addr_t bus; 3449 volatile uint8_t *itable; 3450 struct mxge_slice_state *ss; 3451 3452 /* Copy the MAC address in case it was overridden */ 3453 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 3454 3455 err = mxge_reset(sc, 1); 3456 if (err != 0) { 3457 device_printf(sc->dev, "failed to reset\n"); 3458 return EIO; 3459 } 3460 3461 if (sc->num_slices > 1) { 3462 /* setup the indirection table */ 3463 cmd.data0 = sc->num_slices; 3464 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 3465 &cmd); 3466 3467 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 3468 &cmd); 3469 if (err != 0) { 3470 device_printf(sc->dev, 3471 "failed to setup rss tables\n"); 3472 return err; 3473 } 3474 3475 /* just enable an identity mapping */ 3476 itable = sc->sram + cmd.data0; 3477 for (i = 0; i < sc->num_slices; i++) 3478 itable[i] = (uint8_t)i; 3479 3480 cmd.data0 = 1; 3481 cmd.data1 = mxge_rss_hash_type; 3482 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3483 if (err != 0) { 3484 device_printf(sc->dev, "failed to enable slices\n"); 3485 return err; 3486 } 3487 } 3488 3489 3490 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); 3491 3492 cmd.data0 = nbufs; 3493 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 3494 &cmd); 3495 /* error is only meaningful if we're trying to set 3496 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 3497 if (err && nbufs > 1) { 3498 device_printf(sc->dev, 3499 "Failed to set alway-use-n to %d\n", 3500 nbufs); 3501 return EIO; 3502 } 3503 /* Give the firmware the mtu and the big and small buffer 3504 sizes. The firmware wants the big buf size to be a power 3505 of two. Luckily, FreeBSD's clusters are powers of two */ 3506 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3507 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3508 cmd.data0 = MHLEN - MXGEFW_PAD; 3509 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3510 &cmd); 3511 cmd.data0 = big_bytes; 3512 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3513 3514 if (err != 0) { 3515 device_printf(sc->dev, "failed to setup params\n"); 3516 goto abort; 3517 } 3518 3519 /* Now give him the pointer to the stats block */ 3520 for (slice = 0; 3521 #ifdef IFNET_BUF_RING 3522 slice < sc->num_slices; 3523 #else 3524 slice < 1; 3525 #endif 3526 slice++) { 3527 ss = &sc->ss[slice]; 3528 cmd.data0 = 3529 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3530 cmd.data1 = 3531 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3532 cmd.data2 = sizeof(struct mcp_irq_data); 3533 cmd.data2 |= (slice << 16); 3534 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3535 } 3536 3537 if (err != 0) { 3538 bus = sc->ss->fw_stats_dma.bus_addr; 3539 bus += offsetof(struct mcp_irq_data, send_done_count); 3540 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3541 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3542 err = mxge_send_cmd(sc, 3543 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3544 &cmd); 3545 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3546 sc->fw_multicast_support = 0; 3547 } else { 3548 sc->fw_multicast_support = 1; 3549 } 3550 3551 if (err != 0) { 3552 device_printf(sc->dev, "failed to setup params\n"); 3553 goto abort; 3554 } 3555 3556 for (slice = 0; slice < sc->num_slices; slice++) { 3557 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3558 if (err != 0) { 3559 device_printf(sc->dev, "couldn't open slice %d\n", 3560 slice); 3561 goto abort; 3562 } 3563 } 3564 3565 /* Finally, start the firmware running */ 3566 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3567 if (err) { 3568 device_printf(sc->dev, "Couldn't bring up link\n"); 3569 goto abort; 3570 } 3571 #ifdef IFNET_BUF_RING 3572 for (slice = 0; slice < sc->num_slices; slice++) { 3573 ss = &sc->ss[slice]; 3574 ss->if_drv_flags |= IFF_DRV_RUNNING; 3575 ss->if_drv_flags &= ~IFF_DRV_OACTIVE; 3576 } 3577 #endif 3578 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3579 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3580 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3581 3582 return 0; 3583 3584 3585 abort: 3586 mxge_free_mbufs(sc); 3587 3588 return err; 3589 } 3590 3591 static int 3592 mxge_close(mxge_softc_t *sc) 3593 { 3594 mxge_cmd_t cmd; 3595 int err, old_down_cnt; 3596 #ifdef IFNET_BUF_RING 3597 struct mxge_slice_state *ss; 3598 int slice; 3599 #endif 3600 3601 callout_stop(&sc->co_hdl); 3602 #ifdef IFNET_BUF_RING 3603 for (slice = 0; slice < sc->num_slices; slice++) { 3604 ss = &sc->ss[slice]; 3605 ss->if_drv_flags &= ~IFF_DRV_RUNNING; 3606 } 3607 #endif 3608 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3609 old_down_cnt = sc->down_cnt; 3610 wmb(); 3611 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3612 if (err) { 3613 device_printf(sc->dev, "Couldn't bring down link\n"); 3614 } 3615 if (old_down_cnt == sc->down_cnt) { 3616 /* wait for down irq */ 3617 DELAY(10 * sc->intr_coal_delay); 3618 } 3619 wmb(); 3620 if (old_down_cnt == sc->down_cnt) { 3621 device_printf(sc->dev, "never got down irq\n"); 3622 } 3623 3624 mxge_free_mbufs(sc); 3625 3626 return 0; 3627 } 3628 3629 static void 3630 mxge_setup_cfg_space(mxge_softc_t *sc) 3631 { 3632 device_t dev = sc->dev; 3633 int reg; 3634 uint16_t cmd, lnk, pectl; 3635 3636 /* find the PCIe link width and set max read request to 4KB*/ 3637 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 3638 lnk = pci_read_config(dev, reg + 0x12, 2); 3639 sc->link_width = (lnk >> 4) & 0x3f; 3640 3641 pectl = pci_read_config(dev, reg + 0x8, 2); 3642 pectl = (pectl & ~0x7000) | (5 << 12); 3643 pci_write_config(dev, reg + 0x8, pectl, 2); 3644 } 3645 3646 /* Enable DMA and Memory space access */ 3647 pci_enable_busmaster(dev); 3648 cmd = pci_read_config(dev, PCIR_COMMAND, 2); 3649 cmd |= PCIM_CMD_MEMEN; 3650 pci_write_config(dev, PCIR_COMMAND, cmd, 2); 3651 } 3652 3653 static uint32_t 3654 mxge_read_reboot(mxge_softc_t *sc) 3655 { 3656 device_t dev = sc->dev; 3657 uint32_t vs; 3658 3659 /* find the vendor specific offset */ 3660 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) { 3661 device_printf(sc->dev, 3662 "could not find vendor specific offset\n"); 3663 return (uint32_t)-1; 3664 } 3665 /* enable read32 mode */ 3666 pci_write_config(dev, vs + 0x10, 0x3, 1); 3667 /* tell NIC which register to read */ 3668 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3669 return (pci_read_config(dev, vs + 0x14, 4)); 3670 } 3671 3672 static int 3673 mxge_watchdog_reset(mxge_softc_t *sc, int slice) 3674 { 3675 struct pci_devinfo *dinfo; 3676 mxge_tx_ring_t *tx; 3677 int err; 3678 uint32_t reboot; 3679 uint16_t cmd; 3680 3681 err = ENXIO; 3682 3683 device_printf(sc->dev, "Watchdog reset!\n"); 3684 3685 /* 3686 * check to see if the NIC rebooted. If it did, then all of 3687 * PCI config space has been reset, and things like the 3688 * busmaster bit will be zero. If this is the case, then we 3689 * must restore PCI config space before the NIC can be used 3690 * again 3691 */ 3692 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3693 if (cmd == 0xffff) { 3694 /* 3695 * maybe the watchdog caught the NIC rebooting; wait 3696 * up to 100ms for it to finish. If it does not come 3697 * back, then give up 3698 */ 3699 DELAY(1000*100); 3700 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3701 if (cmd == 0xffff) { 3702 device_printf(sc->dev, "NIC disappeared!\n"); 3703 return (err); 3704 } 3705 } 3706 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3707 /* print the reboot status */ 3708 reboot = mxge_read_reboot(sc); 3709 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3710 reboot); 3711 /* restore PCI configuration space */ 3712 dinfo = device_get_ivars(sc->dev); 3713 pci_cfg_restore(sc->dev, dinfo); 3714 3715 /* and redo any changes we made to our config space */ 3716 mxge_setup_cfg_space(sc); 3717 3718 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) { 3719 mxge_close(sc); 3720 err = mxge_open(sc); 3721 } 3722 } else { 3723 tx = &sc->ss[slice].tx; 3724 device_printf(sc->dev, 3725 "NIC did not reboot, slice %d ring state:\n", 3726 slice); 3727 device_printf(sc->dev, 3728 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3729 tx->req, tx->done, tx->queue_active); 3730 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3731 tx->activate, tx->deactivate); 3732 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3733 tx->pkt_done, 3734 be32toh(sc->ss->fw_stats->send_done_count)); 3735 device_printf(sc->dev, "not resetting\n"); 3736 } 3737 return (err); 3738 } 3739 3740 static int 3741 mxge_watchdog(mxge_softc_t *sc) 3742 { 3743 mxge_tx_ring_t *tx; 3744 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3745 int i, err = 0; 3746 3747 /* see if we have outstanding transmits, which 3748 have been pending for more than mxge_ticks */ 3749 for (i = 0; 3750 #ifdef IFNET_BUF_RING 3751 (i < sc->num_slices) && (err == 0); 3752 #else 3753 (i < 1) && (err == 0); 3754 #endif 3755 i++) { 3756 tx = &sc->ss[i].tx; 3757 if (tx->req != tx->done && 3758 tx->watchdog_req != tx->watchdog_done && 3759 tx->done == tx->watchdog_done) { 3760 /* check for pause blocking before resetting */ 3761 if (tx->watchdog_rx_pause == rx_pause) 3762 err = mxge_watchdog_reset(sc, i); 3763 else 3764 device_printf(sc->dev, "Flow control blocking " 3765 "xmits, check link partner\n"); 3766 } 3767 3768 tx->watchdog_req = tx->req; 3769 tx->watchdog_done = tx->done; 3770 tx->watchdog_rx_pause = rx_pause; 3771 } 3772 3773 if (sc->need_media_probe) 3774 mxge_media_probe(sc); 3775 return (err); 3776 } 3777 3778 static void 3779 mxge_update_stats(mxge_softc_t *sc) 3780 { 3781 struct mxge_slice_state *ss; 3782 u_long ipackets = 0; 3783 u_long opackets = 0; 3784 u_long oerrors = 0; 3785 int slice; 3786 3787 for (slice = 0; slice < sc->num_slices; slice++) { 3788 ss = &sc->ss[slice]; 3789 ipackets += ss->ipackets; 3790 opackets += ss->opackets; 3791 oerrors += ss->oerrors; 3792 } 3793 sc->ifp->if_ipackets = ipackets; 3794 sc->ifp->if_opackets = opackets; 3795 sc->ifp->if_oerrors = oerrors; 3796 } 3797 3798 static void 3799 mxge_tick(void *arg) 3800 { 3801 mxge_softc_t *sc = arg; 3802 int err = 0; 3803 3804 /* aggregate stats from different slices */ 3805 mxge_update_stats(sc); 3806 if (!sc->watchdog_countdown) { 3807 err = mxge_watchdog(sc); 3808 sc->watchdog_countdown = 4; 3809 } 3810 sc->watchdog_countdown--; 3811 if (err == 0) 3812 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3813 3814 } 3815 3816 static int 3817 mxge_media_change(struct ifnet *ifp) 3818 { 3819 return EINVAL; 3820 } 3821 3822 static int 3823 mxge_change_mtu(mxge_softc_t *sc, int mtu) 3824 { 3825 struct ifnet *ifp = sc->ifp; 3826 int real_mtu, old_mtu; 3827 int err = 0; 3828 3829 3830 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3831 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 3832 return EINVAL; 3833 mtx_lock(&sc->driver_mtx); 3834 old_mtu = ifp->if_mtu; 3835 ifp->if_mtu = mtu; 3836 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3837 mxge_close(sc); 3838 err = mxge_open(sc); 3839 if (err != 0) { 3840 ifp->if_mtu = old_mtu; 3841 mxge_close(sc); 3842 (void) mxge_open(sc); 3843 } 3844 } 3845 mtx_unlock(&sc->driver_mtx); 3846 return err; 3847 } 3848 3849 static void 3850 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 3851 { 3852 mxge_softc_t *sc = ifp->if_softc; 3853 3854 3855 if (sc == NULL) 3856 return; 3857 ifmr->ifm_status = IFM_AVALID; 3858 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 3859 ifmr->ifm_active = IFM_AUTO | IFM_ETHER; 3860 ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0; 3861 } 3862 3863 static int 3864 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 3865 { 3866 mxge_softc_t *sc = ifp->if_softc; 3867 struct ifreq *ifr = (struct ifreq *)data; 3868 int err, mask; 3869 3870 err = 0; 3871 switch (command) { 3872 case SIOCSIFADDR: 3873 case SIOCGIFADDR: 3874 err = ether_ioctl(ifp, command, data); 3875 break; 3876 3877 case SIOCSIFMTU: 3878 err = mxge_change_mtu(sc, ifr->ifr_mtu); 3879 break; 3880 3881 case SIOCSIFFLAGS: 3882 mtx_lock(&sc->driver_mtx); 3883 if (ifp->if_flags & IFF_UP) { 3884 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 3885 err = mxge_open(sc); 3886 } else { 3887 /* take care of promis can allmulti 3888 flag chages */ 3889 mxge_change_promisc(sc, 3890 ifp->if_flags & IFF_PROMISC); 3891 mxge_set_multicast_list(sc); 3892 } 3893 } else { 3894 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3895 mxge_close(sc); 3896 } 3897 } 3898 mtx_unlock(&sc->driver_mtx); 3899 break; 3900 3901 case SIOCADDMULTI: 3902 case SIOCDELMULTI: 3903 mtx_lock(&sc->driver_mtx); 3904 mxge_set_multicast_list(sc); 3905 mtx_unlock(&sc->driver_mtx); 3906 break; 3907 3908 case SIOCSIFCAP: 3909 mtx_lock(&sc->driver_mtx); 3910 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 3911 if (mask & IFCAP_TXCSUM) { 3912 if (IFCAP_TXCSUM & ifp->if_capenable) { 3913 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 3914 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP 3915 | CSUM_TSO); 3916 } else { 3917 ifp->if_capenable |= IFCAP_TXCSUM; 3918 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 3919 } 3920 } else if (mask & IFCAP_RXCSUM) { 3921 if (IFCAP_RXCSUM & ifp->if_capenable) { 3922 ifp->if_capenable &= ~IFCAP_RXCSUM; 3923 sc->csum_flag = 0; 3924 } else { 3925 ifp->if_capenable |= IFCAP_RXCSUM; 3926 sc->csum_flag = 1; 3927 } 3928 } 3929 if (mask & IFCAP_TSO4) { 3930 if (IFCAP_TSO4 & ifp->if_capenable) { 3931 ifp->if_capenable &= ~IFCAP_TSO4; 3932 ifp->if_hwassist &= ~CSUM_TSO; 3933 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 3934 ifp->if_capenable |= IFCAP_TSO4; 3935 ifp->if_hwassist |= CSUM_TSO; 3936 } else { 3937 printf("mxge requires tx checksum offload" 3938 " be enabled to use TSO\n"); 3939 err = EINVAL; 3940 } 3941 } 3942 if (mask & IFCAP_LRO) { 3943 if (IFCAP_LRO & ifp->if_capenable) 3944 err = mxge_change_lro_locked(sc, 0); 3945 else 3946 err = mxge_change_lro_locked(sc, mxge_lro_cnt); 3947 } 3948 if (mask & IFCAP_VLAN_HWTAGGING) 3949 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 3950 mtx_unlock(&sc->driver_mtx); 3951 VLAN_CAPABILITIES(ifp); 3952 3953 break; 3954 3955 case SIOCGIFMEDIA: 3956 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 3957 &sc->media, command); 3958 break; 3959 3960 default: 3961 err = ENOTTY; 3962 } 3963 return err; 3964 } 3965 3966 static void 3967 mxge_fetch_tunables(mxge_softc_t *sc) 3968 { 3969 3970 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); 3971 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 3972 &mxge_flow_control); 3973 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 3974 &mxge_intr_coal_delay); 3975 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 3976 &mxge_nvidia_ecrc_enable); 3977 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 3978 &mxge_force_firmware); 3979 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 3980 &mxge_deassert_wait); 3981 TUNABLE_INT_FETCH("hw.mxge.verbose", 3982 &mxge_verbose); 3983 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 3984 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt); 3985 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); 3986 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); 3987 if (sc->lro_cnt != 0) 3988 mxge_lro_cnt = sc->lro_cnt; 3989 3990 if (bootverbose) 3991 mxge_verbose = 1; 3992 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 3993 mxge_intr_coal_delay = 30; 3994 if (mxge_ticks == 0) 3995 mxge_ticks = hz / 2; 3996 sc->pause = mxge_flow_control; 3997 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 3998 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_SRC_PORT) { 3999 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT; 4000 } 4001 } 4002 4003 4004 static void 4005 mxge_free_slices(mxge_softc_t *sc) 4006 { 4007 struct mxge_slice_state *ss; 4008 int i; 4009 4010 4011 if (sc->ss == NULL) 4012 return; 4013 4014 for (i = 0; i < sc->num_slices; i++) { 4015 ss = &sc->ss[i]; 4016 if (ss->fw_stats != NULL) { 4017 mxge_dma_free(&ss->fw_stats_dma); 4018 ss->fw_stats = NULL; 4019 #ifdef IFNET_BUF_RING 4020 if (ss->tx.br != NULL) { 4021 drbr_free(ss->tx.br, M_DEVBUF); 4022 ss->tx.br = NULL; 4023 } 4024 #endif 4025 mtx_destroy(&ss->tx.mtx); 4026 } 4027 if (ss->rx_done.entry != NULL) { 4028 mxge_dma_free(&ss->rx_done.dma); 4029 ss->rx_done.entry = NULL; 4030 } 4031 } 4032 free(sc->ss, M_DEVBUF); 4033 sc->ss = NULL; 4034 } 4035 4036 static int 4037 mxge_alloc_slices(mxge_softc_t *sc) 4038 { 4039 mxge_cmd_t cmd; 4040 struct mxge_slice_state *ss; 4041 size_t bytes; 4042 int err, i, max_intr_slots; 4043 4044 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4045 if (err != 0) { 4046 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4047 return err; 4048 } 4049 sc->rx_ring_size = cmd.data0; 4050 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); 4051 4052 bytes = sizeof (*sc->ss) * sc->num_slices; 4053 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO); 4054 if (sc->ss == NULL) 4055 return (ENOMEM); 4056 for (i = 0; i < sc->num_slices; i++) { 4057 ss = &sc->ss[i]; 4058 4059 ss->sc = sc; 4060 4061 /* allocate per-slice rx interrupt queues */ 4062 4063 bytes = max_intr_slots * sizeof (*ss->rx_done.entry); 4064 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096); 4065 if (err != 0) 4066 goto abort; 4067 ss->rx_done.entry = ss->rx_done.dma.addr; 4068 bzero(ss->rx_done.entry, bytes); 4069 4070 /* 4071 * allocate the per-slice firmware stats; stats 4072 * (including tx) are used used only on the first 4073 * slice for now 4074 */ 4075 #ifndef IFNET_BUF_RING 4076 if (i > 0) 4077 continue; 4078 #endif 4079 4080 bytes = sizeof (*ss->fw_stats); 4081 err = mxge_dma_alloc(sc, &ss->fw_stats_dma, 4082 sizeof (*ss->fw_stats), 64); 4083 if (err != 0) 4084 goto abort; 4085 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr; 4086 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name), 4087 "%s:tx(%d)", device_get_nameunit(sc->dev), i); 4088 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF); 4089 #ifdef IFNET_BUF_RING 4090 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK, 4091 &ss->tx.mtx); 4092 #endif 4093 } 4094 4095 return (0); 4096 4097 abort: 4098 mxge_free_slices(sc); 4099 return (ENOMEM); 4100 } 4101 4102 static void 4103 mxge_slice_probe(mxge_softc_t *sc) 4104 { 4105 mxge_cmd_t cmd; 4106 char *old_fw; 4107 int msix_cnt, status, max_intr_slots; 4108 4109 sc->num_slices = 1; 4110 /* 4111 * don't enable multiple slices if they are not enabled, 4112 * or if this is not an SMP system 4113 */ 4114 4115 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2) 4116 return; 4117 4118 /* see how many MSI-X interrupts are available */ 4119 msix_cnt = pci_msix_count(sc->dev); 4120 if (msix_cnt < 2) 4121 return; 4122 4123 /* now load the slice aware firmware see what it supports */ 4124 old_fw = sc->fw_name; 4125 if (old_fw == mxge_fw_aligned) 4126 sc->fw_name = mxge_fw_rss_aligned; 4127 else 4128 sc->fw_name = mxge_fw_rss_unaligned; 4129 status = mxge_load_firmware(sc, 0); 4130 if (status != 0) { 4131 device_printf(sc->dev, "Falling back to a single slice\n"); 4132 return; 4133 } 4134 4135 /* try to send a reset command to the card to see if it 4136 is alive */ 4137 memset(&cmd, 0, sizeof (cmd)); 4138 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 4139 if (status != 0) { 4140 device_printf(sc->dev, "failed reset\n"); 4141 goto abort_with_fw; 4142 } 4143 4144 /* get rx ring size */ 4145 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4146 if (status != 0) { 4147 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4148 goto abort_with_fw; 4149 } 4150 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t)); 4151 4152 /* tell it the size of the interrupt queues */ 4153 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot); 4154 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 4155 if (status != 0) { 4156 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 4157 goto abort_with_fw; 4158 } 4159 4160 /* ask the maximum number of slices it supports */ 4161 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 4162 if (status != 0) { 4163 device_printf(sc->dev, 4164 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); 4165 goto abort_with_fw; 4166 } 4167 sc->num_slices = cmd.data0; 4168 if (sc->num_slices > msix_cnt) 4169 sc->num_slices = msix_cnt; 4170 4171 if (mxge_max_slices == -1) { 4172 /* cap to number of CPUs in system */ 4173 if (sc->num_slices > mp_ncpus) 4174 sc->num_slices = mp_ncpus; 4175 } else { 4176 if (sc->num_slices > mxge_max_slices) 4177 sc->num_slices = mxge_max_slices; 4178 } 4179 /* make sure it is a power of two */ 4180 while (sc->num_slices & (sc->num_slices - 1)) 4181 sc->num_slices--; 4182 4183 if (mxge_verbose) 4184 device_printf(sc->dev, "using %d slices\n", 4185 sc->num_slices); 4186 4187 return; 4188 4189 abort_with_fw: 4190 sc->fw_name = old_fw; 4191 (void) mxge_load_firmware(sc, 0); 4192 } 4193 4194 static int 4195 mxge_add_msix_irqs(mxge_softc_t *sc) 4196 { 4197 size_t bytes; 4198 int count, err, i, rid; 4199 4200 rid = PCIR_BAR(2); 4201 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 4202 &rid, RF_ACTIVE); 4203 4204 if (sc->msix_table_res == NULL) { 4205 device_printf(sc->dev, "couldn't alloc MSIX table res\n"); 4206 return ENXIO; 4207 } 4208 4209 count = sc->num_slices; 4210 err = pci_alloc_msix(sc->dev, &count); 4211 if (err != 0) { 4212 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d" 4213 "err = %d \n", sc->num_slices, err); 4214 goto abort_with_msix_table; 4215 } 4216 if (count < sc->num_slices) { 4217 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n", 4218 count, sc->num_slices); 4219 device_printf(sc->dev, 4220 "Try setting hw.mxge.max_slices to %d\n", 4221 count); 4222 err = ENOSPC; 4223 goto abort_with_msix; 4224 } 4225 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices; 4226 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4227 if (sc->msix_irq_res == NULL) { 4228 err = ENOMEM; 4229 goto abort_with_msix; 4230 } 4231 4232 for (i = 0; i < sc->num_slices; i++) { 4233 rid = i + 1; 4234 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev, 4235 SYS_RES_IRQ, 4236 &rid, RF_ACTIVE); 4237 if (sc->msix_irq_res[i] == NULL) { 4238 device_printf(sc->dev, "couldn't allocate IRQ res" 4239 " for message %d\n", i); 4240 err = ENXIO; 4241 goto abort_with_res; 4242 } 4243 } 4244 4245 bytes = sizeof (*sc->msix_ih) * sc->num_slices; 4246 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4247 4248 for (i = 0; i < sc->num_slices; i++) { 4249 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i], 4250 INTR_TYPE_NET | INTR_MPSAFE, 4251 #if __FreeBSD_version > 700030 4252 NULL, 4253 #endif 4254 mxge_intr, &sc->ss[i], &sc->msix_ih[i]); 4255 if (err != 0) { 4256 device_printf(sc->dev, "couldn't setup intr for " 4257 "message %d\n", i); 4258 goto abort_with_intr; 4259 } 4260 } 4261 4262 if (mxge_verbose) { 4263 device_printf(sc->dev, "using %d msix IRQs:", 4264 sc->num_slices); 4265 for (i = 0; i < sc->num_slices; i++) 4266 printf(" %ld", rman_get_start(sc->msix_irq_res[i])); 4267 printf("\n"); 4268 } 4269 return (0); 4270 4271 abort_with_intr: 4272 for (i = 0; i < sc->num_slices; i++) { 4273 if (sc->msix_ih[i] != NULL) { 4274 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4275 sc->msix_ih[i]); 4276 sc->msix_ih[i] = NULL; 4277 } 4278 } 4279 free(sc->msix_ih, M_DEVBUF); 4280 4281 4282 abort_with_res: 4283 for (i = 0; i < sc->num_slices; i++) { 4284 rid = i + 1; 4285 if (sc->msix_irq_res[i] != NULL) 4286 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4287 sc->msix_irq_res[i]); 4288 sc->msix_irq_res[i] = NULL; 4289 } 4290 free(sc->msix_irq_res, M_DEVBUF); 4291 4292 4293 abort_with_msix: 4294 pci_release_msi(sc->dev); 4295 4296 abort_with_msix_table: 4297 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4298 sc->msix_table_res); 4299 4300 return err; 4301 } 4302 4303 static int 4304 mxge_add_single_irq(mxge_softc_t *sc) 4305 { 4306 int count, err, rid; 4307 4308 count = pci_msi_count(sc->dev); 4309 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) { 4310 rid = 1; 4311 } else { 4312 rid = 0; 4313 sc->legacy_irq = 1; 4314 } 4315 sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0, 4316 1, RF_SHAREABLE | RF_ACTIVE); 4317 if (sc->irq_res == NULL) { 4318 device_printf(sc->dev, "could not alloc interrupt\n"); 4319 return ENXIO; 4320 } 4321 if (mxge_verbose) 4322 device_printf(sc->dev, "using %s irq %ld\n", 4323 sc->legacy_irq ? "INTx" : "MSI", 4324 rman_get_start(sc->irq_res)); 4325 err = bus_setup_intr(sc->dev, sc->irq_res, 4326 INTR_TYPE_NET | INTR_MPSAFE, 4327 #if __FreeBSD_version > 700030 4328 NULL, 4329 #endif 4330 mxge_intr, &sc->ss[0], &sc->ih); 4331 if (err != 0) { 4332 bus_release_resource(sc->dev, SYS_RES_IRQ, 4333 sc->legacy_irq ? 0 : 1, sc->irq_res); 4334 if (!sc->legacy_irq) 4335 pci_release_msi(sc->dev); 4336 } 4337 return err; 4338 } 4339 4340 static void 4341 mxge_rem_msix_irqs(mxge_softc_t *sc) 4342 { 4343 int i, rid; 4344 4345 for (i = 0; i < sc->num_slices; i++) { 4346 if (sc->msix_ih[i] != NULL) { 4347 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4348 sc->msix_ih[i]); 4349 sc->msix_ih[i] = NULL; 4350 } 4351 } 4352 free(sc->msix_ih, M_DEVBUF); 4353 4354 for (i = 0; i < sc->num_slices; i++) { 4355 rid = i + 1; 4356 if (sc->msix_irq_res[i] != NULL) 4357 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4358 sc->msix_irq_res[i]); 4359 sc->msix_irq_res[i] = NULL; 4360 } 4361 free(sc->msix_irq_res, M_DEVBUF); 4362 4363 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4364 sc->msix_table_res); 4365 4366 pci_release_msi(sc->dev); 4367 return; 4368 } 4369 4370 static void 4371 mxge_rem_single_irq(mxge_softc_t *sc) 4372 { 4373 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 4374 bus_release_resource(sc->dev, SYS_RES_IRQ, 4375 sc->legacy_irq ? 0 : 1, sc->irq_res); 4376 if (!sc->legacy_irq) 4377 pci_release_msi(sc->dev); 4378 } 4379 4380 static void 4381 mxge_rem_irq(mxge_softc_t *sc) 4382 { 4383 if (sc->num_slices > 1) 4384 mxge_rem_msix_irqs(sc); 4385 else 4386 mxge_rem_single_irq(sc); 4387 } 4388 4389 static int 4390 mxge_add_irq(mxge_softc_t *sc) 4391 { 4392 int err; 4393 4394 if (sc->num_slices > 1) 4395 err = mxge_add_msix_irqs(sc); 4396 else 4397 err = mxge_add_single_irq(sc); 4398 4399 if (0 && err == 0 && sc->num_slices > 1) { 4400 mxge_rem_msix_irqs(sc); 4401 err = mxge_add_msix_irqs(sc); 4402 } 4403 return err; 4404 } 4405 4406 4407 static int 4408 mxge_attach(device_t dev) 4409 { 4410 mxge_softc_t *sc = device_get_softc(dev); 4411 struct ifnet *ifp; 4412 int err, rid; 4413 4414 sc->dev = dev; 4415 mxge_fetch_tunables(sc); 4416 4417 err = bus_dma_tag_create(NULL, /* parent */ 4418 1, /* alignment */ 4419 0, /* boundary */ 4420 BUS_SPACE_MAXADDR, /* low */ 4421 BUS_SPACE_MAXADDR, /* high */ 4422 NULL, NULL, /* filter */ 4423 65536 + 256, /* maxsize */ 4424 MXGE_MAX_SEND_DESC, /* num segs */ 4425 65536, /* maxsegsize */ 4426 0, /* flags */ 4427 NULL, NULL, /* lock */ 4428 &sc->parent_dmat); /* tag */ 4429 4430 if (err != 0) { 4431 device_printf(sc->dev, "Err %d allocating parent dmat\n", 4432 err); 4433 goto abort_with_nothing; 4434 } 4435 4436 ifp = sc->ifp = if_alloc(IFT_ETHER); 4437 if (ifp == NULL) { 4438 device_printf(dev, "can not if_alloc()\n"); 4439 err = ENOSPC; 4440 goto abort_with_parent_dmat; 4441 } 4442 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4443 4444 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", 4445 device_get_nameunit(dev)); 4446 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF); 4447 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), 4448 "%s:drv", device_get_nameunit(dev)); 4449 mtx_init(&sc->driver_mtx, sc->driver_mtx_name, 4450 MTX_NETWORK_LOCK, MTX_DEF); 4451 4452 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); 4453 4454 mxge_setup_cfg_space(sc); 4455 4456 /* Map the board into the kernel */ 4457 rid = PCIR_BARS; 4458 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, 4459 ~0, 1, RF_ACTIVE); 4460 if (sc->mem_res == NULL) { 4461 device_printf(dev, "could not map memory\n"); 4462 err = ENXIO; 4463 goto abort_with_lock; 4464 } 4465 sc->sram = rman_get_virtual(sc->mem_res); 4466 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4467 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4468 device_printf(dev, "impossible memory region size %ld\n", 4469 rman_get_size(sc->mem_res)); 4470 err = ENXIO; 4471 goto abort_with_mem_res; 4472 } 4473 4474 /* make NULL terminated copy of the EEPROM strings section of 4475 lanai SRAM */ 4476 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4477 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4478 rman_get_bushandle(sc->mem_res), 4479 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4480 sc->eeprom_strings, 4481 MXGE_EEPROM_STRINGS_SIZE - 2); 4482 err = mxge_parse_strings(sc); 4483 if (err != 0) 4484 goto abort_with_mem_res; 4485 4486 /* Enable write combining for efficient use of PCIe bus */ 4487 mxge_enable_wc(sc); 4488 4489 /* Allocate the out of band dma memory */ 4490 err = mxge_dma_alloc(sc, &sc->cmd_dma, 4491 sizeof (mxge_cmd_t), 64); 4492 if (err != 0) 4493 goto abort_with_mem_res; 4494 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 4495 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4496 if (err != 0) 4497 goto abort_with_cmd_dma; 4498 4499 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4500 if (err != 0) 4501 goto abort_with_zeropad_dma; 4502 4503 /* select & load the firmware */ 4504 err = mxge_select_firmware(sc); 4505 if (err != 0) 4506 goto abort_with_dmabench; 4507 sc->intr_coal_delay = mxge_intr_coal_delay; 4508 4509 mxge_slice_probe(sc); 4510 err = mxge_alloc_slices(sc); 4511 if (err != 0) 4512 goto abort_with_dmabench; 4513 4514 err = mxge_reset(sc, 0); 4515 if (err != 0) 4516 goto abort_with_slices; 4517 4518 err = mxge_alloc_rings(sc); 4519 if (err != 0) { 4520 device_printf(sc->dev, "failed to allocate rings\n"); 4521 goto abort_with_dmabench; 4522 } 4523 4524 err = mxge_add_irq(sc); 4525 if (err != 0) { 4526 device_printf(sc->dev, "failed to add irq\n"); 4527 goto abort_with_rings; 4528 } 4529 4530 ifp->if_baudrate = IF_Gbps(10UL); 4531 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 4532 IFCAP_VLAN_MTU | IFCAP_LRO; 4533 4534 #ifdef MXGE_NEW_VLAN_API 4535 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; 4536 #endif 4537 4538 sc->max_mtu = mxge_max_mtu(sc); 4539 if (sc->max_mtu >= 9000) 4540 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 4541 else 4542 device_printf(dev, "MTU limited to %d. Install " 4543 "latest firmware for 9000 byte jumbo support\n", 4544 sc->max_mtu - ETHER_HDR_LEN); 4545 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4546 ifp->if_capenable = ifp->if_capabilities; 4547 if (sc->lro_cnt == 0) 4548 ifp->if_capenable &= ~IFCAP_LRO; 4549 sc->csum_flag = 1; 4550 ifp->if_init = mxge_init; 4551 ifp->if_softc = sc; 4552 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4553 ifp->if_ioctl = mxge_ioctl; 4554 ifp->if_start = mxge_start; 4555 /* Initialise the ifmedia structure */ 4556 ifmedia_init(&sc->media, 0, mxge_media_change, 4557 mxge_media_status); 4558 mxge_set_media(sc, IFM_ETHER | IFM_AUTO); 4559 mxge_media_probe(sc); 4560 ether_ifattach(ifp, sc->mac_addr); 4561 /* ether_ifattach sets mtu to 1500 */ 4562 if (ifp->if_capabilities & IFCAP_JUMBO_MTU) 4563 ifp->if_mtu = 9000; 4564 4565 mxge_add_sysctls(sc); 4566 #ifdef IFNET_BUF_RING 4567 ifp->if_transmit = mxge_transmit; 4568 ifp->if_qflush = mxge_qflush; 4569 #endif 4570 return 0; 4571 4572 abort_with_rings: 4573 mxge_free_rings(sc); 4574 abort_with_slices: 4575 mxge_free_slices(sc); 4576 abort_with_dmabench: 4577 mxge_dma_free(&sc->dmabench_dma); 4578 abort_with_zeropad_dma: 4579 mxge_dma_free(&sc->zeropad_dma); 4580 abort_with_cmd_dma: 4581 mxge_dma_free(&sc->cmd_dma); 4582 abort_with_mem_res: 4583 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4584 abort_with_lock: 4585 pci_disable_busmaster(dev); 4586 mtx_destroy(&sc->cmd_mtx); 4587 mtx_destroy(&sc->driver_mtx); 4588 if_free(ifp); 4589 abort_with_parent_dmat: 4590 bus_dma_tag_destroy(sc->parent_dmat); 4591 4592 abort_with_nothing: 4593 return err; 4594 } 4595 4596 static int 4597 mxge_detach(device_t dev) 4598 { 4599 mxge_softc_t *sc = device_get_softc(dev); 4600 4601 if (mxge_vlans_active(sc)) { 4602 device_printf(sc->dev, 4603 "Detach vlans before removing module\n"); 4604 return EBUSY; 4605 } 4606 mtx_lock(&sc->driver_mtx); 4607 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 4608 mxge_close(sc); 4609 mtx_unlock(&sc->driver_mtx); 4610 ether_ifdetach(sc->ifp); 4611 callout_drain(&sc->co_hdl); 4612 ifmedia_removeall(&sc->media); 4613 mxge_dummy_rdma(sc, 0); 4614 mxge_rem_sysctls(sc); 4615 mxge_rem_irq(sc); 4616 mxge_free_rings(sc); 4617 mxge_free_slices(sc); 4618 mxge_dma_free(&sc->dmabench_dma); 4619 mxge_dma_free(&sc->zeropad_dma); 4620 mxge_dma_free(&sc->cmd_dma); 4621 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4622 pci_disable_busmaster(dev); 4623 mtx_destroy(&sc->cmd_mtx); 4624 mtx_destroy(&sc->driver_mtx); 4625 if_free(sc->ifp); 4626 bus_dma_tag_destroy(sc->parent_dmat); 4627 return 0; 4628 } 4629 4630 static int 4631 mxge_shutdown(device_t dev) 4632 { 4633 return 0; 4634 } 4635 4636 /* 4637 This file uses Myri10GE driver indentation. 4638 4639 Local Variables: 4640 c-file-style:"linux" 4641 tab-width:8 4642 End: 4643 */ 4644