/******************************************************************************

Copyright (c) 2006-2009, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice,
   this list of conditions and the following disclaimer.

2. Neither the name of the Myricom Inc, nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>

/* count xmits ourselves, rather than via drbr */
#define NO_SLOW_STATS
#include <net/if.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <net/zlib.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <machine/bus.h>
#include <machine/in_cksum.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>
#include <sys/smp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */

#include <vm/vm.h>		/* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif

#include <dev/mxge/mxge_mcp.h>
#include <dev/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/mxge/if_mxge_var.h>
#ifdef IFNET_BUF_RING
#include <sys/buf_ring.h>
#endif

#include "opt_inet.h"

/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_lro_cnt = 8;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
static int mxge_always_promisc = 0;
static int mxge_initial_mtu = ETHERMTU_JUMBO;
static int mxge_throttle = 0;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";

static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),
	{0, 0}
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc, int down);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);

static int
mxge_probe(device_t dev)
{
	int rev;


	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		rev = pci_get_revid(dev);
		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n",
				      rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}

static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386) || defined(__amd64)
	vm_offset_t len;
	int err;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram,
			       len, PAT_WRITE_COMBINING);
	if (err != 0) {
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
			      err);
		sc->wc = 0;
	}
#endif
}


/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	if (error == 0) {
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}

static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}


static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}
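
/*
 * Usage sketch (illustrative; mirrors how callers elsewhere in this
 * driver pair the two helpers): allocate a coherent region, use its
 * kernel virtual address (dma.addr) and its bus address (dma.bus_addr),
 * then release it with mxge_dma_free():
 *
 *	mxge_dma_t dma;
 *	if (mxge_dma_alloc(sc, &dma, 4096, 4096) == 0) {
 *		... program dma.bus_addr into the NIC, touch dma.addr ...
 *		mxge_dma_free(&dma);
 *	}
 */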

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */

static int
mxge_parse_strings(mxge_softc_t *sc)
{
#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)

	char *ptr, *limit;
	int i, found_mac;

	ptr = sc->eeprom_strings;
	limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
	found_mac = 0;
	while (ptr < limit && *ptr != '\0') {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			ptr += 1;
			sc->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				ptr += 3;
				if ((ptr + 2) > limit)
					goto abort;
				sc->mac_addr[i] = strtoul(ptr, NULL, 16);
				found_mac = 1;
			}
		} else if (memcmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strncpy(sc->product_code_string, ptr,
				sizeof (sc->product_code_string) - 1);
		} else if (memcmp(ptr, "SN=", 3) == 0) {
			ptr += 3;
			strncpy(sc->serial_number_string, ptr,
				sizeof (sc->serial_number_string) - 1);
		}
		MXGE_NEXT_STRING(ptr);
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}

#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;


	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function. Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves. This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	off = base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);


	if (va == NULL) {
		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif


static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";


	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */

	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}
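
/*
 * Worked example of the arithmetic above (illustrative numbers): with
 * len = 4096 and a read test returning cmd.data0 == (100 << 16) | 328,
 * the NIC completed 100 transfers in 328 half-microsecond ticks.  Each
 * tick is 0.5us, so bytes-per-tick must be doubled to get bytes/us,
 * which numerically equals MB/s:
 *
 *	read_dma = (100 * 4096 * 2) / 328 = ~2497 MB/s
 */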
" 593 "Please install up to date fw\n"); 594 return status; 595 } 596 597 static int 598 mxge_select_firmware(mxge_softc_t *sc) 599 { 600 int aligned = 0; 601 int force_firmware = mxge_force_firmware; 602 603 if (sc->throttle) 604 force_firmware = sc->throttle; 605 606 if (force_firmware != 0) { 607 if (force_firmware == 1) 608 aligned = 1; 609 else 610 aligned = 0; 611 if (mxge_verbose) 612 device_printf(sc->dev, 613 "Assuming %s completions (forced)\n", 614 aligned ? "aligned" : "unaligned"); 615 goto abort; 616 } 617 618 /* if the PCIe link width is 4 or less, we can use the aligned 619 firmware and skip any checks */ 620 if (sc->link_width != 0 && sc->link_width <= 4) { 621 device_printf(sc->dev, 622 "PCIe x%d Link, expect reduced performance\n", 623 sc->link_width); 624 aligned = 1; 625 goto abort; 626 } 627 628 if (0 == mxge_firmware_probe(sc)) 629 return 0; 630 631 abort: 632 if (aligned) { 633 sc->fw_name = mxge_fw_aligned; 634 sc->tx_boundary = 4096; 635 } else { 636 sc->fw_name = mxge_fw_unaligned; 637 sc->tx_boundary = 2048; 638 } 639 return (mxge_load_firmware(sc, 0)); 640 } 641 642 union qualhack 643 { 644 const char *ro_char; 645 char *rw_char; 646 }; 647 648 static int 649 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 650 { 651 652 653 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 654 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 655 be32toh(hdr->mcp_type)); 656 return EIO; 657 } 658 659 /* save firmware version for sysctl */ 660 strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version)); 661 if (mxge_verbose) 662 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 663 664 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 665 &sc->fw_ver_minor, &sc->fw_ver_tiny); 666 667 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 668 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 669 device_printf(sc->dev, "Found firmware version %s\n", 670 sc->fw_version); 671 device_printf(sc->dev, "Driver needs %d.%d\n", 672 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 673 return EINVAL; 674 } 675 return 0; 676 677 } 678 679 static void * 680 z_alloc(void *nil, u_int items, u_int size) 681 { 682 void *ptr; 683 684 ptr = malloc(items * size, M_TEMP, M_NOWAIT); 685 return ptr; 686 } 687 688 static void 689 z_free(void *nil, void *ptr) 690 { 691 free(ptr, M_TEMP); 692 } 693 694 695 static int 696 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 697 { 698 z_stream zs; 699 char *inflate_buffer; 700 const struct firmware *fw; 701 const mcp_gen_header_t *hdr; 702 unsigned hdr_offset; 703 int status; 704 unsigned int i; 705 char dummy; 706 size_t fw_len; 707 708 fw = firmware_get(sc->fw_name); 709 if (fw == NULL) { 710 device_printf(sc->dev, "Could not find firmware image %s\n", 711 sc->fw_name); 712 return ENOENT; 713 } 714 715 716 717 /* setup zlib and decompress f/w */ 718 bzero(&zs, sizeof (zs)); 719 zs.zalloc = z_alloc; 720 zs.zfree = z_free; 721 status = inflateInit(&zs); 722 if (status != Z_OK) { 723 status = EIO; 724 goto abort_with_fw; 725 } 726 727 /* the uncompressed size is stored as the firmware version, 728 which would otherwise go unused */ 729 fw_len = (size_t) fw->version; 730 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); 731 if (inflate_buffer == NULL) 732 goto abort_with_zs; 733 zs.avail_in = fw->datasize; 734 zs.next_in = __DECONST(char *, fw->data); 735 zs.avail_out = fw_len; 736 zs.next_out = inflate_buffer; 737 status = inflate(&zs, Z_FINISH); 738 if (status != Z_STREAM_END) { 739 device_printf(sc->dev, "zlib %d\n", status); 
740 status = EIO; 741 goto abort_with_buffer; 742 } 743 744 /* check id */ 745 hdr_offset = htobe32(*(const uint32_t *) 746 (inflate_buffer + MCP_HEADER_PTR_OFFSET)); 747 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 748 device_printf(sc->dev, "Bad firmware file"); 749 status = EIO; 750 goto abort_with_buffer; 751 } 752 hdr = (const void*)(inflate_buffer + hdr_offset); 753 754 status = mxge_validate_firmware(sc, hdr); 755 if (status != 0) 756 goto abort_with_buffer; 757 758 /* Copy the inflated firmware to NIC SRAM. */ 759 for (i = 0; i < fw_len; i += 256) { 760 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 761 inflate_buffer + i, 762 min(256U, (unsigned)(fw_len - i))); 763 wmb(); 764 dummy = *sc->sram; 765 wmb(); 766 } 767 768 *limit = fw_len; 769 status = 0; 770 abort_with_buffer: 771 free(inflate_buffer, M_TEMP); 772 abort_with_zs: 773 inflateEnd(&zs); 774 abort_with_fw: 775 firmware_put(fw, FIRMWARE_UNLOAD); 776 return status; 777 } 778 779 /* 780 * Enable or disable periodic RDMAs from the host to make certain 781 * chipsets resend dropped PCIe messages 782 */ 783 784 static void 785 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 786 { 787 char buf_bytes[72]; 788 volatile uint32_t *confirm; 789 volatile char *submit; 790 uint32_t *buf, dma_low, dma_high; 791 int i; 792 793 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 794 795 /* clear confirmation addr */ 796 confirm = (volatile uint32_t *)sc->cmd; 797 *confirm = 0; 798 wmb(); 799 800 /* send an rdma command to the PCIe engine, and wait for the 801 response in the confirmation address. The firmware should 802 write a -1 there to indicate it is alive and well 803 */ 804 805 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 806 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 807 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 808 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 809 buf[2] = htobe32(0xffffffff); /* confirm data */ 810 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 811 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 812 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 813 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 814 buf[5] = htobe32(enable); /* enable? */ 815 816 817 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 818 819 mxge_pio_copy(submit, buf, 64); 820 wmb(); 821 DELAY(1000); 822 wmb(); 823 i = 0; 824 while (*confirm != 0xffffffff && i < 20) { 825 DELAY(1000); 826 i++; 827 } 828 if (*confirm != 0xffffffff) { 829 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 830 (enable ? 
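
/*
 * Note on the scratch-buffer idiom used above and again in
 * mxge_send_cmd() below: the NIC wants the 64-byte command block
 * copied from an 8-byte-aligned source, so a 72-byte stack array is
 * over-allocated and rounded up via (addr + 7) & ~7.  For example
 * (illustrative address), if buf_bytes starts at 0x...1005, buf
 * becomes 0x...1008, and 64 aligned bytes still fit inside the
 * 72-byte array.
 */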
"enable" : "disable"), confirm, 831 *confirm); 832 } 833 return; 834 } 835 836 static int 837 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 838 { 839 mcp_cmd_t *buf; 840 char buf_bytes[sizeof(*buf) + 8]; 841 volatile mcp_cmd_response_t *response = sc->cmd; 842 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 843 uint32_t dma_low, dma_high; 844 int err, sleep_total = 0; 845 846 /* ensure buf is aligned to 8 bytes */ 847 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 848 849 buf->data0 = htobe32(data->data0); 850 buf->data1 = htobe32(data->data1); 851 buf->data2 = htobe32(data->data2); 852 buf->cmd = htobe32(cmd); 853 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 854 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 855 856 buf->response_addr.low = htobe32(dma_low); 857 buf->response_addr.high = htobe32(dma_high); 858 mtx_lock(&sc->cmd_mtx); 859 response->result = 0xffffffff; 860 wmb(); 861 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 862 863 /* wait up to 20ms */ 864 err = EAGAIN; 865 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 866 bus_dmamap_sync(sc->cmd_dma.dmat, 867 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 868 wmb(); 869 switch (be32toh(response->result)) { 870 case 0: 871 data->data0 = be32toh(response->data); 872 err = 0; 873 break; 874 case 0xffffffff: 875 DELAY(1000); 876 break; 877 case MXGEFW_CMD_UNKNOWN: 878 err = ENOSYS; 879 break; 880 case MXGEFW_CMD_ERROR_UNALIGNED: 881 err = E2BIG; 882 break; 883 case MXGEFW_CMD_ERROR_BUSY: 884 err = EBUSY; 885 break; 886 case MXGEFW_CMD_ERROR_I2C_ABSENT: 887 err = ENXIO; 888 break; 889 default: 890 device_printf(sc->dev, 891 "mxge: command %d " 892 "failed, result = %d\n", 893 cmd, be32toh(response->result)); 894 err = ENXIO; 895 break; 896 } 897 if (err != EAGAIN) 898 break; 899 } 900 if (err == EAGAIN) 901 device_printf(sc->dev, "mxge: command %d timed out" 902 "result = %d\n", 903 cmd, be32toh(response->result)); 904 mtx_unlock(&sc->cmd_mtx); 905 return err; 906 } 907 908 static int 909 mxge_adopt_running_firmware(mxge_softc_t *sc) 910 { 911 struct mcp_gen_header *hdr; 912 const size_t bytes = sizeof (struct mcp_gen_header); 913 size_t hdr_offset; 914 int status; 915 916 /* find running firmware header */ 917 hdr_offset = htobe32(*(volatile uint32_t *) 918 (sc->sram + MCP_HEADER_PTR_OFFSET)); 919 920 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 921 device_printf(sc->dev, 922 "Running firmware has bad header offset (%d)\n", 923 (int)hdr_offset); 924 return EIO; 925 } 926 927 /* copy header of running firmware from SRAM to host memory to 928 * validate firmware */ 929 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 930 if (hdr == NULL) { 931 device_printf(sc->dev, "could not malloc firmware hdr\n"); 932 return ENOMEM; 933 } 934 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 935 rman_get_bushandle(sc->mem_res), 936 hdr_offset, (char *)hdr, bytes); 937 status = mxge_validate_firmware(sc, hdr); 938 free(hdr, M_DEVBUF); 939 940 /* 941 * check to see if adopted firmware has bug where adopting 942 * it will cause broadcasts to be filtered unless the NIC 943 * is kept in ALLMULTI mode 944 */ 945 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 946 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 947 sc->adopted_rx_filter_bug = 1; 948 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 949 "working around rx filter bug\n", 950 sc->fw_ver_major, sc->fw_ver_minor, 951 sc->fw_ver_tiny); 952 } 953 954 return status; 955 } 956 957 958 static int 959 

static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not malloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	free(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}

	return status;
}


static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
				      "Using firmware currently running on NIC"
				      ".  For optimal\n");
			device_printf(sc->dev,
				      "performance consider loading optimized "
				      "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
					/* where the code starts*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8); 	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "handoff failed (%p = 0x%x)\n",
			      confirm, *confirm);

		return ENXIO;
	}
	return 0;
}

static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;


	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}
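
/*
 * In the packing above, a station address of (illustrative value)
 * 00:60:dd:47:ab:cd goes to the firmware as data0 = 0x0060dd47 and
 * data1 = 0x0000abcd.
 */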

static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set promisc mode\n");
	}
}

static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */

	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */

	if_maddr_rlock(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		      &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		      &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "Failed "
				      "MXGEFW_JOIN_MULTICAST_GROUP, "
				      "error status: %d\n", err);
			/* abort, leaving multicast filtering off */
			if_maddr_runlock(ifp);
			return;
		}
	}
	if_maddr_runlock(ifp);
	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}

static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if it we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}
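
/*
 * Concrete numbers for the fallback above (assuming a 4KB-page
 * platform where MJUMPAGESIZE is 4096, and the usual MXGEFW_PAD of
 * 2): when the firmware accepts MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS
 * the driver can advertise the full firmware MTU; otherwise the
 * usable MTU falls back to MJUMPAGESIZE - MXGEFW_PAD = 4094 bytes.
 */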

static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);


	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}


	if (interrupts_setup) {
		/* Now exchange information about interrupts  */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);


	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);


	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}


	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);


	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lro_bad_csum = 0;
		ss->lro_queued = 0;
		ss->lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			bzero(ss->fw_stats, sizeof *ss->fw_stats);
		}
	}
	sc->rdma_tags_available = 15;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	if (sc->throttle) {
		cmd.data0 = sc->throttle;
		if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
				  &cmd)) {
			device_printf(sc->dev,
				      "can't enable throttle\n");
		}
	}
	return status;
}

static int
mxge_change_throttle(SYSCTL_HANDLER_ARGS)
{
	mxge_cmd_t cmd;
	mxge_softc_t *sc;
	int err;
	unsigned int throttle;

	sc = arg1;
	throttle = sc->throttle;
	err = sysctl_handle_int(oidp, &throttle, arg2, req);
	if (err != 0) {
		return err;
	}

	if (throttle == sc->throttle)
		return 0;

	if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	cmd.data0 = throttle;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
	if (err == 0)
		sc->throttle = throttle;
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0) {
		return err;
	}
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0) {
		return err;
	}
	if (enabled == sc->pause)
		return 0;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_pause(sc, enabled);
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
{
	struct ifnet *ifp;
	int err = 0;

	ifp = sc->ifp;
	if (lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	else
		ifp->if_capenable |= IFCAP_LRO;
	sc->lro_cnt = lro_cnt;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		mxge_close(sc, 0);
		err = mxge_open(sc);
	}
	return err;
}

static int
mxge_change_lro(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int lro_cnt;
	int err;

	sc = arg1;
	lro_cnt = sc->lro_cnt;
	err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
	if (err != 0)
		return err;

	if (lro_cnt == sc->lro_cnt)
		return 0;

	if (lro_cnt > 128)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_lro_locked(sc, lro_cnt);
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}
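
/*
 * All of the sysctl handlers above follow the same pattern: copy the
 * current value, let sysctl_handle_int() apply any userland update,
 * bail out early if nothing changed, validate the new value, and only
 * then commit it under driver_mtx.  They back per-device nodes, so an
 * update looks like (illustrative invocation):
 *
 *	# sysctl dev.mxge.0.intr_coal_delay=30
 */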

static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int slice;

	if (sc->slice_sysctl_tree == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		if (ss == NULL || ss->sysctl_tree == NULL)
			continue;
		sysctl_ctx_free(&ss->sysctl_ctx);
		ss->sysctl_tree = NULL;
	}
	sysctl_ctx_free(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = NULL;
}
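
/*
 * Note the teardown order above: each per-slice sysctl context is
 * freed before the parent "slice" node's context, the reverse of the
 * order in which mxge_add_sysctls() below creates them.
 */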

static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
	fw = sc->ss[0].fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
		       "firmware_version",
		       CTLFLAG_RD, &sc->fw_version,
		       0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
		       "serial_number",
		       CTLFLAG_RD, &sc->serial_number_string,
		       0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
		       "product_code",
		       CTLFLAG_RD, &sc->product_code_string,
		       0, "product_code");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "pcie_link_width",
		       CTLFLAG_RD, &sc->link_width,
		       0, "PCIe link width");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_boundary",
		       CTLFLAG_RD, &sc->tx_boundary,
		       0, "tx_boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_combine",
		       CTLFLAG_RD, &sc->wc,
		       0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_dma_MBs",
		       CTLFLAG_RD, &sc->read_dma,
		       0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_dma_MBs",
		       CTLFLAG_RD, &sc->write_dma,
		       0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_write_dma_MBs",
		       CTLFLAG_RD, &sc->read_write_dma,
		       0, "DMA concurrent Read/Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "watchdog_resets",
		       CTLFLAG_RD, &sc->watchdog_resets,
		       0, "Number of times NIC was reset");


	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"intr_coal_delay",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_intr_coal,
			"I", "interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"throttle",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_throttle,
			"I", "transmit throttling");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"flow_control_enabled",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_flow_control,
			"I", "enable flow control pause frames");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "deassert_wait",
		       CTLFLAG_RW, &mxge_deassert_wait,
		       0, "Wait for IRQ line to go low in ihandler");

	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"link_up",
			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
			0, mxge_handle_be32,
			"I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"rdma_tags_available",
			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
			0, mxge_handle_be32,
			"I", "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_crc32",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_crc32,
			0, mxge_handle_be32,
			"I", "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_phy",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_phy,
			0, mxge_handle_be32,
			"I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_error_or_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_link_error_or_filtered,
			0, mxge_handle_be32,
			"I", "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_overflow",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
			0, mxge_handle_be32,
			"I", "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_multicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_multicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_big_buffer",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_small_buffer",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_no_small_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_overrun",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
			0, mxge_handle_be32,
			"I", "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_pause",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_pause,
			0, mxge_handle_be32,
			"I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_runt",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
			0, mxge_handle_be32,
			"I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_unicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_unicast_filtered");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "verbose",
		       CTLFLAG_RW, &mxge_verbose,
		       0, "verbose printing");

	/* lro */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"lro_cnt",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_lro,
			"I", "number of lro merge queues");


	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree =
		SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
				"slice", CTLFLAG_RD, 0, "");

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		sprintf(slice_num, "%d", slice);
		ss->sysctl_tree =
			SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
					CTLFLAG_RD, 0, "");
		children = SYSCTL_CHILDREN(ss->sysctl_tree);
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_small_cnt",
			       CTLFLAG_RD, &ss->rx_small.cnt,
			       0, "rx_small_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_big_cnt",
			       CTLFLAG_RD, &ss->rx_big.cnt,
			       0, "rx_big_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
			       0, "number of lro merge queues flushed");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_queued", CTLFLAG_RD, &ss->lro_queued,
			       0, "number of frames appended to lro merge "
			       "queues");

#ifndef IFNET_BUF_RING
		/* only transmit from slice 0 for now */
		if (slice > 0)
			continue;
#endif
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_req",
			       CTLFLAG_RD, &ss->tx.req,
			       0, "tx_req");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_done",
			       CTLFLAG_RD, &ss->tx.done,
			       0, "tx_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_pkt_done",
			       CTLFLAG_RD, &ss->tx.pkt_done,
			       0, "tx_pkt_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_stall",
			       CTLFLAG_RD, &ss->tx.stall,
			       0, "tx_stall");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_wake",
			       CTLFLAG_RD, &ss->tx.wake,
			       0, "tx_wake");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_defrag",
			       CTLFLAG_RD, &ss->tx.defrag,
			       0, "tx_defrag");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_queue_active",
			       CTLFLAG_RD, &ss->tx.queue_active,
			       0, "tx_queue_active");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_activate",
			       CTLFLAG_RD, &ss->tx.activate,
			       0, "tx_activate");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_deactivate",
			       CTLFLAG_RD, &ss->tx.deactivate,
			       0, "tx_deactivate");
	}
}

/* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
   backwards one at a time and handle ring wraps */

static inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
			  mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
		wmb();
	}
}
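
/*
 * Example of the backwards copy (illustrative numbers): with
 * tx->mask == 0xff, tx->req == 0xfe and cnt == 3, the loop writes
 * slot 0x00 (src[2]) and then slot 0xff (src[1]), wrapping around the
 * ring.  src[0] is deliberately left for mxge_submit_req() below,
 * which writes the first slot last so the NIC never sees a valid
 * first descriptor ahead of the rest of the chain.
 */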

/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */

static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
		int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}

#if IFCAP_TSO4

static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, int ip_off)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct ip *ip;
	struct tcphdr *tcp;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	/* ensure we have the ethernet, IP and TCP
	   header together in the first mbuf, copy
	   it to a scratch buffer if not */
	if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
		m_copydata(m, 0, ip_off + sizeof (*ip),
			   ss->scratch);
		ip = (struct ip *)(ss->scratch + ip_off);
	} else {
		ip = (struct ip *)(mtod(m, char *) + ip_off);
	}
	if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2)
			    + sizeof (*tcp))) {
		m_copydata(m, 0, ip_off + (ip->ip_hl << 2)
			   + sizeof (*tcp), ss->scratch);
		ip = (struct ip *)(ss->scratch + ip_off);
	}

	tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
	cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2));
	cksum_offset = ip_off + (ip->ip_hl << 2);

	/* TSO implies checksum offload on this hardware */
	if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP)) == 0)) {
		/*
		 * If packet has full TCP csum, replace it with pseudo hdr
		 * sum that the NIC expects, otherwise the NIC will emit
		 * packets with bad TCP checksums.
		 */
		m->m_pkthdr.csum_flags = CSUM_TCP;
		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
		tcp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
			htons(IPPROTO_TCP + (m->m_pkthdr.len - cksum_offset)));
	}
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;


	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */

	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces*/
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (__predict_false(cksum_offset > seglen))
				cksum_offset -= seglen;
			else
				cksum_offset = 0;
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req-rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	ss->oerrors++;
	if (!once) {
		printf("tx->max_desc exceeded via TSO!\n");
		printf("mss = %d, %ld, %d!\n", mss,
		       (long)seg - (long)tx->seg_list, tx->max_desc);
		once = 1;
	}
	return;

}

#endif /* IFCAP_TSO4 */
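
/*
 * A small walk-through of the chop logic above (illustrative numbers):
 * with mss = 1448 and a 4000-byte payload segment entering the inner
 * loop with cum_len = 0, the descriptor covers 4000 bytes, but
 * cum_len_next = 4000 > mss marks it MXGEFW_FLAGS_TSO_CHOP so the
 * firmware cuts new TCP segments every 1448 bytes; cum_len_next
 * becomes 4000 % 1448 = 1104 and is carried into the next descriptor.
 */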
(long)seg - (long)tx->seg_list, tx->max_desc); 1989 once = 1; 1990 } 1991 return; 1992 1993 } 1994 1995 #endif /* IFCAP_TSO4 */ 1996 1997 #ifdef MXGE_NEW_VLAN_API 1998 /* 1999 * We reproduce the software vlan tag insertion from 2000 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" 2001 * vlan tag insertion. We need to advertise this in order to have the 2002 * vlan interface respect our csum offload flags. 2003 */ 2004 static struct mbuf * 2005 mxge_vlan_tag_insert(struct mbuf *m) 2006 { 2007 struct ether_vlan_header *evl; 2008 2009 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT); 2010 if (__predict_false(m == NULL)) 2011 return NULL; 2012 if (m->m_len < sizeof(*evl)) { 2013 m = m_pullup(m, sizeof(*evl)); 2014 if (__predict_false(m == NULL)) 2015 return NULL; 2016 } 2017 /* 2018 * Transform the Ethernet header into an Ethernet header 2019 * with 802.1Q encapsulation. 2020 */ 2021 evl = mtod(m, struct ether_vlan_header *); 2022 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 2023 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 2024 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 2025 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 2026 m->m_flags &= ~M_VLANTAG; 2027 return m; 2028 } 2029 #endif /* MXGE_NEW_VLAN_API */ 2030 2031 static void 2032 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 2033 { 2034 mxge_softc_t *sc; 2035 mcp_kreq_ether_send_t *req; 2036 bus_dma_segment_t *seg; 2037 struct mbuf *m_tmp; 2038 struct ifnet *ifp; 2039 mxge_tx_ring_t *tx; 2040 struct ip *ip; 2041 int cnt, cum_len, err, i, idx, odd_flag, ip_off; 2042 uint16_t pseudo_hdr_offset; 2043 uint8_t flags, cksum_offset; 2044 2045 2046 sc = ss->sc; 2047 ifp = sc->ifp; 2048 tx = &ss->tx; 2049 2050 ip_off = sizeof (struct ether_header); 2051 #ifdef MXGE_NEW_VLAN_API 2052 if (m->m_flags & M_VLANTAG) { 2053 m = mxge_vlan_tag_insert(m); 2054 if (__predict_false(m == NULL)) 2055 goto drop; 2056 ip_off += ETHER_VLAN_ENCAP_LEN; 2057 } 2058 #endif 2059 /* (try to) map the frame for DMA */ 2060 idx = tx->req & tx->mask; 2061 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 2062 m, tx->seg_list, &cnt, 2063 BUS_DMA_NOWAIT); 2064 if (__predict_false(err == EFBIG)) { 2065 /* Too many segments in the chain. Try 2066 to defrag */ 2067 m_tmp = m_defrag(m, M_NOWAIT); 2068 if (m_tmp == NULL) { 2069 goto drop; 2070 } 2071 ss->tx.defrag++; 2072 m = m_tmp; 2073 err = bus_dmamap_load_mbuf_sg(tx->dmat, 2074 tx->info[idx].map, 2075 m, tx->seg_list, &cnt, 2076 BUS_DMA_NOWAIT); 2077 } 2078 if (__predict_false(err != 0)) { 2079 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 2080 " packet len = %d\n", err, m->m_pkthdr.len); 2081 goto drop; 2082 } 2083 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 2084 BUS_DMASYNC_PREWRITE); 2085 tx->info[idx].m = m; 2086 2087 #if IFCAP_TSO4 2088 /* TSO is different enough, we handle it in another routine */ 2089 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 2090 mxge_encap_tso(ss, m, cnt, ip_off); 2091 return; 2092 } 2093 #endif 2094 2095 req = tx->req_list; 2096 cksum_offset = 0; 2097 pseudo_hdr_offset = 0; 2098 flags = MXGEFW_FLAGS_NO_TSO; 2099 2100 /* checksum offloading? 
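For CSUM_DELAY_DATA frames the stack sets csum_data to the offset
of the checksum field within the L4 header (16 for TCP, 6 for UDP,
assuming standard headers), so for a plain IPv4/TCP frame the
pseudo_hdr_offset computed below works out to 14 + 20 + 16 = 50.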
*/ 2101 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) { 2102 /* ensure ip header is in first mbuf, copy 2103 it to a scratch buffer if not */ 2104 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 2105 m_copydata(m, 0, ip_off + sizeof (*ip), 2106 ss->scratch); 2107 ip = (struct ip *)(ss->scratch + ip_off); 2108 } else { 2109 ip = (struct ip *)(mtod(m, char *) + ip_off); 2110 } 2111 cksum_offset = ip_off + (ip->ip_hl << 2); 2112 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2113 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2114 req->cksum_offset = cksum_offset; 2115 flags |= MXGEFW_FLAGS_CKSUM; 2116 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2117 } else { 2118 odd_flag = 0; 2119 } 2120 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2121 flags |= MXGEFW_FLAGS_SMALL; 2122 2123 /* convert segments into a request list */ 2124 cum_len = 0; 2125 seg = tx->seg_list; 2126 req->flags = MXGEFW_FLAGS_FIRST; 2127 for (i = 0; i < cnt; i++) { 2128 req->addr_low = 2129 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2130 req->addr_high = 2131 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2132 req->length = htobe16(seg->ds_len); 2133 req->cksum_offset = cksum_offset; 2134 if (cksum_offset > seg->ds_len) 2135 cksum_offset -= seg->ds_len; 2136 else 2137 cksum_offset = 0; 2138 req->pseudo_hdr_offset = pseudo_hdr_offset; 2139 req->pad = 0; /* complete solid 16-byte block */ 2140 req->rdma_count = 1; 2141 req->flags |= flags | ((cum_len & 1) * odd_flag); 2142 cum_len += seg->ds_len; 2143 seg++; 2144 req++; 2145 req->flags = 0; 2146 } 2147 req--; 2148 /* pad runts to 60 bytes */ 2149 if (cum_len < 60) { 2150 req++; 2151 req->addr_low = 2152 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2153 req->addr_high = 2154 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2155 req->length = htobe16(60 - cum_len); 2156 req->cksum_offset = 0; 2157 req->pseudo_hdr_offset = pseudo_hdr_offset; 2158 req->pad = 0; /* complete solid 16-byte block */ 2159 req->rdma_count = 1; 2160 req->flags |= flags | ((cum_len & 1) * odd_flag); 2161 cnt++; 2162 } 2163 2164 tx->req_list[0].rdma_count = cnt; 2165 #if 0 2166 /* print what the firmware will see */ 2167 for (i = 0; i < cnt; i++) { 2168 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2169 "cso:%d, flags:0x%x, rdma:%d\n", 2170 i, (int)ntohl(tx->req_list[i].addr_high), 2171 (int)ntohl(tx->req_list[i].addr_low), 2172 (int)ntohs(tx->req_list[i].length), 2173 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2174 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2175 tx->req_list[i].rdma_count); 2176 } 2177 printf("--------------\n"); 2178 #endif 2179 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2180 mxge_submit_req(tx, tx->req_list, cnt); 2181 #ifdef IFNET_BUF_RING 2182 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2183 /* tell the NIC to start polling this slice */ 2184 *tx->send_go = 1; 2185 tx->queue_active = 1; 2186 tx->activate++; 2187 wmb(); 2188 } 2189 #endif 2190 return; 2191 2192 drop: 2193 m_freem(m); 2194 ss->oerrors++; 2195 return; 2196 } 2197 2198 #ifdef IFNET_BUF_RING 2199 static void 2200 mxge_qflush(struct ifnet *ifp) 2201 { 2202 mxge_softc_t *sc = ifp->if_softc; 2203 mxge_tx_ring_t *tx; 2204 struct mbuf *m; 2205 int slice; 2206 2207 for (slice = 0; slice < sc->num_slices; slice++) { 2208 tx = &sc->ss[slice].tx; 2209 mtx_lock(&tx->mtx); 2210 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2211 m_freem(m); 2212 mtx_unlock(&tx->mtx); 2213 } 2214 if_qflush(ifp); 2215 } 2216 2217 static inline void 2218 mxge_start_locked(struct 
mxge_slice_state *ss) 2219 { 2220 mxge_softc_t *sc; 2221 struct mbuf *m; 2222 struct ifnet *ifp; 2223 mxge_tx_ring_t *tx; 2224 2225 sc = ss->sc; 2226 ifp = sc->ifp; 2227 tx = &ss->tx; 2228 2229 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2230 m = drbr_dequeue(ifp, tx->br); 2231 if (m == NULL) { 2232 return; 2233 } 2234 /* let BPF see it */ 2235 BPF_MTAP(ifp, m); 2236 2237 /* give it to the nic */ 2238 mxge_encap(ss, m); 2239 } 2240 /* ran out of transmit slots */ 2241 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) 2242 && (!drbr_empty(ifp, tx->br))) { 2243 ss->if_drv_flags |= IFF_DRV_OACTIVE; 2244 tx->stall++; 2245 } 2246 } 2247 2248 static int 2249 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2250 { 2251 mxge_softc_t *sc; 2252 struct ifnet *ifp; 2253 mxge_tx_ring_t *tx; 2254 int err; 2255 2256 sc = ss->sc; 2257 ifp = sc->ifp; 2258 tx = &ss->tx; 2259 2260 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 2261 IFF_DRV_RUNNING) { 2262 err = drbr_enqueue(ifp, tx->br, m); 2263 return (err); 2264 } 2265 2266 if (!drbr_needs_enqueue(ifp, tx->br) && 2267 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2268 /* let BPF see it */ 2269 BPF_MTAP(ifp, m); 2270 /* give it to the nic */ 2271 mxge_encap(ss, m); 2272 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2273 return (err); 2274 } 2275 if (!drbr_empty(ifp, tx->br)) 2276 mxge_start_locked(ss); 2277 return (0); 2278 } 2279 2280 static int 2281 mxge_transmit(struct ifnet *ifp, struct mbuf *m) 2282 { 2283 mxge_softc_t *sc = ifp->if_softc; 2284 struct mxge_slice_state *ss; 2285 mxge_tx_ring_t *tx; 2286 int err = 0; 2287 int slice; 2288 2289 slice = m->m_pkthdr.flowid; 2290 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ 2291 2292 ss = &sc->ss[slice]; 2293 tx = &ss->tx; 2294 2295 if (mtx_trylock(&tx->mtx)) { 2296 err = mxge_transmit_locked(ss, m); 2297 mtx_unlock(&tx->mtx); 2298 } else { 2299 err = drbr_enqueue(ifp, tx->br, m); 2300 } 2301 2302 return (err); 2303 } 2304 2305 #else 2306 2307 static inline void 2308 mxge_start_locked(struct mxge_slice_state *ss) 2309 { 2310 mxge_softc_t *sc; 2311 struct mbuf *m; 2312 struct ifnet *ifp; 2313 mxge_tx_ring_t *tx; 2314 2315 sc = ss->sc; 2316 ifp = sc->ifp; 2317 tx = &ss->tx; 2318 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2319 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 2320 if (m == NULL) { 2321 return; 2322 } 2323 /* let BPF see it */ 2324 BPF_MTAP(ifp, m); 2325 2326 /* give it to the nic */ 2327 mxge_encap(ss, m); 2328 } 2329 /* ran out of transmit slots */ 2330 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 2331 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2332 tx->stall++; 2333 } 2334 } 2335 #endif 2336 static void 2337 mxge_start(struct ifnet *ifp) 2338 { 2339 mxge_softc_t *sc = ifp->if_softc; 2340 struct mxge_slice_state *ss; 2341 2342 /* only use the first slice for now */ 2343 ss = &sc->ss[0]; 2344 mtx_lock(&ss->tx.mtx); 2345 mxge_start_locked(ss); 2346 mtx_unlock(&ss->tx.mtx); 2347 } 2348 2349 /* 2350 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2351 * at most 32 bytes at a time, so as to avoid involving the software 2352 * pio handler in the nic. 
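 * (Each mcp_kreq_ether_recv_t is just the two 32-bit halves of a
 * DMA address, 8 bytes, so the two 4-entry mxge_pio_copy() calls
 * below move exactly 32 bytes apiece.)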
We re-write the first segment's low 2353 * DMA address to mark it valid only after we write the entire chunk 2354 * in a burst 2355 */ 2356 static inline void 2357 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2358 mcp_kreq_ether_recv_t *src) 2359 { 2360 uint32_t low; 2361 2362 low = src->addr_low; 2363 src->addr_low = 0xffffffff; 2364 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2365 wmb(); 2366 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2367 wmb(); 2368 src->addr_low = low; 2369 dst->addr_low = low; 2370 wmb(); 2371 } 2372 2373 static int 2374 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2375 { 2376 bus_dma_segment_t seg; 2377 struct mbuf *m; 2378 mxge_rx_ring_t *rx = &ss->rx_small; 2379 int cnt, err; 2380 2381 m = m_gethdr(M_DONTWAIT, MT_DATA); 2382 if (m == NULL) { 2383 rx->alloc_fail++; 2384 err = ENOBUFS; 2385 goto done; 2386 } 2387 m->m_len = MHLEN; 2388 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2389 &seg, &cnt, BUS_DMA_NOWAIT); 2390 if (err != 0) { 2391 m_free(m); 2392 goto done; 2393 } 2394 rx->info[idx].m = m; 2395 rx->shadow[idx].addr_low = 2396 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2397 rx->shadow[idx].addr_high = 2398 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2399 2400 done: 2401 if ((idx & 7) == 7) 2402 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2403 return err; 2404 } 2405 2406 static int 2407 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2408 { 2409 bus_dma_segment_t seg[3]; 2410 struct mbuf *m; 2411 mxge_rx_ring_t *rx = &ss->rx_big; 2412 int cnt, err, i; 2413 2414 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2415 if (m == NULL) { 2416 rx->alloc_fail++; 2417 err = ENOBUFS; 2418 goto done; 2419 } 2420 m->m_len = rx->mlen; 2421 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2422 seg, &cnt, BUS_DMA_NOWAIT); 2423 if (err != 0) { 2424 m_free(m); 2425 goto done; 2426 } 2427 rx->info[idx].m = m; 2428 rx->shadow[idx].addr_low = 2429 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2430 rx->shadow[idx].addr_high = 2431 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2432 2433 #if MXGE_VIRT_JUMBOS 2434 for (i = 1; i < cnt; i++) { 2435 rx->shadow[idx + i].addr_low = 2436 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2437 rx->shadow[idx + i].addr_high = 2438 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2439 } 2440 #endif 2441 2442 done: 2443 for (i = 0; i < rx->nbufs; i++) { 2444 if ((idx & 7) == 7) { 2445 mxge_submit_8rx(&rx->lanai[idx - 7], 2446 &rx->shadow[idx - 7]); 2447 } 2448 idx++; 2449 } 2450 return err; 2451 } 2452 2453 /* 2454 * Myri10GE hardware checksums are not valid if the sender 2455 * padded the frame with non-zero padding. This is because 2456 * the firmware just does a simple 16-bit 1s complement 2457 * checksum across the entire frame, excluding the first 14 2458 * bytes. 
It is best to simply check the checksum and
2459 * tell the stack about it only if the checksum is good
2460 */
2461
2462 static inline uint16_t
2463 mxge_rx_csum(struct mbuf *m, int csum)
2464 {
2465 struct ether_header *eh;
2466 struct ip *ip;
2467 uint16_t c;
2468
2469 eh = mtod(m, struct ether_header *);
2470
2471 /* only deal with IPv4 TCP & UDP for now */
2472 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
2473 return 1;
2474 ip = (struct ip *)(eh + 1);
2475 if (__predict_false(ip->ip_p != IPPROTO_TCP &&
2476 ip->ip_p != IPPROTO_UDP))
2477 return 1;
2478 #ifdef INET
2479 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2480 htonl(ntohs(csum) + ntohs(ip->ip_len) -
2481 (ip->ip_hl << 2) + ip->ip_p));
2482 #else
2483 c = 1;
2484 #endif
2485 c ^= 0xffff;
2486 return (c);
2487 }
2488
2489 static void
2490 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2491 {
2492 struct ether_vlan_header *evl;
2493 struct ether_header *eh;
2494 uint32_t partial;
2495
2496 evl = mtod(m, struct ether_vlan_header *);
2497 eh = mtod(m, struct ether_header *);
2498
2499 /*
2500 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
2501 * after what the firmware thought was the end of the ethernet
2502 * header.
2503 */
2504
2505 /* put checksum into host byte order */
2506 *csum = ntohs(*csum);
2507 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2508 (*csum) += ~partial;
2509 (*csum) += ((*csum) < ~partial);
2510 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2511 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2512
2513 /* restore checksum to network byte order;
2514 later consumers expect this */
2515 *csum = htons(*csum);
2516
2517 /* save the tag */
2518 #ifdef MXGE_NEW_VLAN_API
2519 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
2520 #else
2521 {
2522 struct m_tag *mtag;
2523 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2524 M_NOWAIT);
2525 if (mtag == NULL)
2526 return;
2527 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2528 m_tag_prepend(m, mtag);
2529 }
2530
2531 #endif
2532 m->m_flags |= M_VLANTAG;
2533
2534 /*
2535 * Remove the 802.1q header by copying the Ethernet
2536 * addresses over it and adjusting the beginning of
2537 * the data in the mbuf. The encapsulated Ethernet
2538 * type field is already in place.
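 * Roughly, for a tagged frame:
 *   before: | dst(6) | src(6) | 0x8100(2) | tag(2) | type(2) | ...
 *   after :       (4 trimmed) | dst(6) | src(6) | type(2) | ...
 * with the m_adj() below trimming ETHER_VLAN_ENCAP_LEN == 4 bytes
 * off the front.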
2539 */ 2540 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2541 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2542 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2543 } 2544 2545 2546 static inline void 2547 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2548 { 2549 mxge_softc_t *sc; 2550 struct ifnet *ifp; 2551 struct mbuf *m; 2552 struct ether_header *eh; 2553 mxge_rx_ring_t *rx; 2554 bus_dmamap_t old_map; 2555 int idx; 2556 uint16_t tcpudp_csum; 2557 2558 sc = ss->sc; 2559 ifp = sc->ifp; 2560 rx = &ss->rx_big; 2561 idx = rx->cnt & rx->mask; 2562 rx->cnt += rx->nbufs; 2563 /* save a pointer to the received mbuf */ 2564 m = rx->info[idx].m; 2565 /* try to replace the received mbuf */ 2566 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2567 /* drop the frame -- the old mbuf is re-cycled */ 2568 ifp->if_ierrors++; 2569 return; 2570 } 2571 2572 /* unmap the received buffer */ 2573 old_map = rx->info[idx].map; 2574 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2575 bus_dmamap_unload(rx->dmat, old_map); 2576 2577 /* swap the bus_dmamap_t's */ 2578 rx->info[idx].map = rx->extra_map; 2579 rx->extra_map = old_map; 2580 2581 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2582 * aligned */ 2583 m->m_data += MXGEFW_PAD; 2584 2585 m->m_pkthdr.rcvif = ifp; 2586 m->m_len = m->m_pkthdr.len = len; 2587 ss->ipackets++; 2588 eh = mtod(m, struct ether_header *); 2589 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2590 mxge_vlan_tag_remove(m, &csum); 2591 } 2592 /* if the checksum is valid, mark it in the mbuf header */ 2593 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2594 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2595 return; 2596 /* otherwise, it was a UDP frame, or a TCP frame which 2597 we could not do LRO on. 
Tell the stack that the 2598 checksum is good */ 2599 m->m_pkthdr.csum_data = 0xffff; 2600 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2601 } 2602 /* flowid only valid if RSS hashing is enabled */ 2603 if (sc->num_slices > 1) { 2604 m->m_pkthdr.flowid = (ss - sc->ss); 2605 m->m_flags |= M_FLOWID; 2606 } 2607 /* pass the frame up the stack */ 2608 (*ifp->if_input)(ifp, m); 2609 } 2610 2611 static inline void 2612 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2613 { 2614 mxge_softc_t *sc; 2615 struct ifnet *ifp; 2616 struct ether_header *eh; 2617 struct mbuf *m; 2618 mxge_rx_ring_t *rx; 2619 bus_dmamap_t old_map; 2620 int idx; 2621 uint16_t tcpudp_csum; 2622 2623 sc = ss->sc; 2624 ifp = sc->ifp; 2625 rx = &ss->rx_small; 2626 idx = rx->cnt & rx->mask; 2627 rx->cnt++; 2628 /* save a pointer to the received mbuf */ 2629 m = rx->info[idx].m; 2630 /* try to replace the received mbuf */ 2631 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2632 /* drop the frame -- the old mbuf is re-cycled */ 2633 ifp->if_ierrors++; 2634 return; 2635 } 2636 2637 /* unmap the received buffer */ 2638 old_map = rx->info[idx].map; 2639 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2640 bus_dmamap_unload(rx->dmat, old_map); 2641 2642 /* swap the bus_dmamap_t's */ 2643 rx->info[idx].map = rx->extra_map; 2644 rx->extra_map = old_map; 2645 2646 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2647 * aligned */ 2648 m->m_data += MXGEFW_PAD; 2649 2650 m->m_pkthdr.rcvif = ifp; 2651 m->m_len = m->m_pkthdr.len = len; 2652 ss->ipackets++; 2653 eh = mtod(m, struct ether_header *); 2654 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2655 mxge_vlan_tag_remove(m, &csum); 2656 } 2657 /* if the checksum is valid, mark it in the mbuf header */ 2658 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2659 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2660 return; 2661 /* otherwise, it was a UDP frame, or a TCP frame which 2662 we could not do LRO on. 
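(Setting csum_data to 0xffff together with CSUM_DATA_VALID and
CSUM_PSEUDO_HDR is the standard FreeBSD idiom for a fully verified
L4 checksum.)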
Tell the stack that the 2663 checksum is good */ 2664 m->m_pkthdr.csum_data = 0xffff; 2665 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2666 } 2667 /* flowid only valid if RSS hashing is enabled */ 2668 if (sc->num_slices > 1) { 2669 m->m_pkthdr.flowid = (ss - sc->ss); 2670 m->m_flags |= M_FLOWID; 2671 } 2672 /* pass the frame up the stack */ 2673 (*ifp->if_input)(ifp, m); 2674 } 2675 2676 static inline void 2677 mxge_clean_rx_done(struct mxge_slice_state *ss) 2678 { 2679 mxge_rx_done_t *rx_done = &ss->rx_done; 2680 int limit = 0; 2681 uint16_t length; 2682 uint16_t checksum; 2683 2684 2685 while (rx_done->entry[rx_done->idx].length != 0) { 2686 length = ntohs(rx_done->entry[rx_done->idx].length); 2687 rx_done->entry[rx_done->idx].length = 0; 2688 checksum = rx_done->entry[rx_done->idx].checksum; 2689 if (length <= (MHLEN - MXGEFW_PAD)) 2690 mxge_rx_done_small(ss, length, checksum); 2691 else 2692 mxge_rx_done_big(ss, length, checksum); 2693 rx_done->cnt++; 2694 rx_done->idx = rx_done->cnt & rx_done->mask; 2695 2696 /* limit potential for livelock */ 2697 if (__predict_false(++limit > rx_done->mask / 2)) 2698 break; 2699 } 2700 #ifdef INET 2701 while (!SLIST_EMPTY(&ss->lro_active)) { 2702 struct lro_entry *lro = SLIST_FIRST(&ss->lro_active); 2703 SLIST_REMOVE_HEAD(&ss->lro_active, next); 2704 mxge_lro_flush(ss, lro); 2705 } 2706 #endif 2707 } 2708 2709 2710 static inline void 2711 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2712 { 2713 struct ifnet *ifp; 2714 mxge_tx_ring_t *tx; 2715 struct mbuf *m; 2716 bus_dmamap_t map; 2717 int idx; 2718 int *flags; 2719 2720 tx = &ss->tx; 2721 ifp = ss->sc->ifp; 2722 while (tx->pkt_done != mcp_idx) { 2723 idx = tx->done & tx->mask; 2724 tx->done++; 2725 m = tx->info[idx].m; 2726 /* mbuf and DMA map only attached to the first 2727 segment per-mbuf */ 2728 if (m != NULL) { 2729 ss->obytes += m->m_pkthdr.len; 2730 if (m->m_flags & M_MCAST) 2731 ss->omcasts++; 2732 ss->opackets++; 2733 tx->info[idx].m = NULL; 2734 map = tx->info[idx].map; 2735 bus_dmamap_unload(tx->dmat, map); 2736 m_freem(m); 2737 } 2738 if (tx->info[idx].flag) { 2739 tx->info[idx].flag = 0; 2740 tx->pkt_done++; 2741 } 2742 } 2743 2744 /* If we have space, clear IFF_OACTIVE to tell the stack that 2745 its OK to send packets */ 2746 #ifdef IFNET_BUF_RING 2747 flags = &ss->if_drv_flags; 2748 #else 2749 flags = &ifp->if_drv_flags; 2750 #endif 2751 mtx_lock(&ss->tx.mtx); 2752 if ((*flags) & IFF_DRV_OACTIVE && 2753 tx->req - tx->done < (tx->mask + 1)/4) { 2754 *(flags) &= ~IFF_DRV_OACTIVE; 2755 ss->tx.wake++; 2756 mxge_start_locked(ss); 2757 } 2758 #ifdef IFNET_BUF_RING 2759 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { 2760 /* let the NIC stop polling this queue, since there 2761 * are no more transmits pending */ 2762 if (tx->req == tx->done) { 2763 *tx->send_stop = 1; 2764 tx->queue_active = 0; 2765 tx->deactivate++; 2766 wmb(); 2767 } 2768 } 2769 #endif 2770 mtx_unlock(&ss->tx.mtx); 2771 2772 } 2773 2774 static struct mxge_media_type mxge_xfp_media_types[] = 2775 { 2776 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2777 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2778 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2779 {0, (1 << 5), "10GBASE-ER"}, 2780 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2781 {0, (1 << 3), "10GBASE-SW"}, 2782 {0, (1 << 2), "10GBASE-LW"}, 2783 {0, (1 << 1), "10GBASE-EW"}, 2784 {0, (1 << 0), "Reserved"} 2785 }; 2786 static struct mxge_media_type mxge_sfp_media_types[] = 2787 { 2788 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"}, 2789 {0, (1 << 7), 
"Reserved"}, 2790 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2791 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2792 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}, 2793 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"} 2794 }; 2795 2796 static void 2797 mxge_media_set(mxge_softc_t *sc, int media_type) 2798 { 2799 2800 2801 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type, 2802 0, NULL); 2803 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type); 2804 sc->current_media = media_type; 2805 sc->media.ifm_media = sc->media.ifm_cur->ifm_media; 2806 } 2807 2808 static void 2809 mxge_media_init(mxge_softc_t *sc) 2810 { 2811 char *ptr; 2812 int i; 2813 2814 ifmedia_removeall(&sc->media); 2815 mxge_media_set(sc, IFM_AUTO); 2816 2817 /* 2818 * parse the product code to deterimine the interface type 2819 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2820 * after the 3rd dash in the driver's cached copy of the 2821 * EEPROM's product code string. 2822 */ 2823 ptr = sc->product_code_string; 2824 if (ptr == NULL) { 2825 device_printf(sc->dev, "Missing product code\n"); 2826 return; 2827 } 2828 2829 for (i = 0; i < 3; i++, ptr++) { 2830 ptr = index(ptr, '-'); 2831 if (ptr == NULL) { 2832 device_printf(sc->dev, 2833 "only %d dashes in PC?!?\n", i); 2834 return; 2835 } 2836 } 2837 if (*ptr == 'C') { 2838 /* -C is CX4 */ 2839 sc->connector = MXGE_CX4; 2840 mxge_media_set(sc, IFM_10G_CX4); 2841 } else if (*ptr == 'Q') { 2842 /* -Q is Quad Ribbon Fiber */ 2843 sc->connector = MXGE_QRF; 2844 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2845 /* FreeBSD has no media type for Quad ribbon fiber */ 2846 } else if (*ptr == 'R') { 2847 /* -R is XFP */ 2848 sc->connector = MXGE_XFP; 2849 } else if (*ptr == 'S' || *(ptr +1) == 'S') { 2850 /* -S or -2S is SFP+ */ 2851 sc->connector = MXGE_SFP; 2852 } else { 2853 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2854 } 2855 } 2856 2857 /* 2858 * Determine the media type for a NIC. Some XFPs will identify 2859 * themselves only when their link is up, so this is initiated via a 2860 * link up interrupt. However, this can potentially take up to 2861 * several milliseconds, so it is run via the watchdog routine, rather 2862 * than in the interrupt handler itself. 2863 */ 2864 static void 2865 mxge_media_probe(mxge_softc_t *sc) 2866 { 2867 mxge_cmd_t cmd; 2868 char *cage_type; 2869 2870 struct mxge_media_type *mxge_media_types = NULL; 2871 int i, err, ms, mxge_media_type_entries; 2872 uint32_t byte; 2873 2874 sc->need_media_probe = 0; 2875 2876 if (sc->connector == MXGE_XFP) { 2877 /* -R is XFP */ 2878 mxge_media_types = mxge_xfp_media_types; 2879 mxge_media_type_entries = 2880 sizeof (mxge_xfp_media_types) / 2881 sizeof (mxge_xfp_media_types[0]); 2882 byte = MXGE_XFP_COMPLIANCE_BYTE; 2883 cage_type = "XFP"; 2884 } else if (sc->connector == MXGE_SFP) { 2885 /* -S or -2S is SFP+ */ 2886 mxge_media_types = mxge_sfp_media_types; 2887 mxge_media_type_entries = 2888 sizeof (mxge_sfp_media_types) / 2889 sizeof (mxge_sfp_media_types[0]); 2890 cage_type = "SFP+"; 2891 byte = 3; 2892 } else { 2893 /* nothing to do; media type cannot change */ 2894 return; 2895 } 2896 2897 /* 2898 * At this point we know the NIC has an XFP cage, so now we 2899 * try to determine what is in the cage by using the 2900 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2901 * register. 
We read just one byte, which may take over 2902 * a millisecond 2903 */ 2904 2905 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2906 cmd.data1 = byte; 2907 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2908 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 2909 device_printf(sc->dev, "failed to read XFP\n"); 2910 } 2911 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 2912 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 2913 } 2914 if (err != MXGEFW_CMD_OK) { 2915 return; 2916 } 2917 2918 /* now we wait for the data to be cached */ 2919 cmd.data0 = byte; 2920 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2921 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 2922 DELAY(1000); 2923 cmd.data0 = byte; 2924 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2925 } 2926 if (err != MXGEFW_CMD_OK) { 2927 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 2928 cage_type, err, ms); 2929 return; 2930 } 2931 2932 if (cmd.data0 == mxge_media_types[0].bitmask) { 2933 if (mxge_verbose) 2934 device_printf(sc->dev, "%s:%s\n", cage_type, 2935 mxge_media_types[0].name); 2936 if (sc->current_media != mxge_media_types[0].flag) { 2937 mxge_media_init(sc); 2938 mxge_media_set(sc, mxge_media_types[0].flag); 2939 } 2940 return; 2941 } 2942 for (i = 1; i < mxge_media_type_entries; i++) { 2943 if (cmd.data0 & mxge_media_types[i].bitmask) { 2944 if (mxge_verbose) 2945 device_printf(sc->dev, "%s:%s\n", 2946 cage_type, 2947 mxge_media_types[i].name); 2948 2949 if (sc->current_media != mxge_media_types[i].flag) { 2950 mxge_media_init(sc); 2951 mxge_media_set(sc, mxge_media_types[i].flag); 2952 } 2953 return; 2954 } 2955 } 2956 if (mxge_verbose) 2957 device_printf(sc->dev, "%s media 0x%x unknown\n", 2958 cage_type, cmd.data0); 2959 2960 return; 2961 } 2962 2963 static void 2964 mxge_intr(void *arg) 2965 { 2966 struct mxge_slice_state *ss = arg; 2967 mxge_softc_t *sc = ss->sc; 2968 mcp_irq_data_t *stats = ss->fw_stats; 2969 mxge_tx_ring_t *tx = &ss->tx; 2970 mxge_rx_done_t *rx_done = &ss->rx_done; 2971 uint32_t send_done_count; 2972 uint8_t valid; 2973 2974 2975 #ifndef IFNET_BUF_RING 2976 /* an interrupt on a non-zero slice is implicitly valid 2977 since MSI-X irqs are not shared */ 2978 if (ss != sc->ss) { 2979 mxge_clean_rx_done(ss); 2980 *ss->irq_claim = be32toh(3); 2981 return; 2982 } 2983 #endif 2984 2985 /* make sure the DMA has finished */ 2986 if (!stats->valid) { 2987 return; 2988 } 2989 valid = stats->valid; 2990 2991 if (sc->legacy_irq) { 2992 /* lower legacy IRQ */ 2993 *sc->irq_deassert = 0; 2994 if (!mxge_deassert_wait) 2995 /* don't wait for conf. 
that irq is low */ 2996 stats->valid = 0; 2997 } else { 2998 stats->valid = 0; 2999 } 3000 3001 /* loop while waiting for legacy irq deassertion */ 3002 do { 3003 /* check for transmit completes and receives */ 3004 send_done_count = be32toh(stats->send_done_count); 3005 while ((send_done_count != tx->pkt_done) || 3006 (rx_done->entry[rx_done->idx].length != 0)) { 3007 if (send_done_count != tx->pkt_done) 3008 mxge_tx_done(ss, (int)send_done_count); 3009 mxge_clean_rx_done(ss); 3010 send_done_count = be32toh(stats->send_done_count); 3011 } 3012 if (sc->legacy_irq && mxge_deassert_wait) 3013 wmb(); 3014 } while (*((volatile uint8_t *) &stats->valid)); 3015 3016 /* fw link & error stats meaningful only on the first slice */ 3017 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 3018 if (sc->link_state != stats->link_up) { 3019 sc->link_state = stats->link_up; 3020 if (sc->link_state) { 3021 if_link_state_change(sc->ifp, LINK_STATE_UP); 3022 sc->ifp->if_baudrate = IF_Gbps(10UL); 3023 if (mxge_verbose) 3024 device_printf(sc->dev, "link up\n"); 3025 } else { 3026 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3027 sc->ifp->if_baudrate = 0; 3028 if (mxge_verbose) 3029 device_printf(sc->dev, "link down\n"); 3030 } 3031 sc->need_media_probe = 1; 3032 } 3033 if (sc->rdma_tags_available != 3034 be32toh(stats->rdma_tags_available)) { 3035 sc->rdma_tags_available = 3036 be32toh(stats->rdma_tags_available); 3037 device_printf(sc->dev, "RDMA timed out! %d tags " 3038 "left\n", sc->rdma_tags_available); 3039 } 3040 3041 if (stats->link_down) { 3042 sc->down_cnt += stats->link_down; 3043 sc->link_state = 0; 3044 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3045 } 3046 } 3047 3048 /* check to see if we have rx token to pass back */ 3049 if (valid & 0x1) 3050 *ss->irq_claim = be32toh(3); 3051 *(ss->irq_claim + 1) = be32toh(3); 3052 } 3053 3054 static void 3055 mxge_init(void *arg) 3056 { 3057 } 3058 3059 3060 3061 static void 3062 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 3063 { 3064 struct lro_entry *lro_entry; 3065 int i; 3066 3067 while (!SLIST_EMPTY(&ss->lro_free)) { 3068 lro_entry = SLIST_FIRST(&ss->lro_free); 3069 SLIST_REMOVE_HEAD(&ss->lro_free, next); 3070 free(lro_entry, M_DEVBUF); 3071 } 3072 3073 for (i = 0; i <= ss->rx_big.mask; i++) { 3074 if (ss->rx_big.info[i].m == NULL) 3075 continue; 3076 bus_dmamap_unload(ss->rx_big.dmat, 3077 ss->rx_big.info[i].map); 3078 m_freem(ss->rx_big.info[i].m); 3079 ss->rx_big.info[i].m = NULL; 3080 } 3081 3082 for (i = 0; i <= ss->rx_small.mask; i++) { 3083 if (ss->rx_small.info[i].m == NULL) 3084 continue; 3085 bus_dmamap_unload(ss->rx_small.dmat, 3086 ss->rx_small.info[i].map); 3087 m_freem(ss->rx_small.info[i].m); 3088 ss->rx_small.info[i].m = NULL; 3089 } 3090 3091 /* transmit ring used only on the first slice */ 3092 if (ss->tx.info == NULL) 3093 return; 3094 3095 for (i = 0; i <= ss->tx.mask; i++) { 3096 ss->tx.info[i].flag = 0; 3097 if (ss->tx.info[i].m == NULL) 3098 continue; 3099 bus_dmamap_unload(ss->tx.dmat, 3100 ss->tx.info[i].map); 3101 m_freem(ss->tx.info[i].m); 3102 ss->tx.info[i].m = NULL; 3103 } 3104 } 3105 3106 static void 3107 mxge_free_mbufs(mxge_softc_t *sc) 3108 { 3109 int slice; 3110 3111 for (slice = 0; slice < sc->num_slices; slice++) 3112 mxge_free_slice_mbufs(&sc->ss[slice]); 3113 } 3114 3115 static void 3116 mxge_free_slice_rings(struct mxge_slice_state *ss) 3117 { 3118 int i; 3119 3120 3121 if (ss->rx_done.entry != NULL) 3122 mxge_dma_free(&ss->rx_done.dma); 3123 ss->rx_done.entry = NULL; 3124 3125 if 
(ss->tx.req_bytes != NULL) 3126 free(ss->tx.req_bytes, M_DEVBUF); 3127 ss->tx.req_bytes = NULL; 3128 3129 if (ss->tx.seg_list != NULL) 3130 free(ss->tx.seg_list, M_DEVBUF); 3131 ss->tx.seg_list = NULL; 3132 3133 if (ss->rx_small.shadow != NULL) 3134 free(ss->rx_small.shadow, M_DEVBUF); 3135 ss->rx_small.shadow = NULL; 3136 3137 if (ss->rx_big.shadow != NULL) 3138 free(ss->rx_big.shadow, M_DEVBUF); 3139 ss->rx_big.shadow = NULL; 3140 3141 if (ss->tx.info != NULL) { 3142 if (ss->tx.dmat != NULL) { 3143 for (i = 0; i <= ss->tx.mask; i++) { 3144 bus_dmamap_destroy(ss->tx.dmat, 3145 ss->tx.info[i].map); 3146 } 3147 bus_dma_tag_destroy(ss->tx.dmat); 3148 } 3149 free(ss->tx.info, M_DEVBUF); 3150 } 3151 ss->tx.info = NULL; 3152 3153 if (ss->rx_small.info != NULL) { 3154 if (ss->rx_small.dmat != NULL) { 3155 for (i = 0; i <= ss->rx_small.mask; i++) { 3156 bus_dmamap_destroy(ss->rx_small.dmat, 3157 ss->rx_small.info[i].map); 3158 } 3159 bus_dmamap_destroy(ss->rx_small.dmat, 3160 ss->rx_small.extra_map); 3161 bus_dma_tag_destroy(ss->rx_small.dmat); 3162 } 3163 free(ss->rx_small.info, M_DEVBUF); 3164 } 3165 ss->rx_small.info = NULL; 3166 3167 if (ss->rx_big.info != NULL) { 3168 if (ss->rx_big.dmat != NULL) { 3169 for (i = 0; i <= ss->rx_big.mask; i++) { 3170 bus_dmamap_destroy(ss->rx_big.dmat, 3171 ss->rx_big.info[i].map); 3172 } 3173 bus_dmamap_destroy(ss->rx_big.dmat, 3174 ss->rx_big.extra_map); 3175 bus_dma_tag_destroy(ss->rx_big.dmat); 3176 } 3177 free(ss->rx_big.info, M_DEVBUF); 3178 } 3179 ss->rx_big.info = NULL; 3180 } 3181 3182 static void 3183 mxge_free_rings(mxge_softc_t *sc) 3184 { 3185 int slice; 3186 3187 for (slice = 0; slice < sc->num_slices; slice++) 3188 mxge_free_slice_rings(&sc->ss[slice]); 3189 } 3190 3191 static int 3192 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3193 int tx_ring_entries) 3194 { 3195 mxge_softc_t *sc = ss->sc; 3196 size_t bytes; 3197 int err, i; 3198 3199 err = ENOMEM; 3200 3201 /* allocate per-slice receive resources */ 3202 3203 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 3204 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 3205 3206 /* allocate the rx shadow rings */ 3207 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 3208 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3209 if (ss->rx_small.shadow == NULL) 3210 return err; 3211 3212 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 3213 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3214 if (ss->rx_big.shadow == NULL) 3215 return err; 3216 3217 /* allocate the rx host info rings */ 3218 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 3219 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3220 if (ss->rx_small.info == NULL) 3221 return err; 3222 3223 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 3224 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3225 if (ss->rx_big.info == NULL) 3226 return err; 3227 3228 /* allocate the rx busdma resources */ 3229 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3230 1, /* alignment */ 3231 4096, /* boundary */ 3232 BUS_SPACE_MAXADDR, /* low */ 3233 BUS_SPACE_MAXADDR, /* high */ 3234 NULL, NULL, /* filter */ 3235 MHLEN, /* maxsize */ 3236 1, /* num segs */ 3237 MHLEN, /* maxsegsize */ 3238 BUS_DMA_ALLOCNOW, /* flags */ 3239 NULL, NULL, /* lock */ 3240 &ss->rx_small.dmat); /* tag */ 3241 if (err != 0) { 3242 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3243 err); 3244 return err; 3245 } 3246 3247 err = 
bus_dma_tag_create(sc->parent_dmat, /* parent */
3248 1, /* alignment */
3249 #if MXGE_VIRT_JUMBOS
3250 4096, /* boundary */
3251 #else
3252 0, /* boundary */
3253 #endif
3254 BUS_SPACE_MAXADDR, /* low */
3255 BUS_SPACE_MAXADDR, /* high */
3256 NULL, NULL, /* filter */
3257 3*4096, /* maxsize */
3258 #if MXGE_VIRT_JUMBOS
3259 3, /* num segs */
3260 4096, /* maxsegsize*/
3261 #else
3262 1, /* num segs */
3263 MJUM9BYTES, /* maxsegsize*/
3264 #endif
3265 BUS_DMA_ALLOCNOW, /* flags */
3266 NULL, NULL, /* lock */
3267 &ss->rx_big.dmat); /* tag */
3268 if (err != 0) {
3269 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3270 err);
3271 return err;
3272 }
3273 for (i = 0; i <= ss->rx_small.mask; i++) {
3274 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3275 &ss->rx_small.info[i].map);
3276 if (err != 0) {
3277 device_printf(sc->dev, "Err %d rx_small dmamap\n",
3278 err);
3279 return err;
3280 }
3281 }
3282 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3283 &ss->rx_small.extra_map);
3284 if (err != 0) {
3285 device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3286 err);
3287 return err;
3288 }
3289
3290 for (i = 0; i <= ss->rx_big.mask; i++) {
3291 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3292 &ss->rx_big.info[i].map);
3293 if (err != 0) {
3294 device_printf(sc->dev, "Err %d rx_big dmamap\n",
3295 err);
3296 return err;
3297 }
3298 }
3299 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3300 &ss->rx_big.extra_map);
3301 if (err != 0) {
3302 device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3303 err);
3304 return err;
3305 }
3306
3307 /* now allocate TX resources */
3308
3309 #ifndef IFNET_BUF_RING
3310 /* only use a single TX ring for now */
3311 if (ss != ss->sc->ss)
3312 return 0;
3313 #endif
3314
3315 ss->tx.mask = tx_ring_entries - 1;
3316 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3317
3318
3319 /* allocate the tx request copy block */
3320 bytes = 8 +
3321 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3322 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
3323 if (ss->tx.req_bytes == NULL)
3324 return err;
3325 /* ensure req_list entries are aligned to 8 bytes */
3326 ss->tx.req_list = (mcp_kreq_ether_send_t *)
3327 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
3328
3329 /* allocate the tx busdma segment list */
3330 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3331 ss->tx.seg_list = (bus_dma_segment_t *)
3332 malloc(bytes, M_DEVBUF, M_WAITOK);
3333 if (ss->tx.seg_list == NULL)
3334 return err;
3335
3336 /* allocate the tx host info ring */
3337 bytes = tx_ring_entries * sizeof (*ss->tx.info);
3338 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3339 if (ss->tx.info == NULL)
3340 return err;
3341
3342 /* allocate the tx busdma resources */
3343 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3344 1, /* alignment */
3345 sc->tx_boundary, /* boundary */
3346 BUS_SPACE_MAXADDR, /* low */
3347 BUS_SPACE_MAXADDR, /* high */
3348 NULL, NULL, /* filter */
3349 65536 + 256, /* maxsize */
3350 ss->tx.max_desc - 2, /* num segs */
3351 sc->tx_boundary, /* maxsegsz */
3352 BUS_DMA_ALLOCNOW, /* flags */
3353 NULL, NULL, /* lock */
3354 &ss->tx.dmat); /* tag */
3355
3356 if (err != 0) {
3357 device_printf(sc->dev, "Err %d allocating tx dmat\n",
3358 err);
3359 return err;
3360 }
3361
3362 /* now use these tags to set up dmamaps for each slot
3363 in the ring */
3364 for (i = 0; i <= ss->tx.mask; i++) {
3365 err = bus_dmamap_create(ss->tx.dmat, 0,
3366 &ss->tx.info[i].map);
3367 if (err != 0) {
3368 device_printf(sc->dev, "Err %d tx
dmamap\n", 3369 err); 3370 return err; 3371 } 3372 } 3373 return 0; 3374 3375 } 3376 3377 static int 3378 mxge_alloc_rings(mxge_softc_t *sc) 3379 { 3380 mxge_cmd_t cmd; 3381 int tx_ring_size; 3382 int tx_ring_entries, rx_ring_entries; 3383 int err, slice; 3384 3385 /* get ring sizes */ 3386 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3387 tx_ring_size = cmd.data0; 3388 if (err != 0) { 3389 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3390 goto abort; 3391 } 3392 3393 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3394 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3395 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 3396 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 3397 IFQ_SET_READY(&sc->ifp->if_snd); 3398 3399 for (slice = 0; slice < sc->num_slices; slice++) { 3400 err = mxge_alloc_slice_rings(&sc->ss[slice], 3401 rx_ring_entries, 3402 tx_ring_entries); 3403 if (err != 0) 3404 goto abort; 3405 } 3406 return 0; 3407 3408 abort: 3409 mxge_free_rings(sc); 3410 return err; 3411 3412 } 3413 3414 3415 static void 3416 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3417 { 3418 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3419 3420 if (bufsize < MCLBYTES) { 3421 /* easy, everything fits in a single buffer */ 3422 *big_buf_size = MCLBYTES; 3423 *cl_size = MCLBYTES; 3424 *nbufs = 1; 3425 return; 3426 } 3427 3428 if (bufsize < MJUMPAGESIZE) { 3429 /* still easy, everything still fits in a single buffer */ 3430 *big_buf_size = MJUMPAGESIZE; 3431 *cl_size = MJUMPAGESIZE; 3432 *nbufs = 1; 3433 return; 3434 } 3435 #if MXGE_VIRT_JUMBOS 3436 /* now we need to use virtually contiguous buffers */ 3437 *cl_size = MJUM9BYTES; 3438 *big_buf_size = 4096; 3439 *nbufs = mtu / 4096 + 1; 3440 /* needs to be a power of two, so round up */ 3441 if (*nbufs == 3) 3442 *nbufs = 4; 3443 #else 3444 *cl_size = MJUM9BYTES; 3445 *big_buf_size = MJUM9BYTES; 3446 *nbufs = 1; 3447 #endif 3448 } 3449 3450 static int 3451 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3452 { 3453 mxge_softc_t *sc; 3454 mxge_cmd_t cmd; 3455 bus_dmamap_t map; 3456 struct lro_entry *lro_entry; 3457 int err, i, slice; 3458 3459 3460 sc = ss->sc; 3461 slice = ss - sc->ss; 3462 3463 SLIST_INIT(&ss->lro_free); 3464 SLIST_INIT(&ss->lro_active); 3465 3466 for (i = 0; i < sc->lro_cnt; i++) { 3467 lro_entry = (struct lro_entry *) 3468 malloc(sizeof (*lro_entry), M_DEVBUF, 3469 M_NOWAIT | M_ZERO); 3470 if (lro_entry == NULL) { 3471 sc->lro_cnt = i; 3472 break; 3473 } 3474 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next); 3475 } 3476 /* get the lanai pointers to the send and receive rings */ 3477 3478 err = 0; 3479 #ifndef IFNET_BUF_RING 3480 /* We currently only send from the first slice */ 3481 if (slice == 0) { 3482 #endif 3483 cmd.data0 = slice; 3484 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3485 ss->tx.lanai = 3486 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3487 ss->tx.send_go = (volatile uint32_t *) 3488 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3489 ss->tx.send_stop = (volatile uint32_t *) 3490 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3491 #ifndef IFNET_BUF_RING 3492 } 3493 #endif 3494 cmd.data0 = slice; 3495 err |= mxge_send_cmd(sc, 3496 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3497 ss->rx_small.lanai = 3498 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3499 cmd.data0 = slice; 3500 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, 
&cmd);
3501 ss->rx_big.lanai =
3502 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3503
3504 if (err != 0) {
3505 device_printf(sc->dev,
3506 "failed to get ring sizes or locations\n");
3507 return EIO;
3508 }
3509
3510 /* stock receive rings */
3511 for (i = 0; i <= ss->rx_small.mask; i++) {
3512 map = ss->rx_small.info[i].map;
3513 err = mxge_get_buf_small(ss, map, i);
3514 if (err) {
3515 device_printf(sc->dev, "alloced %d/%d smalls\n",
3516 i, ss->rx_small.mask + 1);
3517 return ENOMEM;
3518 }
3519 }
3520 for (i = 0; i <= ss->rx_big.mask; i++) {
3521 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3522 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3523 }
3524 ss->rx_big.nbufs = nbufs;
3525 ss->rx_big.cl_size = cl_size;
3526 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
3527 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3528 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3529 map = ss->rx_big.info[i].map;
3530 err = mxge_get_buf_big(ss, map, i);
3531 if (err) {
3532 device_printf(sc->dev, "alloced %d/%d bigs\n",
3533 i, ss->rx_big.mask + 1);
3534 return ENOMEM;
3535 }
3536 }
3537 return 0;
3538 }
3539
3540 static int
3541 mxge_open(mxge_softc_t *sc)
3542 {
3543 mxge_cmd_t cmd;
3544 int err, big_bytes, nbufs, slice, cl_size, i;
3545 bus_addr_t bus;
3546 volatile uint8_t *itable;
3547 struct mxge_slice_state *ss;
3548
3549 /* Copy the MAC address in case it was overridden */
3550 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3551
3552 err = mxge_reset(sc, 1);
3553 if (err != 0) {
3554 device_printf(sc->dev, "failed to reset\n");
3555 return EIO;
3556 }
3557
3558 if (sc->num_slices > 1) {
3559 /* set up the indirection table */
3560 cmd.data0 = sc->num_slices;
3561 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3562 &cmd);
3563
3564 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3565 &cmd);
3566 if (err != 0) {
3567 device_printf(sc->dev,
3568 "failed to set up rss tables\n");
3569 return err;
3570 }
3571
3572 /* just enable an identity mapping */
3573 itable = sc->sram + cmd.data0;
3574 for (i = 0; i < sc->num_slices; i++)
3575 itable[i] = (uint8_t)i;
3576
3577 cmd.data0 = 1;
3578 cmd.data1 = mxge_rss_hash_type;
3579 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3580 if (err != 0) {
3581 device_printf(sc->dev, "failed to enable slices\n");
3582 return err;
3583 }
3584 }
3585
3586
3587 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3588
3589 cmd.data0 = nbufs;
3590 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3591 &cmd);
3592 /* error is only meaningful if we're trying to set
3593 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3594 if (err && nbufs > 1) {
3595 device_printf(sc->dev,
3596 "Failed to set always-use-n to %d\n",
3597 nbufs);
3598 return EIO;
3599 }
3600 /* Give the firmware the mtu and the big and small buffer
3601 sizes. The firmware wants the big buf size to be a power
3602 of two.
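(mxge_choose_params() above already picked big_bytes to suit the
MTU: a 2KB cluster, a page-sized cluster, or a 9KB jumbo; with
MXGE_VIRT_JUMBOS it instead stacks page-sized pieces.)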
Luckily, FreeBSD's clusters are powers of two */ 3603 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3604 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3605 cmd.data0 = MHLEN - MXGEFW_PAD; 3606 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3607 &cmd); 3608 cmd.data0 = big_bytes; 3609 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3610 3611 if (err != 0) { 3612 device_printf(sc->dev, "failed to setup params\n"); 3613 goto abort; 3614 } 3615 3616 /* Now give him the pointer to the stats block */ 3617 for (slice = 0; 3618 #ifdef IFNET_BUF_RING 3619 slice < sc->num_slices; 3620 #else 3621 slice < 1; 3622 #endif 3623 slice++) { 3624 ss = &sc->ss[slice]; 3625 cmd.data0 = 3626 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3627 cmd.data1 = 3628 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3629 cmd.data2 = sizeof(struct mcp_irq_data); 3630 cmd.data2 |= (slice << 16); 3631 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3632 } 3633 3634 if (err != 0) { 3635 bus = sc->ss->fw_stats_dma.bus_addr; 3636 bus += offsetof(struct mcp_irq_data, send_done_count); 3637 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3638 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3639 err = mxge_send_cmd(sc, 3640 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3641 &cmd); 3642 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3643 sc->fw_multicast_support = 0; 3644 } else { 3645 sc->fw_multicast_support = 1; 3646 } 3647 3648 if (err != 0) { 3649 device_printf(sc->dev, "failed to setup params\n"); 3650 goto abort; 3651 } 3652 3653 for (slice = 0; slice < sc->num_slices; slice++) { 3654 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3655 if (err != 0) { 3656 device_printf(sc->dev, "couldn't open slice %d\n", 3657 slice); 3658 goto abort; 3659 } 3660 } 3661 3662 /* Finally, start the firmware running */ 3663 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3664 if (err) { 3665 device_printf(sc->dev, "Couldn't bring up link\n"); 3666 goto abort; 3667 } 3668 #ifdef IFNET_BUF_RING 3669 for (slice = 0; slice < sc->num_slices; slice++) { 3670 ss = &sc->ss[slice]; 3671 ss->if_drv_flags |= IFF_DRV_RUNNING; 3672 ss->if_drv_flags &= ~IFF_DRV_OACTIVE; 3673 } 3674 #endif 3675 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3676 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3677 3678 return 0; 3679 3680 3681 abort: 3682 mxge_free_mbufs(sc); 3683 3684 return err; 3685 } 3686 3687 static int 3688 mxge_close(mxge_softc_t *sc, int down) 3689 { 3690 mxge_cmd_t cmd; 3691 int err, old_down_cnt; 3692 #ifdef IFNET_BUF_RING 3693 struct mxge_slice_state *ss; 3694 int slice; 3695 #endif 3696 3697 #ifdef IFNET_BUF_RING 3698 for (slice = 0; slice < sc->num_slices; slice++) { 3699 ss = &sc->ss[slice]; 3700 ss->if_drv_flags &= ~IFF_DRV_RUNNING; 3701 } 3702 #endif 3703 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3704 if (!down) { 3705 old_down_cnt = sc->down_cnt; 3706 wmb(); 3707 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3708 if (err) { 3709 device_printf(sc->dev, 3710 "Couldn't bring down link\n"); 3711 } 3712 if (old_down_cnt == sc->down_cnt) { 3713 /* wait for down irq */ 3714 DELAY(10 * sc->intr_coal_delay); 3715 } 3716 wmb(); 3717 if (old_down_cnt == sc->down_cnt) { 3718 device_printf(sc->dev, "never got down irq\n"); 3719 } 3720 } 3721 mxge_free_mbufs(sc); 3722 3723 return 0; 3724 } 3725 3726 static void 3727 mxge_setup_cfg_space(mxge_softc_t *sc) 3728 { 3729 device_t dev = sc->dev; 3730 int reg; 3731 uint16_t cmd, lnk, pectl; 3732 3733 /* find the PCIe link width and set max 
read request to 4KB*/ 3734 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 3735 lnk = pci_read_config(dev, reg + 0x12, 2); 3736 sc->link_width = (lnk >> 4) & 0x3f; 3737 3738 if (sc->pectl == 0) { 3739 pectl = pci_read_config(dev, reg + 0x8, 2); 3740 pectl = (pectl & ~0x7000) | (5 << 12); 3741 pci_write_config(dev, reg + 0x8, pectl, 2); 3742 sc->pectl = pectl; 3743 } else { 3744 /* restore saved pectl after watchdog reset */ 3745 pci_write_config(dev, reg + 0x8, sc->pectl, 2); 3746 } 3747 } 3748 3749 /* Enable DMA and Memory space access */ 3750 pci_enable_busmaster(dev); 3751 cmd = pci_read_config(dev, PCIR_COMMAND, 2); 3752 cmd |= PCIM_CMD_MEMEN; 3753 pci_write_config(dev, PCIR_COMMAND, cmd, 2); 3754 } 3755 3756 static uint32_t 3757 mxge_read_reboot(mxge_softc_t *sc) 3758 { 3759 device_t dev = sc->dev; 3760 uint32_t vs; 3761 3762 /* find the vendor specific offset */ 3763 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) { 3764 device_printf(sc->dev, 3765 "could not find vendor specific offset\n"); 3766 return (uint32_t)-1; 3767 } 3768 /* enable read32 mode */ 3769 pci_write_config(dev, vs + 0x10, 0x3, 1); 3770 /* tell NIC which register to read */ 3771 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3772 return (pci_read_config(dev, vs + 0x14, 4)); 3773 } 3774 3775 static void 3776 mxge_watchdog_reset(mxge_softc_t *sc) 3777 { 3778 struct pci_devinfo *dinfo; 3779 struct mxge_slice_state *ss; 3780 int err, running, s, num_tx_slices = 1; 3781 uint32_t reboot; 3782 uint16_t cmd; 3783 3784 err = ENXIO; 3785 3786 device_printf(sc->dev, "Watchdog reset!\n"); 3787 3788 /* 3789 * check to see if the NIC rebooted. If it did, then all of 3790 * PCI config space has been reset, and things like the 3791 * busmaster bit will be zero. If this is the case, then we 3792 * must restore PCI config space before the NIC can be used 3793 * again 3794 */ 3795 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3796 if (cmd == 0xffff) { 3797 /* 3798 * maybe the watchdog caught the NIC rebooting; wait 3799 * up to 100ms for it to finish. 
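 * (Config space reads return all-ones while the device is off the
 * bus, which is why 0xffff in the command register below means the
 * NIC has not yet reappeared.)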
If it does not come
3800 * back, then give up
3801 */
3802 DELAY(1000*100);
3803 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3804 if (cmd == 0xffff) {
3805 device_printf(sc->dev, "NIC disappeared!\n");
3806 }
3807 }
3808 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3809 /* print the reboot status */
3810 reboot = mxge_read_reboot(sc);
3811 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3812 reboot);
3813 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
3814 if (running) {
3815
3816 /*
3817 * quiesce NIC so that TX routines will not try to
3818 * xmit after restoration of BAR
3819 */
3820
3821 /* Mark the link as down */
3822 if (sc->link_state) {
3823 sc->link_state = 0;
3824 if_link_state_change(sc->ifp,
3825 LINK_STATE_DOWN);
3826 }
3827 #ifdef IFNET_BUF_RING
3828 num_tx_slices = sc->num_slices;
3829 #endif
3830 /* grab all TX locks to ensure no tx */
3831 for (s = 0; s < num_tx_slices; s++) {
3832 ss = &sc->ss[s];
3833 mtx_lock(&ss->tx.mtx);
3834 }
3835 mxge_close(sc, 1);
3836 }
3837 /* restore PCI configuration space */
3838 dinfo = device_get_ivars(sc->dev);
3839 pci_cfg_restore(sc->dev, dinfo);
3840
3841 /* and redo any changes we made to our config space */
3842 mxge_setup_cfg_space(sc);
3843
3844 /* reload f/w */
3845 err = mxge_load_firmware(sc, 0);
3846 if (err) {
3847 device_printf(sc->dev,
3848 "Unable to re-load f/w\n");
3849 }
3850 if (running) {
3851 if (!err)
3852 err = mxge_open(sc);
3853 /* release all TX locks */
3854 for (s = 0; s < num_tx_slices; s++) {
3855 ss = &sc->ss[s];
3856 #ifdef IFNET_BUF_RING
3857 mxge_start_locked(ss);
3858 #endif
3859 mtx_unlock(&ss->tx.mtx);
3860 }
3861 }
3862 sc->watchdog_resets++;
3863 } else {
3864 device_printf(sc->dev,
3865 "NIC did not reboot, not resetting\n");
3866 err = 0;
3867 }
3868 if (err) {
3869 device_printf(sc->dev, "watchdog reset failed\n");
3870 } else {
3871 if (sc->dying == 2)
3872 sc->dying = 0;
3873 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3874 }
3875 }
3876
3877 static void
3878 mxge_watchdog_task(void *arg, int pending)
3879 {
3880 mxge_softc_t *sc = arg;
3881
3882
3883 mtx_lock(&sc->driver_mtx);
3884 mxge_watchdog_reset(sc);
3885 mtx_unlock(&sc->driver_mtx);
3886 }
3887
3888 static void
3889 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3890 {
3891 tx = &sc->ss[slice].tx;
3892 device_printf(sc->dev, "slice %d stuck?
ring state:\n", slice); 3893 device_printf(sc->dev, 3894 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3895 tx->req, tx->done, tx->queue_active); 3896 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3897 tx->activate, tx->deactivate); 3898 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3899 tx->pkt_done, 3900 be32toh(sc->ss->fw_stats->send_done_count)); 3901 } 3902 3903 static int 3904 mxge_watchdog(mxge_softc_t *sc) 3905 { 3906 mxge_tx_ring_t *tx; 3907 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3908 int i, err = 0; 3909 3910 /* see if we have outstanding transmits, which 3911 have been pending for more than mxge_ticks */ 3912 for (i = 0; 3913 #ifdef IFNET_BUF_RING 3914 (i < sc->num_slices) && (err == 0); 3915 #else 3916 (i < 1) && (err == 0); 3917 #endif 3918 i++) { 3919 tx = &sc->ss[i].tx; 3920 if (tx->req != tx->done && 3921 tx->watchdog_req != tx->watchdog_done && 3922 tx->done == tx->watchdog_done) { 3923 /* check for pause blocking before resetting */ 3924 if (tx->watchdog_rx_pause == rx_pause) { 3925 mxge_warn_stuck(sc, tx, i); 3926 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 3927 return (ENXIO); 3928 } 3929 else 3930 device_printf(sc->dev, "Flow control blocking " 3931 "xmits, check link partner\n"); 3932 } 3933 3934 tx->watchdog_req = tx->req; 3935 tx->watchdog_done = tx->done; 3936 tx->watchdog_rx_pause = rx_pause; 3937 } 3938 3939 if (sc->need_media_probe) 3940 mxge_media_probe(sc); 3941 return (err); 3942 } 3943 3944 static u_long 3945 mxge_update_stats(mxge_softc_t *sc) 3946 { 3947 struct mxge_slice_state *ss; 3948 u_long pkts = 0; 3949 u_long ipackets = 0; 3950 u_long opackets = 0; 3951 #ifdef IFNET_BUF_RING 3952 u_long obytes = 0; 3953 u_long omcasts = 0; 3954 u_long odrops = 0; 3955 #endif 3956 u_long oerrors = 0; 3957 int slice; 3958 3959 for (slice = 0; slice < sc->num_slices; slice++) { 3960 ss = &sc->ss[slice]; 3961 ipackets += ss->ipackets; 3962 opackets += ss->opackets; 3963 #ifdef IFNET_BUF_RING 3964 obytes += ss->obytes; 3965 omcasts += ss->omcasts; 3966 odrops += ss->tx.br->br_drops; 3967 #endif 3968 oerrors += ss->oerrors; 3969 } 3970 pkts = (ipackets - sc->ifp->if_ipackets); 3971 pkts += (opackets - sc->ifp->if_opackets); 3972 sc->ifp->if_ipackets = ipackets; 3973 sc->ifp->if_opackets = opackets; 3974 #ifdef IFNET_BUF_RING 3975 sc->ifp->if_obytes = obytes; 3976 sc->ifp->if_omcasts = omcasts; 3977 sc->ifp->if_snd.ifq_drops = odrops; 3978 #endif 3979 sc->ifp->if_oerrors = oerrors; 3980 return pkts; 3981 } 3982 3983 static void 3984 mxge_tick(void *arg) 3985 { 3986 mxge_softc_t *sc = arg; 3987 u_long pkts = 0; 3988 int err = 0; 3989 int running, ticks; 3990 uint16_t cmd; 3991 3992 ticks = mxge_ticks; 3993 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 3994 if (running) { 3995 /* aggregate stats from different slices */ 3996 pkts = mxge_update_stats(sc); 3997 if (!sc->watchdog_countdown) { 3998 err = mxge_watchdog(sc); 3999 sc->watchdog_countdown = 4; 4000 } 4001 sc->watchdog_countdown--; 4002 } 4003 if (pkts == 0) { 4004 /* ensure NIC did not suffer h/w fault while idle */ 4005 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 4006 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 4007 sc->dying = 2; 4008 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 4009 err = ENXIO; 4010 } 4011 /* look less often if NIC is idle */ 4012 ticks *= 4; 4013 } 4014 4015 if (err == 0) 4016 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 4017 4018 } 4019 4020 static int 4021 mxge_media_change(struct ifnet *ifp) 4022 { 4023 return EINVAL; 4024 } 4025 4026 
static int
mxge_change_mtu(mxge_softc_t *sc, int mtu)
{
	struct ifnet *ifp = sc->ifp;
	int real_mtu, old_mtu;
	int err = 0;

	real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	if ((real_mtu > sc->max_mtu) || real_mtu < 60)
		return EINVAL;
	mtx_lock(&sc->driver_mtx);
	old_mtu = ifp->if_mtu;
	ifp->if_mtu = mtu;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		mxge_close(sc, 0);
		err = mxge_open(sc);
		if (err != 0) {
			ifp->if_mtu = old_mtu;
			mxge_close(sc, 0);
			(void) mxge_open(sc);
		}
	}
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static void
mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	mxge_softc_t *sc = ifp->if_softc;

	if (sc == NULL)
		return;
	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
	ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
	ifmr->ifm_active |= sc->current_media;
}
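
/*
 * For SIOCSIFCAP below, "mask" is the XOR of the requested and current
 * capability sets, i.e. the set of capability bits the caller wants
 * toggled.  For example, if only IFCAP_TXCSUM is currently enabled and
 * the request asks for IFCAP_TXCSUM|IFCAP_TSO4, mask is IFCAP_TSO4 and
 * only the TSO branch runs.
 */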
static int
mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int err, mask;

	err = 0;
	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		err = ether_ioctl(ifp, command, data);
		break;

	case SIOCSIFMTU:
		err = mxge_change_mtu(sc, ifr->ifr_mtu);
		break;

	case SIOCSIFFLAGS:
		mtx_lock(&sc->driver_mtx);
		if (sc->dying) {
			mtx_unlock(&sc->driver_mtx);
			return EINVAL;
		}
		if (ifp->if_flags & IFF_UP) {
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				err = mxge_open(sc);
			} else {
				/* take care of promisc and allmulti
				   flag changes */
				mxge_change_promisc(sc,
						    ifp->if_flags & IFF_PROMISC);
				mxge_set_multicast_list(sc);
			}
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				mxge_close(sc, 0);
			}
		}
		mtx_unlock(&sc->driver_mtx);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		mtx_lock(&sc->driver_mtx);
		mxge_set_multicast_list(sc);
		mtx_unlock(&sc->driver_mtx);
		break;

	case SIOCSIFCAP:
		mtx_lock(&sc->driver_mtx);
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
						      | CSUM_TSO);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM;
				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
			}
		} else if (mask & IFCAP_RXCSUM) {
			if (IFCAP_RXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_RXCSUM;
				sc->csum_flag = 0;
			} else {
				ifp->if_capenable |= IFCAP_RXCSUM;
				sc->csum_flag = 1;
			}
		}
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				ifp->if_hwassist &= ~CSUM_TSO;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				printf("mxge requires tx checksum offload"
				       " be enabled to use TSO\n");
				err = EINVAL;
			}
		}
		if (mask & IFCAP_LRO) {
			if (IFCAP_LRO & ifp->if_capenable)
				err = mxge_change_lro_locked(sc, 0);
			else
				err = mxge_change_lro_locked(sc, mxge_lro_cnt);
		}
		if (mask & IFCAP_VLAN_HWTAGGING)
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
		if (mask & IFCAP_VLAN_HWTSO)
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;

		if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) ||
		    !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING))
			ifp->if_capenable &= ~IFCAP_VLAN_HWTSO;

		mtx_unlock(&sc->driver_mtx);
		VLAN_CAPABILITIES(ifp);

		break;

	case SIOCGIFMEDIA:
		mtx_lock(&sc->driver_mtx);
		mxge_media_probe(sc);
		mtx_unlock(&sc->driver_mtx);
		err = ifmedia_ioctl(ifp, (struct ifreq *)data,
				    &sc->media, command);
		break;

	default:
		err = ENOTTY;
	}
	return err;
}

static void
mxge_fetch_tunables(mxge_softc_t *sc)
{

	TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
	TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
			  &mxge_flow_control);
	TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
			  &mxge_intr_coal_delay);
	TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
			  &mxge_nvidia_ecrc_enable);
	TUNABLE_INT_FETCH("hw.mxge.force_firmware",
			  &mxge_force_firmware);
	TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
			  &mxge_deassert_wait);
	TUNABLE_INT_FETCH("hw.mxge.verbose",
			  &mxge_verbose);
	TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
	TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);
	TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
	TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
	/* accept the alternate spelling of the hash-type tunable too */
	TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
	TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
	TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);
	if (sc->lro_cnt != 0)
		mxge_lro_cnt = sc->lro_cnt;

	if (bootverbose)
		mxge_verbose = 1;
	if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
		mxge_intr_coal_delay = 30;
	if (mxge_ticks == 0)
		mxge_ticks = hz / 2;
	sc->pause = mxge_flow_control;
	if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
	    || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
		mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
	}
	if (mxge_initial_mtu > ETHERMTU_JUMBO ||
	    mxge_initial_mtu < ETHER_MIN_LEN)
		mxge_initial_mtu = ETHERMTU_JUMBO;

	if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
		mxge_throttle = MXGE_MAX_THROTTLE;
	if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
		mxge_throttle = MXGE_MIN_THROTTLE;
	sc->throttle = mxge_throttle;
}
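
/*
 * The tunables above are read from the kernel environment, so they are
 * normally set in /boot/loader.conf before the driver attaches; for
 * example (values shown are purely illustrative):
 *
 *	hw.mxge.max_slices="4"
 *	hw.mxge.intr_coal_delay="30"
 *	hw.mxge.flow_control_enabled="0"
 */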
static void
mxge_free_slices(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int i;

	if (sc->ss == NULL)
		return;

	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];
		if (ss->fw_stats != NULL) {
			mxge_dma_free(&ss->fw_stats_dma);
			ss->fw_stats = NULL;
#ifdef IFNET_BUF_RING
			if (ss->tx.br != NULL) {
				drbr_free(ss->tx.br, M_DEVBUF);
				ss->tx.br = NULL;
			}
#endif
			mtx_destroy(&ss->tx.mtx);
		}
		if (ss->rx_done.entry != NULL) {
			mxge_dma_free(&ss->rx_done.dma);
			ss->rx_done.entry = NULL;
		}
	}
	free(sc->ss, M_DEVBUF);
	sc->ss = NULL;
}

static int
mxge_alloc_slices(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct mxge_slice_state *ss;
	size_t bytes;
	int err, i, max_intr_slots;

	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		return err;
	}
	sc->rx_ring_size = cmd.data0;
	max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));

	bytes = sizeof (*sc->ss) * sc->num_slices;
	sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sc->ss == NULL)
		return (ENOMEM);
	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];

		ss->sc = sc;

		/* allocate per-slice rx interrupt queues */
		bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
		err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
		if (err != 0)
			goto abort;
		ss->rx_done.entry = ss->rx_done.dma.addr;
		bzero(ss->rx_done.entry, bytes);

		/*
		 * allocate the per-slice firmware stats; stats
		 * (including tx) are used only on the first
		 * slice for now
		 */
#ifndef IFNET_BUF_RING
		if (i > 0)
			continue;
#endif

		bytes = sizeof (*ss->fw_stats);
		err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
				     sizeof (*ss->fw_stats), 64);
		if (err != 0)
			goto abort;
		ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
		snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
			 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
		mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
#ifdef IFNET_BUF_RING
		ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
					   &ss->tx.mtx);
#endif
	}

	return (0);

abort:
	mxge_free_slices(sc);
	return (ENOMEM);
}
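
/*
 * Slice probing below negotiates the slice count three ways: the
 * firmware's MXGEFW_CMD_GET_MAX_RSS_QUEUES limit, the number of
 * available MSI-X vectors, and the hw.mxge.max_slices tunable (or
 * mp_ncpus when the tunable is -1).  The result is then rounded down
 * to a power of two; e.g. a limit of 6 yields 4 slices.
 */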
static void
mxge_slice_probe(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	char *old_fw;
	int msix_cnt, status, max_intr_slots;

	sc->num_slices = 1;
	/*
	 * don't enable multiple slices if they are disabled by the
	 * tunable, or if this is not an SMP system
	 */
	if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
		return;

	/* see how many MSI-X interrupts are available */
	msix_cnt = pci_msix_count(sc->dev);
	if (msix_cnt < 2)
		return;

	/* now load the slice-aware firmware and see what it supports */
	old_fw = sc->fw_name;
	if (old_fw == mxge_fw_aligned)
		sc->fw_name = mxge_fw_rss_aligned;
	else
		sc->fw_name = mxge_fw_rss_unaligned;
	status = mxge_load_firmware(sc, 0);
	if (status != 0) {
		device_printf(sc->dev, "Falling back to a single slice\n");
		return;
	}

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		goto abort_with_fw;
	}

	/* get rx ring size */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		goto abort_with_fw;
	}
	max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));

	/* tell it the size of the interrupt queues */
	cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
		goto abort_with_fw;
	}

	/* ask the maximum number of slices it supports */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
	if (status != 0) {
		device_printf(sc->dev,
			      "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
		goto abort_with_fw;
	}
	sc->num_slices = cmd.data0;
	if (sc->num_slices > msix_cnt)
		sc->num_slices = msix_cnt;

	if (mxge_max_slices == -1) {
		/* cap to number of CPUs in system */
		if (sc->num_slices > mp_ncpus)
			sc->num_slices = mp_ncpus;
	} else {
		if (sc->num_slices > mxge_max_slices)
			sc->num_slices = mxge_max_slices;
	}
	/* make sure it is a power of two */
	while (sc->num_slices & (sc->num_slices - 1))
		sc->num_slices--;

	if (mxge_verbose)
		device_printf(sc->dev, "using %d slices\n",
			      sc->num_slices);

	return;

abort_with_fw:
	sc->fw_name = old_fw;
	(void) mxge_load_firmware(sc, 0);
}

static int
mxge_add_msix_irqs(mxge_softc_t *sc)
{
	size_t bytes;
	int count, err, i, rid;

	rid = PCIR_BAR(2);
	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
						    &rid, RF_ACTIVE);

	if (sc->msix_table_res == NULL) {
		device_printf(sc->dev, "couldn't alloc MSIX table res\n");
		return ENXIO;
	}

	count = sc->num_slices;
	err = pci_alloc_msix(sc->dev, &count);
	if (err != 0) {
		device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d, "
			      "err = %d\n", sc->num_slices, err);
		goto abort_with_msix_table;
	}
	if (count < sc->num_slices) {
		device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
			      sc->num_slices, count);
		device_printf(sc->dev,
			      "Try setting hw.mxge.max_slices to %d\n",
			      count);
		err = ENOSPC;
		goto abort_with_msix;
	}
	bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
	sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_irq_res == NULL) {
		err = ENOMEM;
		goto abort_with_msix;
	}

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
							     SYS_RES_IRQ,
							     &rid, RF_ACTIVE);
		if (sc->msix_irq_res[i] == NULL) {
			device_printf(sc->dev, "couldn't allocate IRQ res"
				      " for message %d\n", i);
			err = ENXIO;
			goto abort_with_res;
		}
	}

	bytes = sizeof (*sc->msix_ih) * sc->num_slices;
	sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);

	for (i = 0; i < sc->num_slices; i++) {
		err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
				     INTR_TYPE_NET | INTR_MPSAFE,
#if __FreeBSD_version > 700030
				     NULL,
#endif
				     mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
		if (err != 0) {
			device_printf(sc->dev, "couldn't setup intr for "
				      "message %d\n", i);
			goto abort_with_intr;
		}
		bus_describe_intr(sc->dev, sc->msix_irq_res[i],
				  sc->msix_ih[i], "s%d", i);
	}

	if (mxge_verbose) {
		device_printf(sc->dev, "using %d msix IRQs:",
			      sc->num_slices);
		for (i = 0; i < sc->num_slices; i++)
			printf(" %ld", rman_get_start(sc->msix_irq_res[i]));
		printf("\n");
	}
	return (0);

abort_with_intr:
	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	free(sc->msix_ih, M_DEVBUF);

abort_with_res:
	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	free(sc->msix_irq_res, M_DEVBUF);

abort_with_msix:
	pci_release_msi(sc->dev);

abort_with_msix_table:
	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	return err;
}
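
/*
 * Single-interrupt fallback: prefer one MSI vector (rid 1) when the
 * device offers exactly one; otherwise fall back to the legacy INTx
 * line (rid 0) and remember the choice in sc->legacy_irq so teardown
 * can release the matching resource.
 */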
static int
mxge_add_single_irq(mxge_softc_t *sc)
{
	int count, err, rid;

	count = pci_msi_count(sc->dev);
	if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
		rid = 1;
	} else {
		rid = 0;
		sc->legacy_irq = 1;
	}
	sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0,
					 1, RF_SHAREABLE | RF_ACTIVE);
	if (sc->irq_res == NULL) {
		device_printf(sc->dev, "could not alloc interrupt\n");
		return ENXIO;
	}
	if (mxge_verbose)
		device_printf(sc->dev, "using %s irq %ld\n",
			      sc->legacy_irq ? "INTx" : "MSI",
			      rman_get_start(sc->irq_res));
	err = bus_setup_intr(sc->dev, sc->irq_res,
			     INTR_TYPE_NET | INTR_MPSAFE,
#if __FreeBSD_version > 700030
			     NULL,
#endif
			     mxge_intr, &sc->ss[0], &sc->ih);
	if (err != 0) {
		bus_release_resource(sc->dev, SYS_RES_IRQ,
				     sc->legacy_irq ? 0 : 1, sc->irq_res);
		if (!sc->legacy_irq)
			pci_release_msi(sc->dev);
	}
	return err;
}

static void
mxge_rem_msix_irqs(mxge_softc_t *sc)
{
	int i, rid;

	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	free(sc->msix_ih, M_DEVBUF);

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	free(sc->msix_irq_res, M_DEVBUF);

	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	pci_release_msi(sc->dev);
}

static void
mxge_rem_single_irq(mxge_softc_t *sc)
{
	bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
	bus_release_resource(sc->dev, SYS_RES_IRQ,
			     sc->legacy_irq ? 0 : 1, sc->irq_res);
	if (!sc->legacy_irq)
		pci_release_msi(sc->dev);
}
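
/*
 * mxge_add_irq()/mxge_rem_irq() dispatch on sc->num_slices: multi-slice
 * configurations use one MSI-X vector per slice, while single-slice
 * configurations use one MSI or legacy INTx interrupt.  Teardown
 * mirrors setup in reverse: handlers first, then IRQ resources, then
 * the MSI/MSI-X allocation itself.
 */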
static void
mxge_rem_irq(mxge_softc_t *sc)
{
	if (sc->num_slices > 1)
		mxge_rem_msix_irqs(sc);
	else
		mxge_rem_single_irq(sc);
}

static int
mxge_add_irq(mxge_softc_t *sc)
{
	int err;

	if (sc->num_slices > 1)
		err = mxge_add_msix_irqs(sc);
	else
		err = mxge_add_single_irq(sc);

	if (0 && err == 0 && sc->num_slices > 1) {
		mxge_rem_msix_irqs(sc);
		err = mxge_add_msix_irqs(sc);
	}
	return err;
}

static int
mxge_attach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp;
	int err, rid;

	sc->dev = dev;
	mxge_fetch_tunables(sc);

	TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
	sc->tq = taskqueue_create("mxge_taskq", M_WAITOK,
				  taskqueue_thread_enqueue, &sc->tq);
	if (sc->tq == NULL) {
		err = ENOMEM;
		goto abort_with_nothing;
	}

	err = bus_dma_tag_create(NULL,			/* parent */
				 1,			/* alignment */
				 0,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 MXGE_MAX_SEND_DESC,	/* num segs */
				 65536,			/* maxsegsize */
				 0,			/* flags */
				 NULL, NULL,		/* lock */
				 &sc->parent_dmat);	/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating parent dmat\n",
			      err);
		goto abort_with_tq;
	}

	ifp = sc->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "can not if_alloc()\n");
		err = ENOSPC;
		goto abort_with_parent_dmat;
	}
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
		 device_get_nameunit(dev));
	mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
	snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
		 "%s:drv", device_get_nameunit(dev));
	mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
		 MTX_NETWORK_LOCK, MTX_DEF);

	callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);

	mxge_setup_cfg_space(sc);

	/* Map the board into the kernel */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
					 ~0, 1, RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto abort_with_lock;
	}
	sc->sram = rman_get_virtual(sc->mem_res);
	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
	if (sc->sram_size > rman_get_size(sc->mem_res)) {
		device_printf(dev, "impossible memory region size %ld\n",
			      rman_get_size(sc->mem_res));
		err = ENXIO;
		goto abort_with_mem_res;
	}

	/* make NULL terminated copy of the EEPROM strings section of
	   lanai SRAM */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
				sc->eeprom_strings,
				MXGE_EEPROM_STRINGS_SIZE - 2);
	err = mxge_parse_strings(sc);
	if (err != 0)
		goto abort_with_mem_res;

	/* Enable write combining for efficient use of PCIe bus */
	mxge_enable_wc(sc);
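
	/*
	 * Three small DMA regions are allocated out of band (a sketch
	 * of their intent, inferred from their users elsewhere in this
	 * file): cmd_dma holds the firmware command response, the
	 * 64-byte zeropad_dma provides zeroes for padding runt frames,
	 * and dmabench_dma is a scratch page for PCIe DMA benchmarking.
	 */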
	/* Allocate the out of band dma memory */
	err = mxge_dma_alloc(sc, &sc->cmd_dma,
			     sizeof (mxge_cmd_t), 64);
	if (err != 0)
		goto abort_with_mem_res;
	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0)
		goto abort_with_cmd_dma;

	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0)
		goto abort_with_zeropad_dma;

	/* select & load the firmware */
	err = mxge_select_firmware(sc);
	if (err != 0)
		goto abort_with_dmabench;
	sc->intr_coal_delay = mxge_intr_coal_delay;

	mxge_slice_probe(sc);
	err = mxge_alloc_slices(sc);
	if (err != 0)
		goto abort_with_dmabench;

	err = mxge_reset(sc, 0);
	if (err != 0)
		goto abort_with_slices;

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to allocate rings\n");
		goto abort_with_slices;
	}

	err = mxge_add_irq(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to add irq\n");
		goto abort_with_rings;
	}

	ifp->if_baudrate = IF_Gbps(10UL);
	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
		IFCAP_VLAN_MTU | IFCAP_LINKSTATE;
#ifdef INET
	ifp->if_capabilities |= IFCAP_LRO;
#endif

#ifdef MXGE_NEW_VLAN_API
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;

	/* Only FW 1.4.32 and newer can do TSO over vlans */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 32)
		ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
#endif

	sc->max_mtu = mxge_max_mtu(sc);
	if (sc->max_mtu >= 9000)
		ifp->if_capabilities |= IFCAP_JUMBO_MTU;
	else
		device_printf(dev, "MTU limited to %d.  Install "
			      "latest firmware for 9000 byte jumbo support\n",
			      sc->max_mtu - ETHER_HDR_LEN);
	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
	ifp->if_capenable = ifp->if_capabilities;
	if (sc->lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	sc->csum_flag = 1;
	ifp->if_init = mxge_init;
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = mxge_ioctl;
	ifp->if_start = mxge_start;
	/* Initialise the ifmedia structure */
	ifmedia_init(&sc->media, 0, mxge_media_change,
		     mxge_media_status);
	mxge_media_init(sc);
	mxge_media_probe(sc);
	sc->dying = 0;
	ether_ifattach(ifp, sc->mac_addr);
	/* ether_ifattach sets mtu to ETHERMTU */
	if (mxge_initial_mtu != ETHERMTU)
		mxge_change_mtu(sc, mxge_initial_mtu);

	mxge_add_sysctls(sc);
#ifdef IFNET_BUF_RING
	ifp->if_transmit = mxge_transmit;
	ifp->if_qflush = mxge_qflush;
#endif
	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
				device_get_nameunit(sc->dev));
	callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	return 0;

abort_with_rings:
	mxge_free_rings(sc);
abort_with_slices:
	mxge_free_slices(sc);
abort_with_dmabench:
	mxge_dma_free(&sc->dmabench_dma);
abort_with_zeropad_dma:
	mxge_dma_free(&sc->zeropad_dma);
abort_with_cmd_dma:
	mxge_dma_free(&sc->cmd_dma);
abort_with_mem_res:
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
abort_with_lock:
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(ifp);
abort_with_parent_dmat:
	bus_dma_tag_destroy(sc->parent_dmat);
abort_with_tq:
	if (sc->tq != NULL) {
		taskqueue_drain(sc->tq, &sc->watchdog_task);
		taskqueue_free(sc->tq);
		sc->tq = NULL;
	}
abort_with_nothing:
	return err;
}
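
/*
 * Detach releases resources in roughly the reverse of the order
 * mxge_attach() acquired them, after first refusing to detach while
 * VLAN interfaces are still configured on top of the device.
 */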
static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	if (mxge_vlans_active(sc)) {
		device_printf(sc->dev,
			      "Detach vlans before removing module\n");
		return EBUSY;
	}
	mtx_lock(&sc->driver_mtx);
	sc->dying = 1;
	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
		mxge_close(sc, 0);
	mtx_unlock(&sc->driver_mtx);
	ether_ifdetach(sc->ifp);
	if (sc->tq != NULL) {
		taskqueue_drain(sc->tq, &sc->watchdog_task);
		taskqueue_free(sc->tq);
		sc->tq = NULL;
	}
	callout_drain(&sc->co_hdl);
	ifmedia_removeall(&sc->media);
	mxge_dummy_rdma(sc, 0);
	mxge_rem_sysctls(sc);
	mxge_rem_irq(sc);
	mxge_free_rings(sc);
	mxge_free_slices(sc);
	mxge_dma_free(&sc->dmabench_dma);
	mxge_dma_free(&sc->zeropad_dma);
	mxge_dma_free(&sc->cmd_dma);
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(sc->ifp);
	bus_dma_tag_destroy(sc->parent_dmat);
	return 0;
}

static int
mxge_shutdown(device_t dev)
{
	return 0;
}

/*
  This file uses Myri10GE driver indentation.

  Local Variables:
  c-file-style:"linux"
  tab-width:8
  End:
*/