1 /****************************************************************************** 2 3 Copyright (c) 2006-2009, Myricom Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Myricom Inc, nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 27 28 ***************************************************************************/ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/linker.h> 36 #include <sys/firmware.h> 37 #include <sys/endian.h> 38 #include <sys/sockio.h> 39 #include <sys/mbuf.h> 40 #include <sys/malloc.h> 41 #include <sys/kdb.h> 42 #include <sys/kernel.h> 43 #include <sys/lock.h> 44 #include <sys/module.h> 45 #include <sys/socket.h> 46 #include <sys/sysctl.h> 47 #include <sys/sx.h> 48 #include <sys/taskqueue.h> 49 50 #include <net/if.h> 51 #include <net/if_arp.h> 52 #include <net/ethernet.h> 53 #include <net/if_dl.h> 54 #include <net/if_media.h> 55 56 #include <net/bpf.h> 57 58 #include <net/if_types.h> 59 #include <net/if_vlan_var.h> 60 #include <net/zlib.h> 61 62 #include <netinet/in_systm.h> 63 #include <netinet/in.h> 64 #include <netinet/ip.h> 65 #include <netinet/tcp.h> 66 67 #include <machine/bus.h> 68 #include <machine/in_cksum.h> 69 #include <machine/resource.h> 70 #include <sys/bus.h> 71 #include <sys/rman.h> 72 #include <sys/smp.h> 73 74 #include <dev/pci/pcireg.h> 75 #include <dev/pci/pcivar.h> 76 #include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */ 77 78 #include <vm/vm.h> /* for pmap_mapdev() */ 79 #include <vm/pmap.h> 80 81 #if defined(__i386) || defined(__amd64) 82 #include <machine/specialreg.h> 83 #endif 84 85 #include <dev/mxge/mxge_mcp.h> 86 #include <dev/mxge/mcp_gen_header.h> 87 /*#define MXGE_FAKE_IFP*/ 88 #include <dev/mxge/if_mxge_var.h> 89 #ifdef IFNET_BUF_RING 90 #include <sys/buf_ring.h> 91 #endif 92 93 #include "opt_inet.h" 94 95 /* tunable params */ 96 static int mxge_nvidia_ecrc_enable = 1; 97 static int mxge_force_firmware = 0; 98 static int mxge_intr_coal_delay = 30; 99 static int mxge_deassert_wait = 1; 100 static int mxge_flow_control = 1; 101 static int mxge_verbose = 0; 102 static int mxge_lro_cnt = 8; 103 static int mxge_ticks; 104 static int mxge_max_slices = 1; 105 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 106 static int mxge_always_promisc = 0; 107 static int mxge_initial_mtu = ETHERMTU_JUMBO; 108 static int mxge_throttle = 0; 109 static char *mxge_fw_unaligned = "mxge_ethp_z8e"; 110 static char *mxge_fw_aligned = "mxge_eth_z8e"; 111 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; 112 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; 113 114 static int mxge_probe(device_t dev); 115 static int mxge_attach(device_t dev); 116 static int mxge_detach(device_t dev); 117 static int mxge_shutdown(device_t dev); 118 static void mxge_intr(void *arg); 119 120 static device_method_t mxge_methods[] = 121 { 122 /* Device interface */ 123 DEVMETHOD(device_probe, mxge_probe), 124 DEVMETHOD(device_attach, mxge_attach), 125 DEVMETHOD(device_detach, mxge_detach), 126 DEVMETHOD(device_shutdown, mxge_shutdown), 127 128 DEVMETHOD_END 129 }; 130 131 static driver_t mxge_driver = 132 { 133 "mxge", 134 mxge_methods, 135 sizeof(mxge_softc_t), 136 }; 137 138 static devclass_t mxge_devclass; 139 140 /* Declare ourselves to be a child of the PCI bus.*/ 141 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0); 142 MODULE_DEPEND(mxge, firmware, 1, 1, 1); 143 MODULE_DEPEND(mxge, zlib, 1, 1, 1); 144 145 static int mxge_load_firmware(mxge_softc_t *sc, int adopt); 146 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 147 static int mxge_close(mxge_softc_t *sc, int down); 148 static int mxge_open(mxge_softc_t *sc); 149 static void mxge_tick(void *arg); 150 151 static int 152 mxge_probe(device_t dev) 153 { 154 int rev; 155 156 157 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && 158 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) || 159 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) { 160 rev = pci_get_revid(dev); 161 switch (rev) { 162 case MXGE_PCI_REV_Z8E: 163 device_set_desc(dev, "Myri10G-PCIE-8A"); 164 break; 165 case MXGE_PCI_REV_Z8ES: 166 device_set_desc(dev, "Myri10G-PCIE-8B"); 167 break; 168 default: 169 device_set_desc(dev, "Myri10G-PCIE-8??"); 170 device_printf(dev, "Unrecognized rev %d NIC\n", 171 rev); 172 break; 173 } 174 return 0; 175 } 176 return ENXIO; 177 } 178 179 static void 180 mxge_enable_wc(mxge_softc_t *sc) 181 { 182 #if defined(__i386) || defined(__amd64) 183 vm_offset_t len; 184 int err; 185 186 sc->wc = 1; 187 len = rman_get_size(sc->mem_res); 188 err = pmap_change_attr((vm_offset_t) sc->sram, 189 len, PAT_WRITE_COMBINING); 190 if (err != 0) { 191 device_printf(sc->dev, "pmap_change_attr failed, %d\n", 192 err); 193 sc->wc = 0; 194 } 195 #endif 196 } 197 198 199 /* callback to get our DMA address */ 200 static void 201 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, 202 int error) 203 { 204 if (error == 0) { 205 *(bus_addr_t *) arg = segs->ds_addr; 206 } 207 } 208 209 static int 210 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, 211 bus_size_t alignment) 212 { 213 int err; 214 device_t dev = sc->dev; 215 bus_size_t boundary, maxsegsize; 216 217 if (bytes > 4096 && alignment == 4096) { 218 boundary = 0; 219 maxsegsize = bytes; 220 } else { 221 boundary = 4096; 222 maxsegsize = 4096; 223 } 224 225 /* allocate DMAable memory tags */ 226 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 227 alignment, /* alignment */ 228 boundary, /* boundary */ 229 BUS_SPACE_MAXADDR, /* low */ 230 BUS_SPACE_MAXADDR, /* high */ 231 NULL, NULL, /* filter */ 232 bytes, /* maxsize */ 233 1, /* num segs */ 234 maxsegsize, /* maxsegsize */ 235 BUS_DMA_COHERENT, /* flags */ 236 NULL, NULL, /* lock */ 237 &dma->dmat); /* tag */ 238 if (err != 0) { 239 device_printf(dev, "couldn't alloc tag (err = %d)\n", err); 240 return err; 241 } 242 243 /* allocate DMAable memory & map */ 244 err = bus_dmamem_alloc(dma->dmat, &dma->addr, 245 (BUS_DMA_WAITOK | BUS_DMA_COHERENT 246 | BUS_DMA_ZERO), &dma->map); 247 if (err != 0) { 248 device_printf(dev, "couldn't alloc mem (err = %d)\n", err); 249 goto abort_with_dmat; 250 } 251 252 /* load the memory */ 253 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, 254 mxge_dmamap_callback, 255 (void *)&dma->bus_addr, 0); 256 if (err != 0) { 257 device_printf(dev, "couldn't load map (err = %d)\n", err); 258 goto abort_with_mem; 259 } 260 return 0; 261 262 abort_with_mem: 263 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 264 abort_with_dmat: 265 (void)bus_dma_tag_destroy(dma->dmat); 266 return err; 267 } 268 269 270 static void 271 mxge_dma_free(mxge_dma_t *dma) 272 { 273 bus_dmamap_unload(dma->dmat, dma->map); 274 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 275 (void)bus_dma_tag_destroy(dma->dmat); 276 } 277 278 /* 279 * The eeprom strings on the lanaiX have the format 280 * SN=x\0 281 * MAC=x:x:x:x:x:x\0 282 * PC=text\0 283 */ 284 285 static int 286 mxge_parse_strings(mxge_softc_t *sc) 287 { 288 #define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++) 289 290 char *ptr, *limit; 291 int i, found_mac; 292 293 ptr = sc->eeprom_strings; 294 limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE; 295 found_mac = 0; 296 while (ptr < limit && *ptr != '\0') { 297 if (memcmp(ptr, "MAC=", 4) == 0) { 298 ptr += 1; 299 sc->mac_addr_string = ptr; 300 for (i = 0; i < 6; i++) { 301 ptr += 3; 302 if ((ptr + 2) > limit) 303 goto abort; 304 sc->mac_addr[i] = strtoul(ptr, NULL, 16); 305 found_mac = 1; 306 } 307 } else if (memcmp(ptr, "PC=", 3) == 0) { 308 ptr += 3; 309 strncpy(sc->product_code_string, ptr, 310 sizeof (sc->product_code_string) - 1); 311 } else if (memcmp(ptr, "SN=", 3) == 0) { 312 ptr += 3; 313 strncpy(sc->serial_number_string, ptr, 314 sizeof (sc->serial_number_string) - 1); 315 } 316 MXGE_NEXT_STRING(ptr); 317 } 318 319 if (found_mac) 320 return 0; 321 322 abort: 323 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 324 325 return ENXIO; 326 } 327 328 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 329 static void 330 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 331 { 332 uint32_t val; 333 unsigned long base, off; 334 char *va, *cfgptr; 335 device_t pdev, mcp55; 336 uint16_t vendor_id, device_id, word; 337 uintptr_t bus, slot, func, ivend, idev; 338 uint32_t *ptr32; 339 340 341 if (!mxge_nvidia_ecrc_enable) 342 return; 343 344 pdev = device_get_parent(device_get_parent(sc->dev)); 345 if (pdev == NULL) { 346 device_printf(sc->dev, "could not find parent?\n"); 347 return; 348 } 349 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 350 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 351 352 if (vendor_id != 0x10de) 353 return; 354 355 base = 0; 356 357 if (device_id == 0x005d) { 358 /* ck804, base address is magic */ 359 base = 0xe0000000UL; 360 } else if (device_id >= 0x0374 && device_id <= 0x378) { 361 /* mcp55, base address stored in chipset */ 362 mcp55 = pci_find_bsf(0, 0, 0); 363 if (mcp55 && 364 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 365 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 366 word = pci_read_config(mcp55, 0x90, 2); 367 base = ((unsigned long)word & 0x7ffeU) << 25; 368 } 369 } 370 if (!base) 371 return; 372 373 /* XXXX 374 Test below is commented because it is believed that doing 375 config read/write beyond 0xff will access the config space 376 for the next larger function. Uncomment this and remove 377 the hacky pmap_mapdev() way of accessing config space when 378 FreeBSD grows support for extended pcie config space access 379 */ 380 #if 0 381 /* See if we can, by some miracle, access the extended 382 config space */ 383 val = pci_read_config(pdev, 0x178, 4); 384 if (val != 0xffffffff) { 385 val |= 0x40; 386 pci_write_config(pdev, 0x178, val, 4); 387 return; 388 } 389 #endif 390 /* Rather than using normal pci config space writes, we must 391 * map the Nvidia config space ourselves. This is because on 392 * opteron/nvidia class machine the 0xe000000 mapping is 393 * handled by the nvidia chipset, that means the internal PCI 394 * device (the on-chip northbridge), or the amd-8131 bridge 395 * and things behind them are not visible by this method. 396 */ 397 398 BUS_READ_IVAR(device_get_parent(pdev), pdev, 399 PCI_IVAR_BUS, &bus); 400 BUS_READ_IVAR(device_get_parent(pdev), pdev, 401 PCI_IVAR_SLOT, &slot); 402 BUS_READ_IVAR(device_get_parent(pdev), pdev, 403 PCI_IVAR_FUNCTION, &func); 404 BUS_READ_IVAR(device_get_parent(pdev), pdev, 405 PCI_IVAR_VENDOR, &ivend); 406 BUS_READ_IVAR(device_get_parent(pdev), pdev, 407 PCI_IVAR_DEVICE, &idev); 408 409 off = base 410 + 0x00100000UL * (unsigned long)bus 411 + 0x00001000UL * (unsigned long)(func 412 + 8 * slot); 413 414 /* map it into the kernel */ 415 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 416 417 418 if (va == NULL) { 419 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 420 return; 421 } 422 /* get a pointer to the config space mapped into the kernel */ 423 cfgptr = va + (off & PAGE_MASK); 424 425 /* make sure that we can really access it */ 426 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 427 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 428 if (! (vendor_id == ivend && device_id == idev)) { 429 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 430 vendor_id, device_id); 431 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 432 return; 433 } 434 435 ptr32 = (uint32_t*)(cfgptr + 0x178); 436 val = *ptr32; 437 438 if (val == 0xffffffff) { 439 device_printf(sc->dev, "extended mapping failed\n"); 440 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 441 return; 442 } 443 *ptr32 = val | 0x40; 444 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 445 if (mxge_verbose) 446 device_printf(sc->dev, 447 "Enabled ECRC on upstream Nvidia bridge " 448 "at %d:%d:%d\n", 449 (int)bus, (int)slot, (int)func); 450 return; 451 } 452 #else 453 static void 454 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 455 { 456 device_printf(sc->dev, 457 "Nforce 4 chipset on non-x86/amd64!?!?!\n"); 458 return; 459 } 460 #endif 461 462 463 static int 464 mxge_dma_test(mxge_softc_t *sc, int test_type) 465 { 466 mxge_cmd_t cmd; 467 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr; 468 int status; 469 uint32_t len; 470 char *test = " "; 471 472 473 /* Run a small DMA test. 474 * The magic multipliers to the length tell the firmware 475 * to do DMA read, write, or read+write tests. The 476 * results are returned in cmd.data0. The upper 16 477 * bits of the return is the number of transfers completed. 478 * The lower 16 bits is the time in 0.5us ticks that the 479 * transfers took to complete. 480 */ 481 482 len = sc->tx_boundary; 483 484 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 485 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 486 cmd.data2 = len * 0x10000; 487 status = mxge_send_cmd(sc, test_type, &cmd); 488 if (status != 0) { 489 test = "read"; 490 goto abort; 491 } 492 sc->read_dma = ((cmd.data0>>16) * len * 2) / 493 (cmd.data0 & 0xffff); 494 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 495 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 496 cmd.data2 = len * 0x1; 497 status = mxge_send_cmd(sc, test_type, &cmd); 498 if (status != 0) { 499 test = "write"; 500 goto abort; 501 } 502 sc->write_dma = ((cmd.data0>>16) * len * 2) / 503 (cmd.data0 & 0xffff); 504 505 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 506 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 507 cmd.data2 = len * 0x10001; 508 status = mxge_send_cmd(sc, test_type, &cmd); 509 if (status != 0) { 510 test = "read/write"; 511 goto abort; 512 } 513 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 514 (cmd.data0 & 0xffff); 515 516 abort: 517 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 518 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 519 test, status); 520 521 return status; 522 } 523 524 /* 525 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 526 * when the PCI-E Completion packets are aligned on an 8-byte 527 * boundary. Some PCI-E chip sets always align Completion packets; on 528 * the ones that do not, the alignment can be enforced by enabling 529 * ECRC generation (if supported). 530 * 531 * When PCI-E Completion packets are not aligned, it is actually more 532 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 533 * 534 * If the driver can neither enable ECRC nor verify that it has 535 * already been enabled, then it must use a firmware image which works 536 * around unaligned completion packets (ethp_z8e.dat), and it should 537 * also ensure that it never gives the device a Read-DMA which is 538 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 539 * enabled, then the driver should use the aligned (eth_z8e.dat) 540 * firmware image, and set tx_boundary to 4KB. 541 */ 542 543 static int 544 mxge_firmware_probe(mxge_softc_t *sc) 545 { 546 device_t dev = sc->dev; 547 int reg, status; 548 uint16_t pectl; 549 550 sc->tx_boundary = 4096; 551 /* 552 * Verify the max read request size was set to 4KB 553 * before trying the test with 4KB. 554 */ 555 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { 556 pectl = pci_read_config(dev, reg + 0x8, 2); 557 if ((pectl & (5 << 12)) != (5 << 12)) { 558 device_printf(dev, "Max Read Req. size != 4k (0x%x\n", 559 pectl); 560 sc->tx_boundary = 2048; 561 } 562 } 563 564 /* 565 * load the optimized firmware (which assumes aligned PCIe 566 * completions) in order to see if it works on this host. 567 */ 568 sc->fw_name = mxge_fw_aligned; 569 status = mxge_load_firmware(sc, 1); 570 if (status != 0) { 571 return status; 572 } 573 574 /* 575 * Enable ECRC if possible 576 */ 577 mxge_enable_nvidia_ecrc(sc); 578 579 /* 580 * Run a DMA test which watches for unaligned completions and 581 * aborts on the first one seen. 582 */ 583 584 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 585 if (status == 0) 586 return 0; /* keep the aligned firmware */ 587 588 if (status != E2BIG) 589 device_printf(dev, "DMA test failed: %d\n", status); 590 if (status == ENOSYS) 591 device_printf(dev, "Falling back to ethp! " 592 "Please install up to date fw\n"); 593 return status; 594 } 595 596 static int 597 mxge_select_firmware(mxge_softc_t *sc) 598 { 599 int aligned = 0; 600 int force_firmware = mxge_force_firmware; 601 602 if (sc->throttle) 603 force_firmware = sc->throttle; 604 605 if (force_firmware != 0) { 606 if (force_firmware == 1) 607 aligned = 1; 608 else 609 aligned = 0; 610 if (mxge_verbose) 611 device_printf(sc->dev, 612 "Assuming %s completions (forced)\n", 613 aligned ? "aligned" : "unaligned"); 614 goto abort; 615 } 616 617 /* if the PCIe link width is 4 or less, we can use the aligned 618 firmware and skip any checks */ 619 if (sc->link_width != 0 && sc->link_width <= 4) { 620 device_printf(sc->dev, 621 "PCIe x%d Link, expect reduced performance\n", 622 sc->link_width); 623 aligned = 1; 624 goto abort; 625 } 626 627 if (0 == mxge_firmware_probe(sc)) 628 return 0; 629 630 abort: 631 if (aligned) { 632 sc->fw_name = mxge_fw_aligned; 633 sc->tx_boundary = 4096; 634 } else { 635 sc->fw_name = mxge_fw_unaligned; 636 sc->tx_boundary = 2048; 637 } 638 return (mxge_load_firmware(sc, 0)); 639 } 640 641 union qualhack 642 { 643 const char *ro_char; 644 char *rw_char; 645 }; 646 647 static int 648 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 649 { 650 651 652 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 653 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 654 be32toh(hdr->mcp_type)); 655 return EIO; 656 } 657 658 /* save firmware version for sysctl */ 659 strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version)); 660 if (mxge_verbose) 661 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 662 663 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 664 &sc->fw_ver_minor, &sc->fw_ver_tiny); 665 666 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 667 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 668 device_printf(sc->dev, "Found firmware version %s\n", 669 sc->fw_version); 670 device_printf(sc->dev, "Driver needs %d.%d\n", 671 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 672 return EINVAL; 673 } 674 return 0; 675 676 } 677 678 static void * 679 z_alloc(void *nil, u_int items, u_int size) 680 { 681 void *ptr; 682 683 ptr = malloc(items * size, M_TEMP, M_NOWAIT); 684 return ptr; 685 } 686 687 static void 688 z_free(void *nil, void *ptr) 689 { 690 free(ptr, M_TEMP); 691 } 692 693 694 static int 695 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 696 { 697 z_stream zs; 698 char *inflate_buffer; 699 const struct firmware *fw; 700 const mcp_gen_header_t *hdr; 701 unsigned hdr_offset; 702 int status; 703 unsigned int i; 704 char dummy; 705 size_t fw_len; 706 707 fw = firmware_get(sc->fw_name); 708 if (fw == NULL) { 709 device_printf(sc->dev, "Could not find firmware image %s\n", 710 sc->fw_name); 711 return ENOENT; 712 } 713 714 715 716 /* setup zlib and decompress f/w */ 717 bzero(&zs, sizeof (zs)); 718 zs.zalloc = z_alloc; 719 zs.zfree = z_free; 720 status = inflateInit(&zs); 721 if (status != Z_OK) { 722 status = EIO; 723 goto abort_with_fw; 724 } 725 726 /* the uncompressed size is stored as the firmware version, 727 which would otherwise go unused */ 728 fw_len = (size_t) fw->version; 729 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); 730 if (inflate_buffer == NULL) 731 goto abort_with_zs; 732 zs.avail_in = fw->datasize; 733 zs.next_in = __DECONST(char *, fw->data); 734 zs.avail_out = fw_len; 735 zs.next_out = inflate_buffer; 736 status = inflate(&zs, Z_FINISH); 737 if (status != Z_STREAM_END) { 738 device_printf(sc->dev, "zlib %d\n", status); 739 status = EIO; 740 goto abort_with_buffer; 741 } 742 743 /* check id */ 744 hdr_offset = htobe32(*(const uint32_t *) 745 (inflate_buffer + MCP_HEADER_PTR_OFFSET)); 746 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 747 device_printf(sc->dev, "Bad firmware file"); 748 status = EIO; 749 goto abort_with_buffer; 750 } 751 hdr = (const void*)(inflate_buffer + hdr_offset); 752 753 status = mxge_validate_firmware(sc, hdr); 754 if (status != 0) 755 goto abort_with_buffer; 756 757 /* Copy the inflated firmware to NIC SRAM. */ 758 for (i = 0; i < fw_len; i += 256) { 759 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 760 inflate_buffer + i, 761 min(256U, (unsigned)(fw_len - i))); 762 wmb(); 763 dummy = *sc->sram; 764 wmb(); 765 } 766 767 *limit = fw_len; 768 status = 0; 769 abort_with_buffer: 770 free(inflate_buffer, M_TEMP); 771 abort_with_zs: 772 inflateEnd(&zs); 773 abort_with_fw: 774 firmware_put(fw, FIRMWARE_UNLOAD); 775 return status; 776 } 777 778 /* 779 * Enable or disable periodic RDMAs from the host to make certain 780 * chipsets resend dropped PCIe messages 781 */ 782 783 static void 784 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 785 { 786 char buf_bytes[72]; 787 volatile uint32_t *confirm; 788 volatile char *submit; 789 uint32_t *buf, dma_low, dma_high; 790 int i; 791 792 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 793 794 /* clear confirmation addr */ 795 confirm = (volatile uint32_t *)sc->cmd; 796 *confirm = 0; 797 wmb(); 798 799 /* send an rdma command to the PCIe engine, and wait for the 800 response in the confirmation address. The firmware should 801 write a -1 there to indicate it is alive and well 802 */ 803 804 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 805 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 806 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 807 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 808 buf[2] = htobe32(0xffffffff); /* confirm data */ 809 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 810 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 811 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 812 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 813 buf[5] = htobe32(enable); /* enable? */ 814 815 816 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 817 818 mxge_pio_copy(submit, buf, 64); 819 wmb(); 820 DELAY(1000); 821 wmb(); 822 i = 0; 823 while (*confirm != 0xffffffff && i < 20) { 824 DELAY(1000); 825 i++; 826 } 827 if (*confirm != 0xffffffff) { 828 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 829 (enable ? "enable" : "disable"), confirm, 830 *confirm); 831 } 832 return; 833 } 834 835 static int 836 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 837 { 838 mcp_cmd_t *buf; 839 char buf_bytes[sizeof(*buf) + 8]; 840 volatile mcp_cmd_response_t *response = sc->cmd; 841 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 842 uint32_t dma_low, dma_high; 843 int err, sleep_total = 0; 844 845 /* ensure buf is aligned to 8 bytes */ 846 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 847 848 buf->data0 = htobe32(data->data0); 849 buf->data1 = htobe32(data->data1); 850 buf->data2 = htobe32(data->data2); 851 buf->cmd = htobe32(cmd); 852 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 853 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 854 855 buf->response_addr.low = htobe32(dma_low); 856 buf->response_addr.high = htobe32(dma_high); 857 mtx_lock(&sc->cmd_mtx); 858 response->result = 0xffffffff; 859 wmb(); 860 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 861 862 /* wait up to 20ms */ 863 err = EAGAIN; 864 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 865 bus_dmamap_sync(sc->cmd_dma.dmat, 866 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 867 wmb(); 868 switch (be32toh(response->result)) { 869 case 0: 870 data->data0 = be32toh(response->data); 871 err = 0; 872 break; 873 case 0xffffffff: 874 DELAY(1000); 875 break; 876 case MXGEFW_CMD_UNKNOWN: 877 err = ENOSYS; 878 break; 879 case MXGEFW_CMD_ERROR_UNALIGNED: 880 err = E2BIG; 881 break; 882 case MXGEFW_CMD_ERROR_BUSY: 883 err = EBUSY; 884 break; 885 case MXGEFW_CMD_ERROR_I2C_ABSENT: 886 err = ENXIO; 887 break; 888 default: 889 device_printf(sc->dev, 890 "mxge: command %d " 891 "failed, result = %d\n", 892 cmd, be32toh(response->result)); 893 err = ENXIO; 894 break; 895 } 896 if (err != EAGAIN) 897 break; 898 } 899 if (err == EAGAIN) 900 device_printf(sc->dev, "mxge: command %d timed out" 901 "result = %d\n", 902 cmd, be32toh(response->result)); 903 mtx_unlock(&sc->cmd_mtx); 904 return err; 905 } 906 907 static int 908 mxge_adopt_running_firmware(mxge_softc_t *sc) 909 { 910 struct mcp_gen_header *hdr; 911 const size_t bytes = sizeof (struct mcp_gen_header); 912 size_t hdr_offset; 913 int status; 914 915 /* find running firmware header */ 916 hdr_offset = htobe32(*(volatile uint32_t *) 917 (sc->sram + MCP_HEADER_PTR_OFFSET)); 918 919 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 920 device_printf(sc->dev, 921 "Running firmware has bad header offset (%d)\n", 922 (int)hdr_offset); 923 return EIO; 924 } 925 926 /* copy header of running firmware from SRAM to host memory to 927 * validate firmware */ 928 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 929 if (hdr == NULL) { 930 device_printf(sc->dev, "could not malloc firmware hdr\n"); 931 return ENOMEM; 932 } 933 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 934 rman_get_bushandle(sc->mem_res), 935 hdr_offset, (char *)hdr, bytes); 936 status = mxge_validate_firmware(sc, hdr); 937 free(hdr, M_DEVBUF); 938 939 /* 940 * check to see if adopted firmware has bug where adopting 941 * it will cause broadcasts to be filtered unless the NIC 942 * is kept in ALLMULTI mode 943 */ 944 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 945 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 946 sc->adopted_rx_filter_bug = 1; 947 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 948 "working around rx filter bug\n", 949 sc->fw_ver_major, sc->fw_ver_minor, 950 sc->fw_ver_tiny); 951 } 952 953 return status; 954 } 955 956 957 static int 958 mxge_load_firmware(mxge_softc_t *sc, int adopt) 959 { 960 volatile uint32_t *confirm; 961 volatile char *submit; 962 char buf_bytes[72]; 963 uint32_t *buf, size, dma_low, dma_high; 964 int status, i; 965 966 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 967 968 size = sc->sram_size; 969 status = mxge_load_firmware_helper(sc, &size); 970 if (status) { 971 if (!adopt) 972 return status; 973 /* Try to use the currently running firmware, if 974 it is new enough */ 975 status = mxge_adopt_running_firmware(sc); 976 if (status) { 977 device_printf(sc->dev, 978 "failed to adopt running firmware\n"); 979 return status; 980 } 981 device_printf(sc->dev, 982 "Successfully adopted running firmware\n"); 983 if (sc->tx_boundary == 4096) { 984 device_printf(sc->dev, 985 "Using firmware currently running on NIC" 986 ". For optimal\n"); 987 device_printf(sc->dev, 988 "performance consider loading optimized " 989 "firmware\n"); 990 } 991 sc->fw_name = mxge_fw_unaligned; 992 sc->tx_boundary = 2048; 993 return 0; 994 } 995 /* clear confirmation addr */ 996 confirm = (volatile uint32_t *)sc->cmd; 997 *confirm = 0; 998 wmb(); 999 /* send a reload command to the bootstrap MCP, and wait for the 1000 response in the confirmation address. The firmware should 1001 write a -1 there to indicate it is alive and well 1002 */ 1003 1004 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 1005 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 1006 1007 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 1008 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 1009 buf[2] = htobe32(0xffffffff); /* confirm data */ 1010 1011 /* FIX: All newest firmware should un-protect the bottom of 1012 the sram before handoff. However, the very first interfaces 1013 do not. Therefore the handoff copy must skip the first 8 bytes 1014 */ 1015 /* where the code starts*/ 1016 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 1017 buf[4] = htobe32(size - 8); /* length of code */ 1018 buf[5] = htobe32(8); /* where to copy to */ 1019 buf[6] = htobe32(0); /* where to jump to */ 1020 1021 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 1022 mxge_pio_copy(submit, buf, 64); 1023 wmb(); 1024 DELAY(1000); 1025 wmb(); 1026 i = 0; 1027 while (*confirm != 0xffffffff && i < 20) { 1028 DELAY(1000*10); 1029 i++; 1030 bus_dmamap_sync(sc->cmd_dma.dmat, 1031 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 1032 } 1033 if (*confirm != 0xffffffff) { 1034 device_printf(sc->dev,"handoff failed (%p = 0x%x)", 1035 confirm, *confirm); 1036 1037 return ENXIO; 1038 } 1039 return 0; 1040 } 1041 1042 static int 1043 mxge_update_mac_address(mxge_softc_t *sc) 1044 { 1045 mxge_cmd_t cmd; 1046 uint8_t *addr = sc->mac_addr; 1047 int status; 1048 1049 1050 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 1051 | (addr[2] << 8) | addr[3]); 1052 1053 cmd.data1 = ((addr[4] << 8) | (addr[5])); 1054 1055 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 1056 return status; 1057 } 1058 1059 static int 1060 mxge_change_pause(mxge_softc_t *sc, int pause) 1061 { 1062 mxge_cmd_t cmd; 1063 int status; 1064 1065 if (pause) 1066 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, 1067 &cmd); 1068 else 1069 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, 1070 &cmd); 1071 1072 if (status) { 1073 device_printf(sc->dev, "Failed to set flow control mode\n"); 1074 return ENXIO; 1075 } 1076 sc->pause = pause; 1077 return 0; 1078 } 1079 1080 static void 1081 mxge_change_promisc(mxge_softc_t *sc, int promisc) 1082 { 1083 mxge_cmd_t cmd; 1084 int status; 1085 1086 if (mxge_always_promisc) 1087 promisc = 1; 1088 1089 if (promisc) 1090 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, 1091 &cmd); 1092 else 1093 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, 1094 &cmd); 1095 1096 if (status) { 1097 device_printf(sc->dev, "Failed to set promisc mode\n"); 1098 } 1099 } 1100 1101 static void 1102 mxge_set_multicast_list(mxge_softc_t *sc) 1103 { 1104 mxge_cmd_t cmd; 1105 struct ifmultiaddr *ifma; 1106 struct ifnet *ifp = sc->ifp; 1107 int err; 1108 1109 /* This firmware is known to not support multicast */ 1110 if (!sc->fw_multicast_support) 1111 return; 1112 1113 /* Disable multicast filtering while we play with the lists*/ 1114 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1115 if (err != 0) { 1116 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," 1117 " error status: %d\n", err); 1118 return; 1119 } 1120 1121 if (sc->adopted_rx_filter_bug) 1122 return; 1123 1124 if (ifp->if_flags & IFF_ALLMULTI) 1125 /* request to disable multicast filtering, so quit here */ 1126 return; 1127 1128 /* Flush all the filters */ 1129 1130 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1131 if (err != 0) { 1132 device_printf(sc->dev, 1133 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" 1134 ", error status: %d\n", err); 1135 return; 1136 } 1137 1138 /* Walk the multicast list, and add each address */ 1139 1140 if_maddr_rlock(ifp); 1141 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1142 if (ifma->ifma_addr->sa_family != AF_LINK) 1143 continue; 1144 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1145 &cmd.data0, 4); 1146 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, 1147 &cmd.data1, 2); 1148 cmd.data0 = htonl(cmd.data0); 1149 cmd.data1 = htonl(cmd.data1); 1150 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1151 if (err != 0) { 1152 device_printf(sc->dev, "Failed " 1153 "MXGEFW_JOIN_MULTICAST_GROUP, error status:" 1154 "%d\t", err); 1155 /* abort, leaving multicast filtering off */ 1156 if_maddr_runlock(ifp); 1157 return; 1158 } 1159 } 1160 if_maddr_runlock(ifp); 1161 /* Enable multicast filtering */ 1162 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1163 if (err != 0) { 1164 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI" 1165 ", error status: %d\n", err); 1166 } 1167 } 1168 1169 static int 1170 mxge_max_mtu(mxge_softc_t *sc) 1171 { 1172 mxge_cmd_t cmd; 1173 int status; 1174 1175 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) 1176 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1177 1178 /* try to set nbufs to see if it we can 1179 use virtually contiguous jumbos */ 1180 cmd.data0 = 0; 1181 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 1182 &cmd); 1183 if (status == 0) 1184 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1185 1186 /* otherwise, we're limited to MJUMPAGESIZE */ 1187 return MJUMPAGESIZE - MXGEFW_PAD; 1188 } 1189 1190 static int 1191 mxge_reset(mxge_softc_t *sc, int interrupts_setup) 1192 { 1193 struct mxge_slice_state *ss; 1194 mxge_rx_done_t *rx_done; 1195 volatile uint32_t *irq_claim; 1196 mxge_cmd_t cmd; 1197 int slice, status; 1198 1199 /* try to send a reset command to the card to see if it 1200 is alive */ 1201 memset(&cmd, 0, sizeof (cmd)); 1202 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1203 if (status != 0) { 1204 device_printf(sc->dev, "failed reset\n"); 1205 return ENXIO; 1206 } 1207 1208 mxge_dummy_rdma(sc, 1); 1209 1210 1211 /* set the intrq size */ 1212 cmd.data0 = sc->rx_ring_size; 1213 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1214 1215 /* 1216 * Even though we already know how many slices are supported 1217 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES 1218 * has magic side effects, and must be called after a reset. 1219 * It must be called prior to calling any RSS related cmds, 1220 * including assigning an interrupt queue for anything but 1221 * slice 0. It must also be called *after* 1222 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 1223 * the firmware to compute offsets. 1224 */ 1225 1226 if (sc->num_slices > 1) { 1227 /* ask the maximum number of slices it supports */ 1228 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 1229 &cmd); 1230 if (status != 0) { 1231 device_printf(sc->dev, 1232 "failed to get number of slices\n"); 1233 return status; 1234 } 1235 /* 1236 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 1237 * to setting up the interrupt queue DMA 1238 */ 1239 cmd.data0 = sc->num_slices; 1240 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 1241 #ifdef IFNET_BUF_RING 1242 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 1243 #endif 1244 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, 1245 &cmd); 1246 if (status != 0) { 1247 device_printf(sc->dev, 1248 "failed to set number of slices\n"); 1249 return status; 1250 } 1251 } 1252 1253 1254 if (interrupts_setup) { 1255 /* Now exchange information about interrupts */ 1256 for (slice = 0; slice < sc->num_slices; slice++) { 1257 rx_done = &sc->ss[slice].rx_done; 1258 memset(rx_done->entry, 0, sc->rx_ring_size); 1259 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr); 1260 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr); 1261 cmd.data2 = slice; 1262 status |= mxge_send_cmd(sc, 1263 MXGEFW_CMD_SET_INTRQ_DMA, 1264 &cmd); 1265 } 1266 } 1267 1268 status |= mxge_send_cmd(sc, 1269 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); 1270 1271 1272 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1273 1274 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1275 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1276 1277 1278 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1279 &cmd); 1280 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1281 if (status != 0) { 1282 device_printf(sc->dev, "failed set interrupt parameters\n"); 1283 return status; 1284 } 1285 1286 1287 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1288 1289 1290 /* run a DMA benchmark */ 1291 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST); 1292 1293 for (slice = 0; slice < sc->num_slices; slice++) { 1294 ss = &sc->ss[slice]; 1295 1296 ss->irq_claim = irq_claim + (2 * slice); 1297 /* reset mcp/driver shared state back to 0 */ 1298 ss->rx_done.idx = 0; 1299 ss->rx_done.cnt = 0; 1300 ss->tx.req = 0; 1301 ss->tx.done = 0; 1302 ss->tx.pkt_done = 0; 1303 ss->tx.queue_active = 0; 1304 ss->tx.activate = 0; 1305 ss->tx.deactivate = 0; 1306 ss->tx.wake = 0; 1307 ss->tx.defrag = 0; 1308 ss->tx.stall = 0; 1309 ss->rx_big.cnt = 0; 1310 ss->rx_small.cnt = 0; 1311 ss->lro_bad_csum = 0; 1312 ss->lro_queued = 0; 1313 ss->lro_flushed = 0; 1314 if (ss->fw_stats != NULL) { 1315 bzero(ss->fw_stats, sizeof *ss->fw_stats); 1316 } 1317 } 1318 sc->rdma_tags_available = 15; 1319 status = mxge_update_mac_address(sc); 1320 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC); 1321 mxge_change_pause(sc, sc->pause); 1322 mxge_set_multicast_list(sc); 1323 if (sc->throttle) { 1324 cmd.data0 = sc->throttle; 1325 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, 1326 &cmd)) { 1327 device_printf(sc->dev, 1328 "can't enable throttle\n"); 1329 } 1330 } 1331 return status; 1332 } 1333 1334 static int 1335 mxge_change_throttle(SYSCTL_HANDLER_ARGS) 1336 { 1337 mxge_cmd_t cmd; 1338 mxge_softc_t *sc; 1339 int err; 1340 unsigned int throttle; 1341 1342 sc = arg1; 1343 throttle = sc->throttle; 1344 err = sysctl_handle_int(oidp, &throttle, arg2, req); 1345 if (err != 0) { 1346 return err; 1347 } 1348 1349 if (throttle == sc->throttle) 1350 return 0; 1351 1352 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE) 1353 return EINVAL; 1354 1355 mtx_lock(&sc->driver_mtx); 1356 cmd.data0 = throttle; 1357 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd); 1358 if (err == 0) 1359 sc->throttle = throttle; 1360 mtx_unlock(&sc->driver_mtx); 1361 return err; 1362 } 1363 1364 static int 1365 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1366 { 1367 mxge_softc_t *sc; 1368 unsigned int intr_coal_delay; 1369 int err; 1370 1371 sc = arg1; 1372 intr_coal_delay = sc->intr_coal_delay; 1373 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1374 if (err != 0) { 1375 return err; 1376 } 1377 if (intr_coal_delay == sc->intr_coal_delay) 1378 return 0; 1379 1380 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1381 return EINVAL; 1382 1383 mtx_lock(&sc->driver_mtx); 1384 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1385 sc->intr_coal_delay = intr_coal_delay; 1386 1387 mtx_unlock(&sc->driver_mtx); 1388 return err; 1389 } 1390 1391 static int 1392 mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1393 { 1394 mxge_softc_t *sc; 1395 unsigned int enabled; 1396 int err; 1397 1398 sc = arg1; 1399 enabled = sc->pause; 1400 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1401 if (err != 0) { 1402 return err; 1403 } 1404 if (enabled == sc->pause) 1405 return 0; 1406 1407 mtx_lock(&sc->driver_mtx); 1408 err = mxge_change_pause(sc, enabled); 1409 mtx_unlock(&sc->driver_mtx); 1410 return err; 1411 } 1412 1413 static int 1414 mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt) 1415 { 1416 struct ifnet *ifp; 1417 int err = 0; 1418 1419 ifp = sc->ifp; 1420 if (lro_cnt == 0) 1421 ifp->if_capenable &= ~IFCAP_LRO; 1422 else 1423 ifp->if_capenable |= IFCAP_LRO; 1424 sc->lro_cnt = lro_cnt; 1425 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1426 mxge_close(sc, 0); 1427 err = mxge_open(sc); 1428 } 1429 return err; 1430 } 1431 1432 static int 1433 mxge_change_lro(SYSCTL_HANDLER_ARGS) 1434 { 1435 mxge_softc_t *sc; 1436 unsigned int lro_cnt; 1437 int err; 1438 1439 sc = arg1; 1440 lro_cnt = sc->lro_cnt; 1441 err = sysctl_handle_int(oidp, &lro_cnt, arg2, req); 1442 if (err != 0) 1443 return err; 1444 1445 if (lro_cnt == sc->lro_cnt) 1446 return 0; 1447 1448 if (lro_cnt > 128) 1449 return EINVAL; 1450 1451 mtx_lock(&sc->driver_mtx); 1452 err = mxge_change_lro_locked(sc, lro_cnt); 1453 mtx_unlock(&sc->driver_mtx); 1454 return err; 1455 } 1456 1457 static int 1458 mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1459 { 1460 int err; 1461 1462 if (arg1 == NULL) 1463 return EFAULT; 1464 arg2 = be32toh(*(int *)arg1); 1465 arg1 = NULL; 1466 err = sysctl_handle_int(oidp, arg1, arg2, req); 1467 1468 return err; 1469 } 1470 1471 static void 1472 mxge_rem_sysctls(mxge_softc_t *sc) 1473 { 1474 struct mxge_slice_state *ss; 1475 int slice; 1476 1477 if (sc->slice_sysctl_tree == NULL) 1478 return; 1479 1480 for (slice = 0; slice < sc->num_slices; slice++) { 1481 ss = &sc->ss[slice]; 1482 if (ss == NULL || ss->sysctl_tree == NULL) 1483 continue; 1484 sysctl_ctx_free(&ss->sysctl_ctx); 1485 ss->sysctl_tree = NULL; 1486 } 1487 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1488 sc->slice_sysctl_tree = NULL; 1489 } 1490 1491 static void 1492 mxge_add_sysctls(mxge_softc_t *sc) 1493 { 1494 struct sysctl_ctx_list *ctx; 1495 struct sysctl_oid_list *children; 1496 mcp_irq_data_t *fw; 1497 struct mxge_slice_state *ss; 1498 int slice; 1499 char slice_num[8]; 1500 1501 ctx = device_get_sysctl_ctx(sc->dev); 1502 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1503 fw = sc->ss[0].fw_stats; 1504 1505 /* random information */ 1506 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1507 "firmware_version", 1508 CTLFLAG_RD, &sc->fw_version, 1509 0, "firmware version"); 1510 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1511 "serial_number", 1512 CTLFLAG_RD, &sc->serial_number_string, 1513 0, "serial number"); 1514 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1515 "product_code", 1516 CTLFLAG_RD, &sc->product_code_string, 1517 0, "product_code"); 1518 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1519 "pcie_link_width", 1520 CTLFLAG_RD, &sc->link_width, 1521 0, "tx_boundary"); 1522 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1523 "tx_boundary", 1524 CTLFLAG_RD, &sc->tx_boundary, 1525 0, "tx_boundary"); 1526 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1527 "write_combine", 1528 CTLFLAG_RD, &sc->wc, 1529 0, "write combining PIO?"); 1530 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1531 "read_dma_MBs", 1532 CTLFLAG_RD, &sc->read_dma, 1533 0, "DMA Read speed in MB/s"); 1534 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1535 "write_dma_MBs", 1536 CTLFLAG_RD, &sc->write_dma, 1537 0, "DMA Write speed in MB/s"); 1538 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1539 "read_write_dma_MBs", 1540 CTLFLAG_RD, &sc->read_write_dma, 1541 0, "DMA concurrent Read/Write speed in MB/s"); 1542 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1543 "watchdog_resets", 1544 CTLFLAG_RD, &sc->watchdog_resets, 1545 0, "Number of times NIC was reset"); 1546 1547 1548 /* performance related tunables */ 1549 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1550 "intr_coal_delay", 1551 CTLTYPE_INT|CTLFLAG_RW, sc, 1552 0, mxge_change_intr_coal, 1553 "I", "interrupt coalescing delay in usecs"); 1554 1555 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1556 "throttle", 1557 CTLTYPE_INT|CTLFLAG_RW, sc, 1558 0, mxge_change_throttle, 1559 "I", "transmit throttling"); 1560 1561 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1562 "flow_control_enabled", 1563 CTLTYPE_INT|CTLFLAG_RW, sc, 1564 0, mxge_change_flow_control, 1565 "I", "interrupt coalescing delay in usecs"); 1566 1567 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1568 "deassert_wait", 1569 CTLFLAG_RW, &mxge_deassert_wait, 1570 0, "Wait for IRQ line to go low in ihandler"); 1571 1572 /* stats block from firmware is in network byte order. 1573 Need to swap it */ 1574 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1575 "link_up", 1576 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 1577 0, mxge_handle_be32, 1578 "I", "link up"); 1579 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1580 "rdma_tags_available", 1581 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 1582 0, mxge_handle_be32, 1583 "I", "rdma_tags_available"); 1584 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1585 "dropped_bad_crc32", 1586 CTLTYPE_INT|CTLFLAG_RD, 1587 &fw->dropped_bad_crc32, 1588 0, mxge_handle_be32, 1589 "I", "dropped_bad_crc32"); 1590 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1591 "dropped_bad_phy", 1592 CTLTYPE_INT|CTLFLAG_RD, 1593 &fw->dropped_bad_phy, 1594 0, mxge_handle_be32, 1595 "I", "dropped_bad_phy"); 1596 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1597 "dropped_link_error_or_filtered", 1598 CTLTYPE_INT|CTLFLAG_RD, 1599 &fw->dropped_link_error_or_filtered, 1600 0, mxge_handle_be32, 1601 "I", "dropped_link_error_or_filtered"); 1602 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1603 "dropped_link_overflow", 1604 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 1605 0, mxge_handle_be32, 1606 "I", "dropped_link_overflow"); 1607 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1608 "dropped_multicast_filtered", 1609 CTLTYPE_INT|CTLFLAG_RD, 1610 &fw->dropped_multicast_filtered, 1611 0, mxge_handle_be32, 1612 "I", "dropped_multicast_filtered"); 1613 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1614 "dropped_no_big_buffer", 1615 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 1616 0, mxge_handle_be32, 1617 "I", "dropped_no_big_buffer"); 1618 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1619 "dropped_no_small_buffer", 1620 CTLTYPE_INT|CTLFLAG_RD, 1621 &fw->dropped_no_small_buffer, 1622 0, mxge_handle_be32, 1623 "I", "dropped_no_small_buffer"); 1624 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1625 "dropped_overrun", 1626 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 1627 0, mxge_handle_be32, 1628 "I", "dropped_overrun"); 1629 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1630 "dropped_pause", 1631 CTLTYPE_INT|CTLFLAG_RD, 1632 &fw->dropped_pause, 1633 0, mxge_handle_be32, 1634 "I", "dropped_pause"); 1635 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1636 "dropped_runt", 1637 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 1638 0, mxge_handle_be32, 1639 "I", "dropped_runt"); 1640 1641 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1642 "dropped_unicast_filtered", 1643 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 1644 0, mxge_handle_be32, 1645 "I", "dropped_unicast_filtered"); 1646 1647 /* verbose printing? */ 1648 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1649 "verbose", 1650 CTLFLAG_RW, &mxge_verbose, 1651 0, "verbose printing"); 1652 1653 /* lro */ 1654 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1655 "lro_cnt", 1656 CTLTYPE_INT|CTLFLAG_RW, sc, 1657 0, mxge_change_lro, 1658 "I", "number of lro merge queues"); 1659 1660 1661 /* add counters exported for debugging from all slices */ 1662 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1663 sc->slice_sysctl_tree = 1664 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO, 1665 "slice", CTLFLAG_RD, 0, ""); 1666 1667 for (slice = 0; slice < sc->num_slices; slice++) { 1668 ss = &sc->ss[slice]; 1669 sysctl_ctx_init(&ss->sysctl_ctx); 1670 ctx = &ss->sysctl_ctx; 1671 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1672 sprintf(slice_num, "%d", slice); 1673 ss->sysctl_tree = 1674 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num, 1675 CTLFLAG_RD, 0, ""); 1676 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1677 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1678 "rx_small_cnt", 1679 CTLFLAG_RD, &ss->rx_small.cnt, 1680 0, "rx_small_cnt"); 1681 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1682 "rx_big_cnt", 1683 CTLFLAG_RD, &ss->rx_big.cnt, 1684 0, "rx_small_cnt"); 1685 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1686 "lro_flushed", CTLFLAG_RD, &ss->lro_flushed, 1687 0, "number of lro merge queues flushed"); 1688 1689 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1690 "lro_queued", CTLFLAG_RD, &ss->lro_queued, 1691 0, "number of frames appended to lro merge" 1692 "queues"); 1693 1694 #ifndef IFNET_BUF_RING 1695 /* only transmit from slice 0 for now */ 1696 if (slice > 0) 1697 continue; 1698 #endif 1699 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1700 "tx_req", 1701 CTLFLAG_RD, &ss->tx.req, 1702 0, "tx_req"); 1703 1704 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1705 "tx_done", 1706 CTLFLAG_RD, &ss->tx.done, 1707 0, "tx_done"); 1708 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1709 "tx_pkt_done", 1710 CTLFLAG_RD, &ss->tx.pkt_done, 1711 0, "tx_done"); 1712 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1713 "tx_stall", 1714 CTLFLAG_RD, &ss->tx.stall, 1715 0, "tx_stall"); 1716 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1717 "tx_wake", 1718 CTLFLAG_RD, &ss->tx.wake, 1719 0, "tx_wake"); 1720 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1721 "tx_defrag", 1722 CTLFLAG_RD, &ss->tx.defrag, 1723 0, "tx_defrag"); 1724 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1725 "tx_queue_active", 1726 CTLFLAG_RD, &ss->tx.queue_active, 1727 0, "tx_queue_active"); 1728 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1729 "tx_activate", 1730 CTLFLAG_RD, &ss->tx.activate, 1731 0, "tx_activate"); 1732 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1733 "tx_deactivate", 1734 CTLFLAG_RD, &ss->tx.deactivate, 1735 0, "tx_deactivate"); 1736 } 1737 } 1738 1739 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1740 backwards one at a time and handle ring wraps */ 1741 1742 static inline void 1743 mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1744 mcp_kreq_ether_send_t *src, int cnt) 1745 { 1746 int idx, starting_slot; 1747 starting_slot = tx->req; 1748 while (cnt > 1) { 1749 cnt--; 1750 idx = (starting_slot + cnt) & tx->mask; 1751 mxge_pio_copy(&tx->lanai[idx], 1752 &src[cnt], sizeof(*src)); 1753 wmb(); 1754 } 1755 } 1756 1757 /* 1758 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1759 * at most 32 bytes at a time, so as to avoid involving the software 1760 * pio handler in the nic. We re-write the first segment's flags 1761 * to mark them valid only after writing the entire chain 1762 */ 1763 1764 static inline void 1765 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 1766 int cnt) 1767 { 1768 int idx, i; 1769 uint32_t *src_ints; 1770 volatile uint32_t *dst_ints; 1771 mcp_kreq_ether_send_t *srcp; 1772 volatile mcp_kreq_ether_send_t *dstp, *dst; 1773 uint8_t last_flags; 1774 1775 idx = tx->req & tx->mask; 1776 1777 last_flags = src->flags; 1778 src->flags = 0; 1779 wmb(); 1780 dst = dstp = &tx->lanai[idx]; 1781 srcp = src; 1782 1783 if ((idx + cnt) < tx->mask) { 1784 for (i = 0; i < (cnt - 1); i += 2) { 1785 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1786 wmb(); /* force write every 32 bytes */ 1787 srcp += 2; 1788 dstp += 2; 1789 } 1790 } else { 1791 /* submit all but the first request, and ensure 1792 that it is submitted below */ 1793 mxge_submit_req_backwards(tx, src, cnt); 1794 i = 0; 1795 } 1796 if (i < cnt) { 1797 /* submit the first request */ 1798 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1799 wmb(); /* barrier before setting valid flag */ 1800 } 1801 1802 /* re-write the last 32-bits with the valid flags */ 1803 src->flags = last_flags; 1804 src_ints = (uint32_t *)src; 1805 src_ints+=3; 1806 dst_ints = (volatile uint32_t *)dst; 1807 dst_ints+=3; 1808 *dst_ints = *src_ints; 1809 tx->req += cnt; 1810 wmb(); 1811 } 1812 1813 #if IFCAP_TSO4 1814 1815 static void 1816 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m, 1817 int busdma_seg_cnt, int ip_off) 1818 { 1819 mxge_tx_ring_t *tx; 1820 mcp_kreq_ether_send_t *req; 1821 bus_dma_segment_t *seg; 1822 struct ip *ip; 1823 struct tcphdr *tcp; 1824 uint32_t low, high_swapped; 1825 int len, seglen, cum_len, cum_len_next; 1826 int next_is_first, chop, cnt, rdma_count, small; 1827 uint16_t pseudo_hdr_offset, cksum_offset, mss; 1828 uint8_t flags, flags_next; 1829 static int once; 1830 1831 mss = m->m_pkthdr.tso_segsz; 1832 1833 /* negative cum_len signifies to the 1834 * send loop that we are still in the 1835 * header portion of the TSO packet. 1836 */ 1837 1838 /* ensure we have the ethernet, IP and TCP 1839 header together in the first mbuf, copy 1840 it to a scratch buffer if not */ 1841 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 1842 m_copydata(m, 0, ip_off + sizeof (*ip), 1843 ss->scratch); 1844 ip = (struct ip *)(ss->scratch + ip_off); 1845 } else { 1846 ip = (struct ip *)(mtod(m, char *) + ip_off); 1847 } 1848 if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2) 1849 + sizeof (*tcp))) { 1850 m_copydata(m, 0, ip_off + (ip->ip_hl << 2) 1851 + sizeof (*tcp), ss->scratch); 1852 ip = (struct ip *)(mtod(m, char *) + ip_off); 1853 } 1854 1855 tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2)); 1856 cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2)); 1857 cksum_offset = ip_off + (ip->ip_hl << 2); 1858 1859 /* TSO implies checksum offload on this hardware */ 1860 if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP)) == 0)) { 1861 /* 1862 * If packet has full TCP csum, replace it with pseudo hdr 1863 * sum that the NIC expects, otherwise the NIC will emit 1864 * packets with bad TCP checksums. 1865 */ 1866 m->m_pkthdr.csum_flags = CSUM_TCP; 1867 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); 1868 tcp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 1869 htons(IPPROTO_TCP + (m->m_pkthdr.len - cksum_offset))); 1870 } 1871 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1872 1873 1874 /* for TSO, pseudo_hdr_offset holds mss. 1875 * The firmware figures out where to put 1876 * the checksum by parsing the header. */ 1877 pseudo_hdr_offset = htobe16(mss); 1878 1879 tx = &ss->tx; 1880 req = tx->req_list; 1881 seg = tx->seg_list; 1882 cnt = 0; 1883 rdma_count = 0; 1884 /* "rdma_count" is the number of RDMAs belonging to the 1885 * current packet BEFORE the current send request. For 1886 * non-TSO packets, this is equal to "count". 1887 * For TSO packets, rdma_count needs to be reset 1888 * to 0 after a segment cut. 1889 * 1890 * The rdma_count field of the send request is 1891 * the number of RDMAs of the packet starting at 1892 * that request. For TSO send requests with one ore more cuts 1893 * in the middle, this is the number of RDMAs starting 1894 * after the last cut in the request. All previous 1895 * segments before the last cut implicitly have 1 RDMA. 1896 * 1897 * Since the number of RDMAs is not known beforehand, 1898 * it must be filled-in retroactively - after each 1899 * segmentation cut or at the end of the entire packet. 1900 */ 1901 1902 while (busdma_seg_cnt) { 1903 /* Break the busdma segment up into pieces*/ 1904 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1905 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1906 len = seg->ds_len; 1907 1908 while (len) { 1909 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1910 seglen = len; 1911 cum_len_next = cum_len + seglen; 1912 (req-rdma_count)->rdma_count = rdma_count + 1; 1913 if (__predict_true(cum_len >= 0)) { 1914 /* payload */ 1915 chop = (cum_len_next > mss); 1916 cum_len_next = cum_len_next % mss; 1917 next_is_first = (cum_len_next == 0); 1918 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1919 flags_next |= next_is_first * 1920 MXGEFW_FLAGS_FIRST; 1921 rdma_count |= -(chop | next_is_first); 1922 rdma_count += chop & !next_is_first; 1923 } else if (cum_len_next >= 0) { 1924 /* header ends */ 1925 rdma_count = -1; 1926 cum_len_next = 0; 1927 seglen = -cum_len; 1928 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1929 flags_next = MXGEFW_FLAGS_TSO_PLD | 1930 MXGEFW_FLAGS_FIRST | 1931 (small * MXGEFW_FLAGS_SMALL); 1932 } 1933 1934 req->addr_high = high_swapped; 1935 req->addr_low = htobe32(low); 1936 req->pseudo_hdr_offset = pseudo_hdr_offset; 1937 req->pad = 0; 1938 req->rdma_count = 1; 1939 req->length = htobe16(seglen); 1940 req->cksum_offset = cksum_offset; 1941 req->flags = flags | ((cum_len & 1) * 1942 MXGEFW_FLAGS_ALIGN_ODD); 1943 low += seglen; 1944 len -= seglen; 1945 cum_len = cum_len_next; 1946 flags = flags_next; 1947 req++; 1948 cnt++; 1949 rdma_count++; 1950 if (__predict_false(cksum_offset > seglen)) 1951 cksum_offset -= seglen; 1952 else 1953 cksum_offset = 0; 1954 if (__predict_false(cnt > tx->max_desc)) 1955 goto drop; 1956 } 1957 busdma_seg_cnt--; 1958 seg++; 1959 } 1960 (req-rdma_count)->rdma_count = rdma_count; 1961 1962 do { 1963 req--; 1964 req->flags |= MXGEFW_FLAGS_TSO_LAST; 1965 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 1966 1967 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1968 mxge_submit_req(tx, tx->req_list, cnt); 1969 #ifdef IFNET_BUF_RING 1970 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 1971 /* tell the NIC to start polling this slice */ 1972 *tx->send_go = 1; 1973 tx->queue_active = 1; 1974 tx->activate++; 1975 wmb(); 1976 } 1977 #endif 1978 return; 1979 1980 drop: 1981 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 1982 m_freem(m); 1983 ss->oerrors++; 1984 if (!once) { 1985 printf("tx->max_desc exceeded via TSO!\n"); 1986 printf("mss = %d, %ld, %d!\n", mss, 1987 (long)seg - (long)tx->seg_list, tx->max_desc); 1988 once = 1; 1989 } 1990 return; 1991 1992 } 1993 1994 #endif /* IFCAP_TSO4 */ 1995 1996 #ifdef MXGE_NEW_VLAN_API 1997 /* 1998 * We reproduce the software vlan tag insertion from 1999 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" 2000 * vlan tag insertion. We need to advertise this in order to have the 2001 * vlan interface respect our csum offload flags. 2002 */ 2003 static struct mbuf * 2004 mxge_vlan_tag_insert(struct mbuf *m) 2005 { 2006 struct ether_vlan_header *evl; 2007 2008 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT); 2009 if (__predict_false(m == NULL)) 2010 return NULL; 2011 if (m->m_len < sizeof(*evl)) { 2012 m = m_pullup(m, sizeof(*evl)); 2013 if (__predict_false(m == NULL)) 2014 return NULL; 2015 } 2016 /* 2017 * Transform the Ethernet header into an Ethernet header 2018 * with 802.1Q encapsulation. 2019 */ 2020 evl = mtod(m, struct ether_vlan_header *); 2021 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 2022 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 2023 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 2024 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 2025 m->m_flags &= ~M_VLANTAG; 2026 return m; 2027 } 2028 #endif /* MXGE_NEW_VLAN_API */ 2029 2030 static void 2031 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 2032 { 2033 mxge_softc_t *sc; 2034 mcp_kreq_ether_send_t *req; 2035 bus_dma_segment_t *seg; 2036 struct mbuf *m_tmp; 2037 struct ifnet *ifp; 2038 mxge_tx_ring_t *tx; 2039 struct ip *ip; 2040 int cnt, cum_len, err, i, idx, odd_flag, ip_off; 2041 uint16_t pseudo_hdr_offset; 2042 uint8_t flags, cksum_offset; 2043 2044 2045 sc = ss->sc; 2046 ifp = sc->ifp; 2047 tx = &ss->tx; 2048 2049 ip_off = sizeof (struct ether_header); 2050 #ifdef MXGE_NEW_VLAN_API 2051 if (m->m_flags & M_VLANTAG) { 2052 m = mxge_vlan_tag_insert(m); 2053 if (__predict_false(m == NULL)) 2054 goto drop; 2055 ip_off += ETHER_VLAN_ENCAP_LEN; 2056 } 2057 #endif 2058 /* (try to) map the frame for DMA */ 2059 idx = tx->req & tx->mask; 2060 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 2061 m, tx->seg_list, &cnt, 2062 BUS_DMA_NOWAIT); 2063 if (__predict_false(err == EFBIG)) { 2064 /* Too many segments in the chain. Try 2065 to defrag */ 2066 m_tmp = m_defrag(m, M_NOWAIT); 2067 if (m_tmp == NULL) { 2068 goto drop; 2069 } 2070 ss->tx.defrag++; 2071 m = m_tmp; 2072 err = bus_dmamap_load_mbuf_sg(tx->dmat, 2073 tx->info[idx].map, 2074 m, tx->seg_list, &cnt, 2075 BUS_DMA_NOWAIT); 2076 } 2077 if (__predict_false(err != 0)) { 2078 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 2079 " packet len = %d\n", err, m->m_pkthdr.len); 2080 goto drop; 2081 } 2082 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 2083 BUS_DMASYNC_PREWRITE); 2084 tx->info[idx].m = m; 2085 2086 #if IFCAP_TSO4 2087 /* TSO is different enough, we handle it in another routine */ 2088 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 2089 mxge_encap_tso(ss, m, cnt, ip_off); 2090 return; 2091 } 2092 #endif 2093 2094 req = tx->req_list; 2095 cksum_offset = 0; 2096 pseudo_hdr_offset = 0; 2097 flags = MXGEFW_FLAGS_NO_TSO; 2098 2099 /* checksum offloading? */ 2100 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) { 2101 /* ensure ip header is in first mbuf, copy 2102 it to a scratch buffer if not */ 2103 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 2104 m_copydata(m, 0, ip_off + sizeof (*ip), 2105 ss->scratch); 2106 ip = (struct ip *)(ss->scratch + ip_off); 2107 } else { 2108 ip = (struct ip *)(mtod(m, char *) + ip_off); 2109 } 2110 cksum_offset = ip_off + (ip->ip_hl << 2); 2111 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2112 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2113 req->cksum_offset = cksum_offset; 2114 flags |= MXGEFW_FLAGS_CKSUM; 2115 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2116 } else { 2117 odd_flag = 0; 2118 } 2119 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2120 flags |= MXGEFW_FLAGS_SMALL; 2121 2122 /* convert segments into a request list */ 2123 cum_len = 0; 2124 seg = tx->seg_list; 2125 req->flags = MXGEFW_FLAGS_FIRST; 2126 for (i = 0; i < cnt; i++) { 2127 req->addr_low = 2128 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2129 req->addr_high = 2130 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2131 req->length = htobe16(seg->ds_len); 2132 req->cksum_offset = cksum_offset; 2133 if (cksum_offset > seg->ds_len) 2134 cksum_offset -= seg->ds_len; 2135 else 2136 cksum_offset = 0; 2137 req->pseudo_hdr_offset = pseudo_hdr_offset; 2138 req->pad = 0; /* complete solid 16-byte block */ 2139 req->rdma_count = 1; 2140 req->flags |= flags | ((cum_len & 1) * odd_flag); 2141 cum_len += seg->ds_len; 2142 seg++; 2143 req++; 2144 req->flags = 0; 2145 } 2146 req--; 2147 /* pad runts to 60 bytes */ 2148 if (cum_len < 60) { 2149 req++; 2150 req->addr_low = 2151 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2152 req->addr_high = 2153 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2154 req->length = htobe16(60 - cum_len); 2155 req->cksum_offset = 0; 2156 req->pseudo_hdr_offset = pseudo_hdr_offset; 2157 req->pad = 0; /* complete solid 16-byte block */ 2158 req->rdma_count = 1; 2159 req->flags |= flags | ((cum_len & 1) * odd_flag); 2160 cnt++; 2161 } 2162 2163 tx->req_list[0].rdma_count = cnt; 2164 #if 0 2165 /* print what the firmware will see */ 2166 for (i = 0; i < cnt; i++) { 2167 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2168 "cso:%d, flags:0x%x, rdma:%d\n", 2169 i, (int)ntohl(tx->req_list[i].addr_high), 2170 (int)ntohl(tx->req_list[i].addr_low), 2171 (int)ntohs(tx->req_list[i].length), 2172 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2173 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2174 tx->req_list[i].rdma_count); 2175 } 2176 printf("--------------\n"); 2177 #endif 2178 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2179 mxge_submit_req(tx, tx->req_list, cnt); 2180 #ifdef IFNET_BUF_RING 2181 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2182 /* tell the NIC to start polling this slice */ 2183 *tx->send_go = 1; 2184 tx->queue_active = 1; 2185 tx->activate++; 2186 wmb(); 2187 } 2188 #endif 2189 return; 2190 2191 drop: 2192 m_freem(m); 2193 ss->oerrors++; 2194 return; 2195 } 2196 2197 #ifdef IFNET_BUF_RING 2198 static void 2199 mxge_qflush(struct ifnet *ifp) 2200 { 2201 mxge_softc_t *sc = ifp->if_softc; 2202 mxge_tx_ring_t *tx; 2203 struct mbuf *m; 2204 int slice; 2205 2206 for (slice = 0; slice < sc->num_slices; slice++) { 2207 tx = &sc->ss[slice].tx; 2208 mtx_lock(&tx->mtx); 2209 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2210 m_freem(m); 2211 mtx_unlock(&tx->mtx); 2212 } 2213 if_qflush(ifp); 2214 } 2215 2216 static inline void 2217 mxge_start_locked(struct mxge_slice_state *ss) 2218 { 2219 mxge_softc_t *sc; 2220 struct mbuf *m; 2221 struct ifnet *ifp; 2222 mxge_tx_ring_t *tx; 2223 2224 sc = ss->sc; 2225 ifp = sc->ifp; 2226 tx = &ss->tx; 2227 2228 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2229 m = drbr_dequeue(ifp, tx->br); 2230 if (m == NULL) { 2231 return; 2232 } 2233 /* let BPF see it */ 2234 BPF_MTAP(ifp, m); 2235 2236 /* give it to the nic */ 2237 mxge_encap(ss, m); 2238 } 2239 /* ran out of transmit slots */ 2240 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) 2241 && (!drbr_empty(ifp, tx->br))) { 2242 ss->if_drv_flags |= IFF_DRV_OACTIVE; 2243 tx->stall++; 2244 } 2245 } 2246 2247 static int 2248 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2249 { 2250 mxge_softc_t *sc; 2251 struct ifnet *ifp; 2252 mxge_tx_ring_t *tx; 2253 int err; 2254 2255 sc = ss->sc; 2256 ifp = sc->ifp; 2257 tx = &ss->tx; 2258 2259 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 2260 IFF_DRV_RUNNING) { 2261 err = drbr_enqueue(ifp, tx->br, m); 2262 return (err); 2263 } 2264 2265 if (!drbr_needs_enqueue(ifp, tx->br) && 2266 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2267 /* let BPF see it */ 2268 BPF_MTAP(ifp, m); 2269 /* give it to the nic */ 2270 mxge_encap(ss, m); 2271 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2272 return (err); 2273 } 2274 if (!drbr_empty(ifp, tx->br)) 2275 mxge_start_locked(ss); 2276 return (0); 2277 } 2278 2279 static int 2280 mxge_transmit(struct ifnet *ifp, struct mbuf *m) 2281 { 2282 mxge_softc_t *sc = ifp->if_softc; 2283 struct mxge_slice_state *ss; 2284 mxge_tx_ring_t *tx; 2285 int err = 0; 2286 int slice; 2287 2288 slice = m->m_pkthdr.flowid; 2289 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ 2290 2291 ss = &sc->ss[slice]; 2292 tx = &ss->tx; 2293 2294 if (mtx_trylock(&tx->mtx)) { 2295 err = mxge_transmit_locked(ss, m); 2296 mtx_unlock(&tx->mtx); 2297 } else { 2298 err = drbr_enqueue(ifp, tx->br, m); 2299 } 2300 2301 return (err); 2302 } 2303 2304 #else 2305 2306 static inline void 2307 mxge_start_locked(struct mxge_slice_state *ss) 2308 { 2309 mxge_softc_t *sc; 2310 struct mbuf *m; 2311 struct ifnet *ifp; 2312 mxge_tx_ring_t *tx; 2313 2314 sc = ss->sc; 2315 ifp = sc->ifp; 2316 tx = &ss->tx; 2317 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2318 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 2319 if (m == NULL) { 2320 return; 2321 } 2322 /* let BPF see it */ 2323 BPF_MTAP(ifp, m); 2324 2325 /* give it to the nic */ 2326 mxge_encap(ss, m); 2327 } 2328 /* ran out of transmit slots */ 2329 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 2330 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2331 tx->stall++; 2332 } 2333 } 2334 #endif 2335 static void 2336 mxge_start(struct ifnet *ifp) 2337 { 2338 mxge_softc_t *sc = ifp->if_softc; 2339 struct mxge_slice_state *ss; 2340 2341 /* only use the first slice for now */ 2342 ss = &sc->ss[0]; 2343 mtx_lock(&ss->tx.mtx); 2344 mxge_start_locked(ss); 2345 mtx_unlock(&ss->tx.mtx); 2346 } 2347 2348 /* 2349 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2350 * at most 32 bytes at a time, so as to avoid involving the software 2351 * pio handler in the nic. We re-write the first segment's low 2352 * DMA address to mark it valid only after we write the entire chunk 2353 * in a burst 2354 */ 2355 static inline void 2356 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2357 mcp_kreq_ether_recv_t *src) 2358 { 2359 uint32_t low; 2360 2361 low = src->addr_low; 2362 src->addr_low = 0xffffffff; 2363 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2364 wmb(); 2365 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2366 wmb(); 2367 src->addr_low = low; 2368 dst->addr_low = low; 2369 wmb(); 2370 } 2371 2372 static int 2373 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2374 { 2375 bus_dma_segment_t seg; 2376 struct mbuf *m; 2377 mxge_rx_ring_t *rx = &ss->rx_small; 2378 int cnt, err; 2379 2380 m = m_gethdr(M_NOWAIT, MT_DATA); 2381 if (m == NULL) { 2382 rx->alloc_fail++; 2383 err = ENOBUFS; 2384 goto done; 2385 } 2386 m->m_len = MHLEN; 2387 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2388 &seg, &cnt, BUS_DMA_NOWAIT); 2389 if (err != 0) { 2390 m_free(m); 2391 goto done; 2392 } 2393 rx->info[idx].m = m; 2394 rx->shadow[idx].addr_low = 2395 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2396 rx->shadow[idx].addr_high = 2397 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2398 2399 done: 2400 if ((idx & 7) == 7) 2401 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2402 return err; 2403 } 2404 2405 static int 2406 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2407 { 2408 bus_dma_segment_t seg[3]; 2409 struct mbuf *m; 2410 mxge_rx_ring_t *rx = &ss->rx_big; 2411 int cnt, err, i; 2412 2413 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2414 if (m == NULL) { 2415 rx->alloc_fail++; 2416 err = ENOBUFS; 2417 goto done; 2418 } 2419 m->m_len = rx->mlen; 2420 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2421 seg, &cnt, BUS_DMA_NOWAIT); 2422 if (err != 0) { 2423 m_free(m); 2424 goto done; 2425 } 2426 rx->info[idx].m = m; 2427 rx->shadow[idx].addr_low = 2428 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2429 rx->shadow[idx].addr_high = 2430 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2431 2432 #if MXGE_VIRT_JUMBOS 2433 for (i = 1; i < cnt; i++) { 2434 rx->shadow[idx + i].addr_low = 2435 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2436 rx->shadow[idx + i].addr_high = 2437 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2438 } 2439 #endif 2440 2441 done: 2442 for (i = 0; i < rx->nbufs; i++) { 2443 if ((idx & 7) == 7) { 2444 mxge_submit_8rx(&rx->lanai[idx - 7], 2445 &rx->shadow[idx - 7]); 2446 } 2447 idx++; 2448 } 2449 return err; 2450 } 2451 2452 /* 2453 * Myri10GE hardware checksums are not valid if the sender 2454 * padded the frame with non-zero padding. This is because 2455 * the firmware just does a simple 16-bit 1s complement 2456 * checksum across the entire frame, excluding the first 14 2457 * bytes. It is best to simply to check the checksum and 2458 * tell the stack about it only if the checksum is good 2459 */ 2460 2461 static inline uint16_t 2462 mxge_rx_csum(struct mbuf *m, int csum) 2463 { 2464 struct ether_header *eh; 2465 struct ip *ip; 2466 uint16_t c; 2467 2468 eh = mtod(m, struct ether_header *); 2469 2470 /* only deal with IPv4 TCP & UDP for now */ 2471 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP))) 2472 return 1; 2473 ip = (struct ip *)(eh + 1); 2474 if (__predict_false(ip->ip_p != IPPROTO_TCP && 2475 ip->ip_p != IPPROTO_UDP)) 2476 return 1; 2477 #ifdef INET 2478 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2479 htonl(ntohs(csum) + ntohs(ip->ip_len) + 2480 - (ip->ip_hl << 2) + ip->ip_p)); 2481 #else 2482 c = 1; 2483 #endif 2484 c ^= 0xffff; 2485 return (c); 2486 } 2487 2488 static void 2489 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2490 { 2491 struct ether_vlan_header *evl; 2492 struct ether_header *eh; 2493 uint32_t partial; 2494 2495 evl = mtod(m, struct ether_vlan_header *); 2496 eh = mtod(m, struct ether_header *); 2497 2498 /* 2499 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes 2500 * after what the firmware thought was the end of the ethernet 2501 * header. 2502 */ 2503 2504 /* put checksum into host byte order */ 2505 *csum = ntohs(*csum); 2506 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2507 (*csum) += ~partial; 2508 (*csum) += ((*csum) < ~partial); 2509 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2510 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2511 2512 /* restore checksum to network byte order; 2513 later consumers expect this */ 2514 *csum = htons(*csum); 2515 2516 /* save the tag */ 2517 #ifdef MXGE_NEW_VLAN_API 2518 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); 2519 #else 2520 { 2521 struct m_tag *mtag; 2522 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), 2523 M_NOWAIT); 2524 if (mtag == NULL) 2525 return; 2526 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); 2527 m_tag_prepend(m, mtag); 2528 } 2529 2530 #endif 2531 m->m_flags |= M_VLANTAG; 2532 2533 /* 2534 * Remove the 802.1q header by copying the Ethernet 2535 * addresses over it and adjusting the beginning of 2536 * the data in the mbuf. The encapsulated Ethernet 2537 * type field is already in place. 2538 */ 2539 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2540 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2541 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2542 } 2543 2544 2545 static inline void 2546 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2547 { 2548 mxge_softc_t *sc; 2549 struct ifnet *ifp; 2550 struct mbuf *m; 2551 struct ether_header *eh; 2552 mxge_rx_ring_t *rx; 2553 bus_dmamap_t old_map; 2554 int idx; 2555 uint16_t tcpudp_csum; 2556 2557 sc = ss->sc; 2558 ifp = sc->ifp; 2559 rx = &ss->rx_big; 2560 idx = rx->cnt & rx->mask; 2561 rx->cnt += rx->nbufs; 2562 /* save a pointer to the received mbuf */ 2563 m = rx->info[idx].m; 2564 /* try to replace the received mbuf */ 2565 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2566 /* drop the frame -- the old mbuf is re-cycled */ 2567 ifp->if_ierrors++; 2568 return; 2569 } 2570 2571 /* unmap the received buffer */ 2572 old_map = rx->info[idx].map; 2573 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2574 bus_dmamap_unload(rx->dmat, old_map); 2575 2576 /* swap the bus_dmamap_t's */ 2577 rx->info[idx].map = rx->extra_map; 2578 rx->extra_map = old_map; 2579 2580 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2581 * aligned */ 2582 m->m_data += MXGEFW_PAD; 2583 2584 m->m_pkthdr.rcvif = ifp; 2585 m->m_len = m->m_pkthdr.len = len; 2586 ss->ipackets++; 2587 eh = mtod(m, struct ether_header *); 2588 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2589 mxge_vlan_tag_remove(m, &csum); 2590 } 2591 /* if the checksum is valid, mark it in the mbuf header */ 2592 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2593 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2594 return; 2595 /* otherwise, it was a UDP frame, or a TCP frame which 2596 we could not do LRO on. Tell the stack that the 2597 checksum is good */ 2598 m->m_pkthdr.csum_data = 0xffff; 2599 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2600 } 2601 /* flowid only valid if RSS hashing is enabled */ 2602 if (sc->num_slices > 1) { 2603 m->m_pkthdr.flowid = (ss - sc->ss); 2604 m->m_flags |= M_FLOWID; 2605 } 2606 /* pass the frame up the stack */ 2607 (*ifp->if_input)(ifp, m); 2608 } 2609 2610 static inline void 2611 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2612 { 2613 mxge_softc_t *sc; 2614 struct ifnet *ifp; 2615 struct ether_header *eh; 2616 struct mbuf *m; 2617 mxge_rx_ring_t *rx; 2618 bus_dmamap_t old_map; 2619 int idx; 2620 uint16_t tcpudp_csum; 2621 2622 sc = ss->sc; 2623 ifp = sc->ifp; 2624 rx = &ss->rx_small; 2625 idx = rx->cnt & rx->mask; 2626 rx->cnt++; 2627 /* save a pointer to the received mbuf */ 2628 m = rx->info[idx].m; 2629 /* try to replace the received mbuf */ 2630 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2631 /* drop the frame -- the old mbuf is re-cycled */ 2632 ifp->if_ierrors++; 2633 return; 2634 } 2635 2636 /* unmap the received buffer */ 2637 old_map = rx->info[idx].map; 2638 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2639 bus_dmamap_unload(rx->dmat, old_map); 2640 2641 /* swap the bus_dmamap_t's */ 2642 rx->info[idx].map = rx->extra_map; 2643 rx->extra_map = old_map; 2644 2645 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2646 * aligned */ 2647 m->m_data += MXGEFW_PAD; 2648 2649 m->m_pkthdr.rcvif = ifp; 2650 m->m_len = m->m_pkthdr.len = len; 2651 ss->ipackets++; 2652 eh = mtod(m, struct ether_header *); 2653 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2654 mxge_vlan_tag_remove(m, &csum); 2655 } 2656 /* if the checksum is valid, mark it in the mbuf header */ 2657 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2658 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2659 return; 2660 /* otherwise, it was a UDP frame, or a TCP frame which 2661 we could not do LRO on. Tell the stack that the 2662 checksum is good */ 2663 m->m_pkthdr.csum_data = 0xffff; 2664 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2665 } 2666 /* flowid only valid if RSS hashing is enabled */ 2667 if (sc->num_slices > 1) { 2668 m->m_pkthdr.flowid = (ss - sc->ss); 2669 m->m_flags |= M_FLOWID; 2670 } 2671 /* pass the frame up the stack */ 2672 (*ifp->if_input)(ifp, m); 2673 } 2674 2675 static inline void 2676 mxge_clean_rx_done(struct mxge_slice_state *ss) 2677 { 2678 mxge_rx_done_t *rx_done = &ss->rx_done; 2679 int limit = 0; 2680 uint16_t length; 2681 uint16_t checksum; 2682 2683 2684 while (rx_done->entry[rx_done->idx].length != 0) { 2685 length = ntohs(rx_done->entry[rx_done->idx].length); 2686 rx_done->entry[rx_done->idx].length = 0; 2687 checksum = rx_done->entry[rx_done->idx].checksum; 2688 if (length <= (MHLEN - MXGEFW_PAD)) 2689 mxge_rx_done_small(ss, length, checksum); 2690 else 2691 mxge_rx_done_big(ss, length, checksum); 2692 rx_done->cnt++; 2693 rx_done->idx = rx_done->cnt & rx_done->mask; 2694 2695 /* limit potential for livelock */ 2696 if (__predict_false(++limit > rx_done->mask / 2)) 2697 break; 2698 } 2699 #ifdef INET 2700 while (!SLIST_EMPTY(&ss->lro_active)) { 2701 struct lro_entry *lro = SLIST_FIRST(&ss->lro_active); 2702 SLIST_REMOVE_HEAD(&ss->lro_active, next); 2703 mxge_lro_flush(ss, lro); 2704 } 2705 #endif 2706 } 2707 2708 2709 static inline void 2710 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2711 { 2712 struct ifnet *ifp; 2713 mxge_tx_ring_t *tx; 2714 struct mbuf *m; 2715 bus_dmamap_t map; 2716 int idx; 2717 int *flags; 2718 2719 tx = &ss->tx; 2720 ifp = ss->sc->ifp; 2721 while (tx->pkt_done != mcp_idx) { 2722 idx = tx->done & tx->mask; 2723 tx->done++; 2724 m = tx->info[idx].m; 2725 /* mbuf and DMA map only attached to the first 2726 segment per-mbuf */ 2727 if (m != NULL) { 2728 ss->obytes += m->m_pkthdr.len; 2729 if (m->m_flags & M_MCAST) 2730 ss->omcasts++; 2731 ss->opackets++; 2732 tx->info[idx].m = NULL; 2733 map = tx->info[idx].map; 2734 bus_dmamap_unload(tx->dmat, map); 2735 m_freem(m); 2736 } 2737 if (tx->info[idx].flag) { 2738 tx->info[idx].flag = 0; 2739 tx->pkt_done++; 2740 } 2741 } 2742 2743 /* If we have space, clear IFF_OACTIVE to tell the stack that 2744 its OK to send packets */ 2745 #ifdef IFNET_BUF_RING 2746 flags = &ss->if_drv_flags; 2747 #else 2748 flags = &ifp->if_drv_flags; 2749 #endif 2750 mtx_lock(&ss->tx.mtx); 2751 if ((*flags) & IFF_DRV_OACTIVE && 2752 tx->req - tx->done < (tx->mask + 1)/4) { 2753 *(flags) &= ~IFF_DRV_OACTIVE; 2754 ss->tx.wake++; 2755 mxge_start_locked(ss); 2756 } 2757 #ifdef IFNET_BUF_RING 2758 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { 2759 /* let the NIC stop polling this queue, since there 2760 * are no more transmits pending */ 2761 if (tx->req == tx->done) { 2762 *tx->send_stop = 1; 2763 tx->queue_active = 0; 2764 tx->deactivate++; 2765 wmb(); 2766 } 2767 } 2768 #endif 2769 mtx_unlock(&ss->tx.mtx); 2770 2771 } 2772 2773 static struct mxge_media_type mxge_xfp_media_types[] = 2774 { 2775 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2776 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2777 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2778 {0, (1 << 5), "10GBASE-ER"}, 2779 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2780 {0, (1 << 3), "10GBASE-SW"}, 2781 {0, (1 << 2), "10GBASE-LW"}, 2782 {0, (1 << 1), "10GBASE-EW"}, 2783 {0, (1 << 0), "Reserved"} 2784 }; 2785 static struct mxge_media_type mxge_sfp_media_types[] = 2786 { 2787 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"}, 2788 {0, (1 << 7), "Reserved"}, 2789 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2790 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2791 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}, 2792 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"} 2793 }; 2794 2795 static void 2796 mxge_media_set(mxge_softc_t *sc, int media_type) 2797 { 2798 2799 2800 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type, 2801 0, NULL); 2802 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type); 2803 sc->current_media = media_type; 2804 sc->media.ifm_media = sc->media.ifm_cur->ifm_media; 2805 } 2806 2807 static void 2808 mxge_media_init(mxge_softc_t *sc) 2809 { 2810 char *ptr; 2811 int i; 2812 2813 ifmedia_removeall(&sc->media); 2814 mxge_media_set(sc, IFM_AUTO); 2815 2816 /* 2817 * parse the product code to deterimine the interface type 2818 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2819 * after the 3rd dash in the driver's cached copy of the 2820 * EEPROM's product code string. 2821 */ 2822 ptr = sc->product_code_string; 2823 if (ptr == NULL) { 2824 device_printf(sc->dev, "Missing product code\n"); 2825 return; 2826 } 2827 2828 for (i = 0; i < 3; i++, ptr++) { 2829 ptr = strchr(ptr, '-'); 2830 if (ptr == NULL) { 2831 device_printf(sc->dev, 2832 "only %d dashes in PC?!?\n", i); 2833 return; 2834 } 2835 } 2836 if (*ptr == 'C' || *(ptr +1) == 'C') { 2837 /* -C is CX4 */ 2838 sc->connector = MXGE_CX4; 2839 mxge_media_set(sc, IFM_10G_CX4); 2840 } else if (*ptr == 'Q') { 2841 /* -Q is Quad Ribbon Fiber */ 2842 sc->connector = MXGE_QRF; 2843 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2844 /* FreeBSD has no media type for Quad ribbon fiber */ 2845 } else if (*ptr == 'R') { 2846 /* -R is XFP */ 2847 sc->connector = MXGE_XFP; 2848 } else if (*ptr == 'S' || *(ptr +1) == 'S') { 2849 /* -S or -2S is SFP+ */ 2850 sc->connector = MXGE_SFP; 2851 } else { 2852 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2853 } 2854 } 2855 2856 /* 2857 * Determine the media type for a NIC. Some XFPs will identify 2858 * themselves only when their link is up, so this is initiated via a 2859 * link up interrupt. However, this can potentially take up to 2860 * several milliseconds, so it is run via the watchdog routine, rather 2861 * than in the interrupt handler itself. 2862 */ 2863 static void 2864 mxge_media_probe(mxge_softc_t *sc) 2865 { 2866 mxge_cmd_t cmd; 2867 char *cage_type; 2868 2869 struct mxge_media_type *mxge_media_types = NULL; 2870 int i, err, ms, mxge_media_type_entries; 2871 uint32_t byte; 2872 2873 sc->need_media_probe = 0; 2874 2875 if (sc->connector == MXGE_XFP) { 2876 /* -R is XFP */ 2877 mxge_media_types = mxge_xfp_media_types; 2878 mxge_media_type_entries = 2879 sizeof (mxge_xfp_media_types) / 2880 sizeof (mxge_xfp_media_types[0]); 2881 byte = MXGE_XFP_COMPLIANCE_BYTE; 2882 cage_type = "XFP"; 2883 } else if (sc->connector == MXGE_SFP) { 2884 /* -S or -2S is SFP+ */ 2885 mxge_media_types = mxge_sfp_media_types; 2886 mxge_media_type_entries = 2887 sizeof (mxge_sfp_media_types) / 2888 sizeof (mxge_sfp_media_types[0]); 2889 cage_type = "SFP+"; 2890 byte = 3; 2891 } else { 2892 /* nothing to do; media type cannot change */ 2893 return; 2894 } 2895 2896 /* 2897 * At this point we know the NIC has an XFP cage, so now we 2898 * try to determine what is in the cage by using the 2899 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2900 * register. We read just one byte, which may take over 2901 * a millisecond 2902 */ 2903 2904 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2905 cmd.data1 = byte; 2906 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2907 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 2908 device_printf(sc->dev, "failed to read XFP\n"); 2909 } 2910 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 2911 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 2912 } 2913 if (err != MXGEFW_CMD_OK) { 2914 return; 2915 } 2916 2917 /* now we wait for the data to be cached */ 2918 cmd.data0 = byte; 2919 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2920 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 2921 DELAY(1000); 2922 cmd.data0 = byte; 2923 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2924 } 2925 if (err != MXGEFW_CMD_OK) { 2926 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 2927 cage_type, err, ms); 2928 return; 2929 } 2930 2931 if (cmd.data0 == mxge_media_types[0].bitmask) { 2932 if (mxge_verbose) 2933 device_printf(sc->dev, "%s:%s\n", cage_type, 2934 mxge_media_types[0].name); 2935 if (sc->current_media != mxge_media_types[0].flag) { 2936 mxge_media_init(sc); 2937 mxge_media_set(sc, mxge_media_types[0].flag); 2938 } 2939 return; 2940 } 2941 for (i = 1; i < mxge_media_type_entries; i++) { 2942 if (cmd.data0 & mxge_media_types[i].bitmask) { 2943 if (mxge_verbose) 2944 device_printf(sc->dev, "%s:%s\n", 2945 cage_type, 2946 mxge_media_types[i].name); 2947 2948 if (sc->current_media != mxge_media_types[i].flag) { 2949 mxge_media_init(sc); 2950 mxge_media_set(sc, mxge_media_types[i].flag); 2951 } 2952 return; 2953 } 2954 } 2955 if (mxge_verbose) 2956 device_printf(sc->dev, "%s media 0x%x unknown\n", 2957 cage_type, cmd.data0); 2958 2959 return; 2960 } 2961 2962 static void 2963 mxge_intr(void *arg) 2964 { 2965 struct mxge_slice_state *ss = arg; 2966 mxge_softc_t *sc = ss->sc; 2967 mcp_irq_data_t *stats = ss->fw_stats; 2968 mxge_tx_ring_t *tx = &ss->tx; 2969 mxge_rx_done_t *rx_done = &ss->rx_done; 2970 uint32_t send_done_count; 2971 uint8_t valid; 2972 2973 2974 #ifndef IFNET_BUF_RING 2975 /* an interrupt on a non-zero slice is implicitly valid 2976 since MSI-X irqs are not shared */ 2977 if (ss != sc->ss) { 2978 mxge_clean_rx_done(ss); 2979 *ss->irq_claim = be32toh(3); 2980 return; 2981 } 2982 #endif 2983 2984 /* make sure the DMA has finished */ 2985 if (!stats->valid) { 2986 return; 2987 } 2988 valid = stats->valid; 2989 2990 if (sc->legacy_irq) { 2991 /* lower legacy IRQ */ 2992 *sc->irq_deassert = 0; 2993 if (!mxge_deassert_wait) 2994 /* don't wait for conf. that irq is low */ 2995 stats->valid = 0; 2996 } else { 2997 stats->valid = 0; 2998 } 2999 3000 /* loop while waiting for legacy irq deassertion */ 3001 do { 3002 /* check for transmit completes and receives */ 3003 send_done_count = be32toh(stats->send_done_count); 3004 while ((send_done_count != tx->pkt_done) || 3005 (rx_done->entry[rx_done->idx].length != 0)) { 3006 if (send_done_count != tx->pkt_done) 3007 mxge_tx_done(ss, (int)send_done_count); 3008 mxge_clean_rx_done(ss); 3009 send_done_count = be32toh(stats->send_done_count); 3010 } 3011 if (sc->legacy_irq && mxge_deassert_wait) 3012 wmb(); 3013 } while (*((volatile uint8_t *) &stats->valid)); 3014 3015 /* fw link & error stats meaningful only on the first slice */ 3016 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 3017 if (sc->link_state != stats->link_up) { 3018 sc->link_state = stats->link_up; 3019 if (sc->link_state) { 3020 if_link_state_change(sc->ifp, LINK_STATE_UP); 3021 if_initbaudrate(sc->ifp, IF_Gbps(10)); 3022 if (mxge_verbose) 3023 device_printf(sc->dev, "link up\n"); 3024 } else { 3025 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3026 sc->ifp->if_baudrate = 0; 3027 if (mxge_verbose) 3028 device_printf(sc->dev, "link down\n"); 3029 } 3030 sc->need_media_probe = 1; 3031 } 3032 if (sc->rdma_tags_available != 3033 be32toh(stats->rdma_tags_available)) { 3034 sc->rdma_tags_available = 3035 be32toh(stats->rdma_tags_available); 3036 device_printf(sc->dev, "RDMA timed out! %d tags " 3037 "left\n", sc->rdma_tags_available); 3038 } 3039 3040 if (stats->link_down) { 3041 sc->down_cnt += stats->link_down; 3042 sc->link_state = 0; 3043 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3044 } 3045 } 3046 3047 /* check to see if we have rx token to pass back */ 3048 if (valid & 0x1) 3049 *ss->irq_claim = be32toh(3); 3050 *(ss->irq_claim + 1) = be32toh(3); 3051 } 3052 3053 static void 3054 mxge_init(void *arg) 3055 { 3056 mxge_softc_t *sc = arg; 3057 struct ifnet *ifp = sc->ifp; 3058 3059 3060 mtx_lock(&sc->driver_mtx); 3061 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 3062 (void) mxge_open(sc); 3063 mtx_unlock(&sc->driver_mtx); 3064 } 3065 3066 3067 3068 static void 3069 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 3070 { 3071 struct lro_entry *lro_entry; 3072 int i; 3073 3074 while (!SLIST_EMPTY(&ss->lro_free)) { 3075 lro_entry = SLIST_FIRST(&ss->lro_free); 3076 SLIST_REMOVE_HEAD(&ss->lro_free, next); 3077 free(lro_entry, M_DEVBUF); 3078 } 3079 3080 for (i = 0; i <= ss->rx_big.mask; i++) { 3081 if (ss->rx_big.info[i].m == NULL) 3082 continue; 3083 bus_dmamap_unload(ss->rx_big.dmat, 3084 ss->rx_big.info[i].map); 3085 m_freem(ss->rx_big.info[i].m); 3086 ss->rx_big.info[i].m = NULL; 3087 } 3088 3089 for (i = 0; i <= ss->rx_small.mask; i++) { 3090 if (ss->rx_small.info[i].m == NULL) 3091 continue; 3092 bus_dmamap_unload(ss->rx_small.dmat, 3093 ss->rx_small.info[i].map); 3094 m_freem(ss->rx_small.info[i].m); 3095 ss->rx_small.info[i].m = NULL; 3096 } 3097 3098 /* transmit ring used only on the first slice */ 3099 if (ss->tx.info == NULL) 3100 return; 3101 3102 for (i = 0; i <= ss->tx.mask; i++) { 3103 ss->tx.info[i].flag = 0; 3104 if (ss->tx.info[i].m == NULL) 3105 continue; 3106 bus_dmamap_unload(ss->tx.dmat, 3107 ss->tx.info[i].map); 3108 m_freem(ss->tx.info[i].m); 3109 ss->tx.info[i].m = NULL; 3110 } 3111 } 3112 3113 static void 3114 mxge_free_mbufs(mxge_softc_t *sc) 3115 { 3116 int slice; 3117 3118 for (slice = 0; slice < sc->num_slices; slice++) 3119 mxge_free_slice_mbufs(&sc->ss[slice]); 3120 } 3121 3122 static void 3123 mxge_free_slice_rings(struct mxge_slice_state *ss) 3124 { 3125 int i; 3126 3127 3128 if (ss->rx_done.entry != NULL) 3129 mxge_dma_free(&ss->rx_done.dma); 3130 ss->rx_done.entry = NULL; 3131 3132 if (ss->tx.req_bytes != NULL) 3133 free(ss->tx.req_bytes, M_DEVBUF); 3134 ss->tx.req_bytes = NULL; 3135 3136 if (ss->tx.seg_list != NULL) 3137 free(ss->tx.seg_list, M_DEVBUF); 3138 ss->tx.seg_list = NULL; 3139 3140 if (ss->rx_small.shadow != NULL) 3141 free(ss->rx_small.shadow, M_DEVBUF); 3142 ss->rx_small.shadow = NULL; 3143 3144 if (ss->rx_big.shadow != NULL) 3145 free(ss->rx_big.shadow, M_DEVBUF); 3146 ss->rx_big.shadow = NULL; 3147 3148 if (ss->tx.info != NULL) { 3149 if (ss->tx.dmat != NULL) { 3150 for (i = 0; i <= ss->tx.mask; i++) { 3151 bus_dmamap_destroy(ss->tx.dmat, 3152 ss->tx.info[i].map); 3153 } 3154 bus_dma_tag_destroy(ss->tx.dmat); 3155 } 3156 free(ss->tx.info, M_DEVBUF); 3157 } 3158 ss->tx.info = NULL; 3159 3160 if (ss->rx_small.info != NULL) { 3161 if (ss->rx_small.dmat != NULL) { 3162 for (i = 0; i <= ss->rx_small.mask; i++) { 3163 bus_dmamap_destroy(ss->rx_small.dmat, 3164 ss->rx_small.info[i].map); 3165 } 3166 bus_dmamap_destroy(ss->rx_small.dmat, 3167 ss->rx_small.extra_map); 3168 bus_dma_tag_destroy(ss->rx_small.dmat); 3169 } 3170 free(ss->rx_small.info, M_DEVBUF); 3171 } 3172 ss->rx_small.info = NULL; 3173 3174 if (ss->rx_big.info != NULL) { 3175 if (ss->rx_big.dmat != NULL) { 3176 for (i = 0; i <= ss->rx_big.mask; i++) { 3177 bus_dmamap_destroy(ss->rx_big.dmat, 3178 ss->rx_big.info[i].map); 3179 } 3180 bus_dmamap_destroy(ss->rx_big.dmat, 3181 ss->rx_big.extra_map); 3182 bus_dma_tag_destroy(ss->rx_big.dmat); 3183 } 3184 free(ss->rx_big.info, M_DEVBUF); 3185 } 3186 ss->rx_big.info = NULL; 3187 } 3188 3189 static void 3190 mxge_free_rings(mxge_softc_t *sc) 3191 { 3192 int slice; 3193 3194 for (slice = 0; slice < sc->num_slices; slice++) 3195 mxge_free_slice_rings(&sc->ss[slice]); 3196 } 3197 3198 static int 3199 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3200 int tx_ring_entries) 3201 { 3202 mxge_softc_t *sc = ss->sc; 3203 size_t bytes; 3204 int err, i; 3205 3206 err = ENOMEM; 3207 3208 /* allocate per-slice receive resources */ 3209 3210 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 3211 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 3212 3213 /* allocate the rx shadow rings */ 3214 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 3215 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3216 if (ss->rx_small.shadow == NULL) 3217 return err; 3218 3219 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 3220 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3221 if (ss->rx_big.shadow == NULL) 3222 return err; 3223 3224 /* allocate the rx host info rings */ 3225 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 3226 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3227 if (ss->rx_small.info == NULL) 3228 return err; 3229 3230 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 3231 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3232 if (ss->rx_big.info == NULL) 3233 return err; 3234 3235 /* allocate the rx busdma resources */ 3236 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3237 1, /* alignment */ 3238 4096, /* boundary */ 3239 BUS_SPACE_MAXADDR, /* low */ 3240 BUS_SPACE_MAXADDR, /* high */ 3241 NULL, NULL, /* filter */ 3242 MHLEN, /* maxsize */ 3243 1, /* num segs */ 3244 MHLEN, /* maxsegsize */ 3245 BUS_DMA_ALLOCNOW, /* flags */ 3246 NULL, NULL, /* lock */ 3247 &ss->rx_small.dmat); /* tag */ 3248 if (err != 0) { 3249 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3250 err); 3251 return err; 3252 } 3253 3254 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3255 1, /* alignment */ 3256 #if MXGE_VIRT_JUMBOS 3257 4096, /* boundary */ 3258 #else 3259 0, /* boundary */ 3260 #endif 3261 BUS_SPACE_MAXADDR, /* low */ 3262 BUS_SPACE_MAXADDR, /* high */ 3263 NULL, NULL, /* filter */ 3264 3*4096, /* maxsize */ 3265 #if MXGE_VIRT_JUMBOS 3266 3, /* num segs */ 3267 4096, /* maxsegsize*/ 3268 #else 3269 1, /* num segs */ 3270 MJUM9BYTES, /* maxsegsize*/ 3271 #endif 3272 BUS_DMA_ALLOCNOW, /* flags */ 3273 NULL, NULL, /* lock */ 3274 &ss->rx_big.dmat); /* tag */ 3275 if (err != 0) { 3276 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3277 err); 3278 return err; 3279 } 3280 for (i = 0; i <= ss->rx_small.mask; i++) { 3281 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3282 &ss->rx_small.info[i].map); 3283 if (err != 0) { 3284 device_printf(sc->dev, "Err %d rx_small dmamap\n", 3285 err); 3286 return err; 3287 } 3288 } 3289 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3290 &ss->rx_small.extra_map); 3291 if (err != 0) { 3292 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 3293 err); 3294 return err; 3295 } 3296 3297 for (i = 0; i <= ss->rx_big.mask; i++) { 3298 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3299 &ss->rx_big.info[i].map); 3300 if (err != 0) { 3301 device_printf(sc->dev, "Err %d rx_big dmamap\n", 3302 err); 3303 return err; 3304 } 3305 } 3306 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3307 &ss->rx_big.extra_map); 3308 if (err != 0) { 3309 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 3310 err); 3311 return err; 3312 } 3313 3314 /* now allocate TX resouces */ 3315 3316 #ifndef IFNET_BUF_RING 3317 /* only use a single TX ring for now */ 3318 if (ss != ss->sc->ss) 3319 return 0; 3320 #endif 3321 3322 ss->tx.mask = tx_ring_entries - 1; 3323 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3324 3325 3326 /* allocate the tx request copy block */ 3327 bytes = 8 + 3328 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3329 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 3330 if (ss->tx.req_bytes == NULL) 3331 return err; 3332 /* ensure req_list entries are aligned to 8 bytes */ 3333 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3334 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL); 3335 3336 /* allocate the tx busdma segment list */ 3337 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3338 ss->tx.seg_list = (bus_dma_segment_t *) 3339 malloc(bytes, M_DEVBUF, M_WAITOK); 3340 if (ss->tx.seg_list == NULL) 3341 return err; 3342 3343 /* allocate the tx host info ring */ 3344 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3345 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3346 if (ss->tx.info == NULL) 3347 return err; 3348 3349 /* allocate the tx busdma resources */ 3350 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3351 1, /* alignment */ 3352 sc->tx_boundary, /* boundary */ 3353 BUS_SPACE_MAXADDR, /* low */ 3354 BUS_SPACE_MAXADDR, /* high */ 3355 NULL, NULL, /* filter */ 3356 65536 + 256, /* maxsize */ 3357 ss->tx.max_desc - 2, /* num segs */ 3358 sc->tx_boundary, /* maxsegsz */ 3359 BUS_DMA_ALLOCNOW, /* flags */ 3360 NULL, NULL, /* lock */ 3361 &ss->tx.dmat); /* tag */ 3362 3363 if (err != 0) { 3364 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3365 err); 3366 return err; 3367 } 3368 3369 /* now use these tags to setup dmamaps for each slot 3370 in the ring */ 3371 for (i = 0; i <= ss->tx.mask; i++) { 3372 err = bus_dmamap_create(ss->tx.dmat, 0, 3373 &ss->tx.info[i].map); 3374 if (err != 0) { 3375 device_printf(sc->dev, "Err %d tx dmamap\n", 3376 err); 3377 return err; 3378 } 3379 } 3380 return 0; 3381 3382 } 3383 3384 static int 3385 mxge_alloc_rings(mxge_softc_t *sc) 3386 { 3387 mxge_cmd_t cmd; 3388 int tx_ring_size; 3389 int tx_ring_entries, rx_ring_entries; 3390 int err, slice; 3391 3392 /* get ring sizes */ 3393 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3394 tx_ring_size = cmd.data0; 3395 if (err != 0) { 3396 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3397 goto abort; 3398 } 3399 3400 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3401 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3402 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 3403 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 3404 IFQ_SET_READY(&sc->ifp->if_snd); 3405 3406 for (slice = 0; slice < sc->num_slices; slice++) { 3407 err = mxge_alloc_slice_rings(&sc->ss[slice], 3408 rx_ring_entries, 3409 tx_ring_entries); 3410 if (err != 0) 3411 goto abort; 3412 } 3413 return 0; 3414 3415 abort: 3416 mxge_free_rings(sc); 3417 return err; 3418 3419 } 3420 3421 3422 static void 3423 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3424 { 3425 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3426 3427 if (bufsize < MCLBYTES) { 3428 /* easy, everything fits in a single buffer */ 3429 *big_buf_size = MCLBYTES; 3430 *cl_size = MCLBYTES; 3431 *nbufs = 1; 3432 return; 3433 } 3434 3435 if (bufsize < MJUMPAGESIZE) { 3436 /* still easy, everything still fits in a single buffer */ 3437 *big_buf_size = MJUMPAGESIZE; 3438 *cl_size = MJUMPAGESIZE; 3439 *nbufs = 1; 3440 return; 3441 } 3442 #if MXGE_VIRT_JUMBOS 3443 /* now we need to use virtually contiguous buffers */ 3444 *cl_size = MJUM9BYTES; 3445 *big_buf_size = 4096; 3446 *nbufs = mtu / 4096 + 1; 3447 /* needs to be a power of two, so round up */ 3448 if (*nbufs == 3) 3449 *nbufs = 4; 3450 #else 3451 *cl_size = MJUM9BYTES; 3452 *big_buf_size = MJUM9BYTES; 3453 *nbufs = 1; 3454 #endif 3455 } 3456 3457 static int 3458 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3459 { 3460 mxge_softc_t *sc; 3461 mxge_cmd_t cmd; 3462 bus_dmamap_t map; 3463 struct lro_entry *lro_entry; 3464 int err, i, slice; 3465 3466 3467 sc = ss->sc; 3468 slice = ss - sc->ss; 3469 3470 SLIST_INIT(&ss->lro_free); 3471 SLIST_INIT(&ss->lro_active); 3472 3473 for (i = 0; i < sc->lro_cnt; i++) { 3474 lro_entry = (struct lro_entry *) 3475 malloc(sizeof (*lro_entry), M_DEVBUF, 3476 M_NOWAIT | M_ZERO); 3477 if (lro_entry == NULL) { 3478 sc->lro_cnt = i; 3479 break; 3480 } 3481 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next); 3482 } 3483 /* get the lanai pointers to the send and receive rings */ 3484 3485 err = 0; 3486 #ifndef IFNET_BUF_RING 3487 /* We currently only send from the first slice */ 3488 if (slice == 0) { 3489 #endif 3490 cmd.data0 = slice; 3491 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3492 ss->tx.lanai = 3493 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3494 ss->tx.send_go = (volatile uint32_t *) 3495 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3496 ss->tx.send_stop = (volatile uint32_t *) 3497 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3498 #ifndef IFNET_BUF_RING 3499 } 3500 #endif 3501 cmd.data0 = slice; 3502 err |= mxge_send_cmd(sc, 3503 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3504 ss->rx_small.lanai = 3505 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3506 cmd.data0 = slice; 3507 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3508 ss->rx_big.lanai = 3509 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3510 3511 if (err != 0) { 3512 device_printf(sc->dev, 3513 "failed to get ring sizes or locations\n"); 3514 return EIO; 3515 } 3516 3517 /* stock receive rings */ 3518 for (i = 0; i <= ss->rx_small.mask; i++) { 3519 map = ss->rx_small.info[i].map; 3520 err = mxge_get_buf_small(ss, map, i); 3521 if (err) { 3522 device_printf(sc->dev, "alloced %d/%d smalls\n", 3523 i, ss->rx_small.mask + 1); 3524 return ENOMEM; 3525 } 3526 } 3527 for (i = 0; i <= ss->rx_big.mask; i++) { 3528 ss->rx_big.shadow[i].addr_low = 0xffffffff; 3529 ss->rx_big.shadow[i].addr_high = 0xffffffff; 3530 } 3531 ss->rx_big.nbufs = nbufs; 3532 ss->rx_big.cl_size = cl_size; 3533 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN + 3534 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3535 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { 3536 map = ss->rx_big.info[i].map; 3537 err = mxge_get_buf_big(ss, map, i); 3538 if (err) { 3539 device_printf(sc->dev, "alloced %d/%d bigs\n", 3540 i, ss->rx_big.mask + 1); 3541 return ENOMEM; 3542 } 3543 } 3544 return 0; 3545 } 3546 3547 static int 3548 mxge_open(mxge_softc_t *sc) 3549 { 3550 mxge_cmd_t cmd; 3551 int err, big_bytes, nbufs, slice, cl_size, i; 3552 bus_addr_t bus; 3553 volatile uint8_t *itable; 3554 struct mxge_slice_state *ss; 3555 3556 /* Copy the MAC address in case it was overridden */ 3557 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 3558 3559 err = mxge_reset(sc, 1); 3560 if (err != 0) { 3561 device_printf(sc->dev, "failed to reset\n"); 3562 return EIO; 3563 } 3564 3565 if (sc->num_slices > 1) { 3566 /* setup the indirection table */ 3567 cmd.data0 = sc->num_slices; 3568 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 3569 &cmd); 3570 3571 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 3572 &cmd); 3573 if (err != 0) { 3574 device_printf(sc->dev, 3575 "failed to setup rss tables\n"); 3576 return err; 3577 } 3578 3579 /* just enable an identity mapping */ 3580 itable = sc->sram + cmd.data0; 3581 for (i = 0; i < sc->num_slices; i++) 3582 itable[i] = (uint8_t)i; 3583 3584 cmd.data0 = 1; 3585 cmd.data1 = mxge_rss_hash_type; 3586 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3587 if (err != 0) { 3588 device_printf(sc->dev, "failed to enable slices\n"); 3589 return err; 3590 } 3591 } 3592 3593 3594 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); 3595 3596 cmd.data0 = nbufs; 3597 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 3598 &cmd); 3599 /* error is only meaningful if we're trying to set 3600 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 3601 if (err && nbufs > 1) { 3602 device_printf(sc->dev, 3603 "Failed to set alway-use-n to %d\n", 3604 nbufs); 3605 return EIO; 3606 } 3607 /* Give the firmware the mtu and the big and small buffer 3608 sizes. The firmware wants the big buf size to be a power 3609 of two. Luckily, FreeBSD's clusters are powers of two */ 3610 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3611 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3612 cmd.data0 = MHLEN - MXGEFW_PAD; 3613 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3614 &cmd); 3615 cmd.data0 = big_bytes; 3616 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3617 3618 if (err != 0) { 3619 device_printf(sc->dev, "failed to setup params\n"); 3620 goto abort; 3621 } 3622 3623 /* Now give him the pointer to the stats block */ 3624 for (slice = 0; 3625 #ifdef IFNET_BUF_RING 3626 slice < sc->num_slices; 3627 #else 3628 slice < 1; 3629 #endif 3630 slice++) { 3631 ss = &sc->ss[slice]; 3632 cmd.data0 = 3633 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3634 cmd.data1 = 3635 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3636 cmd.data2 = sizeof(struct mcp_irq_data); 3637 cmd.data2 |= (slice << 16); 3638 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3639 } 3640 3641 if (err != 0) { 3642 bus = sc->ss->fw_stats_dma.bus_addr; 3643 bus += offsetof(struct mcp_irq_data, send_done_count); 3644 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3645 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3646 err = mxge_send_cmd(sc, 3647 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3648 &cmd); 3649 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3650 sc->fw_multicast_support = 0; 3651 } else { 3652 sc->fw_multicast_support = 1; 3653 } 3654 3655 if (err != 0) { 3656 device_printf(sc->dev, "failed to setup params\n"); 3657 goto abort; 3658 } 3659 3660 for (slice = 0; slice < sc->num_slices; slice++) { 3661 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3662 if (err != 0) { 3663 device_printf(sc->dev, "couldn't open slice %d\n", 3664 slice); 3665 goto abort; 3666 } 3667 } 3668 3669 /* Finally, start the firmware running */ 3670 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3671 if (err) { 3672 device_printf(sc->dev, "Couldn't bring up link\n"); 3673 goto abort; 3674 } 3675 #ifdef IFNET_BUF_RING 3676 for (slice = 0; slice < sc->num_slices; slice++) { 3677 ss = &sc->ss[slice]; 3678 ss->if_drv_flags |= IFF_DRV_RUNNING; 3679 ss->if_drv_flags &= ~IFF_DRV_OACTIVE; 3680 } 3681 #endif 3682 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3683 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3684 3685 return 0; 3686 3687 3688 abort: 3689 mxge_free_mbufs(sc); 3690 3691 return err; 3692 } 3693 3694 static int 3695 mxge_close(mxge_softc_t *sc, int down) 3696 { 3697 mxge_cmd_t cmd; 3698 int err, old_down_cnt; 3699 #ifdef IFNET_BUF_RING 3700 struct mxge_slice_state *ss; 3701 int slice; 3702 #endif 3703 3704 #ifdef IFNET_BUF_RING 3705 for (slice = 0; slice < sc->num_slices; slice++) { 3706 ss = &sc->ss[slice]; 3707 ss->if_drv_flags &= ~IFF_DRV_RUNNING; 3708 } 3709 #endif 3710 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3711 if (!down) { 3712 old_down_cnt = sc->down_cnt; 3713 wmb(); 3714 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3715 if (err) { 3716 device_printf(sc->dev, 3717 "Couldn't bring down link\n"); 3718 } 3719 if (old_down_cnt == sc->down_cnt) { 3720 /* wait for down irq */ 3721 DELAY(10 * sc->intr_coal_delay); 3722 } 3723 wmb(); 3724 if (old_down_cnt == sc->down_cnt) { 3725 device_printf(sc->dev, "never got down irq\n"); 3726 } 3727 } 3728 mxge_free_mbufs(sc); 3729 3730 return 0; 3731 } 3732 3733 static void 3734 mxge_setup_cfg_space(mxge_softc_t *sc) 3735 { 3736 device_t dev = sc->dev; 3737 int reg; 3738 uint16_t cmd, lnk, pectl; 3739 3740 /* find the PCIe link width and set max read request to 4KB*/ 3741 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { 3742 lnk = pci_read_config(dev, reg + 0x12, 2); 3743 sc->link_width = (lnk >> 4) & 0x3f; 3744 3745 if (sc->pectl == 0) { 3746 pectl = pci_read_config(dev, reg + 0x8, 2); 3747 pectl = (pectl & ~0x7000) | (5 << 12); 3748 pci_write_config(dev, reg + 0x8, pectl, 2); 3749 sc->pectl = pectl; 3750 } else { 3751 /* restore saved pectl after watchdog reset */ 3752 pci_write_config(dev, reg + 0x8, sc->pectl, 2); 3753 } 3754 } 3755 3756 /* Enable DMA and Memory space access */ 3757 pci_enable_busmaster(dev); 3758 cmd = pci_read_config(dev, PCIR_COMMAND, 2); 3759 cmd |= PCIM_CMD_MEMEN; 3760 pci_write_config(dev, PCIR_COMMAND, cmd, 2); 3761 } 3762 3763 static uint32_t 3764 mxge_read_reboot(mxge_softc_t *sc) 3765 { 3766 device_t dev = sc->dev; 3767 uint32_t vs; 3768 3769 /* find the vendor specific offset */ 3770 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) { 3771 device_printf(sc->dev, 3772 "could not find vendor specific offset\n"); 3773 return (uint32_t)-1; 3774 } 3775 /* enable read32 mode */ 3776 pci_write_config(dev, vs + 0x10, 0x3, 1); 3777 /* tell NIC which register to read */ 3778 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3779 return (pci_read_config(dev, vs + 0x14, 4)); 3780 } 3781 3782 static void 3783 mxge_watchdog_reset(mxge_softc_t *sc) 3784 { 3785 struct pci_devinfo *dinfo; 3786 struct mxge_slice_state *ss; 3787 int err, running, s, num_tx_slices = 1; 3788 uint32_t reboot; 3789 uint16_t cmd; 3790 3791 err = ENXIO; 3792 3793 device_printf(sc->dev, "Watchdog reset!\n"); 3794 3795 /* 3796 * check to see if the NIC rebooted. If it did, then all of 3797 * PCI config space has been reset, and things like the 3798 * busmaster bit will be zero. If this is the case, then we 3799 * must restore PCI config space before the NIC can be used 3800 * again 3801 */ 3802 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3803 if (cmd == 0xffff) { 3804 /* 3805 * maybe the watchdog caught the NIC rebooting; wait 3806 * up to 100ms for it to finish. If it does not come 3807 * back, then give up 3808 */ 3809 DELAY(1000*100); 3810 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3811 if (cmd == 0xffff) { 3812 device_printf(sc->dev, "NIC disappeared!\n"); 3813 } 3814 } 3815 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3816 /* print the reboot status */ 3817 reboot = mxge_read_reboot(sc); 3818 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3819 reboot); 3820 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 3821 if (running) { 3822 3823 /* 3824 * quiesce NIC so that TX routines will not try to 3825 * xmit after restoration of BAR 3826 */ 3827 3828 /* Mark the link as down */ 3829 if (sc->link_state) { 3830 sc->link_state = 0; 3831 if_link_state_change(sc->ifp, 3832 LINK_STATE_DOWN); 3833 } 3834 #ifdef IFNET_BUF_RING 3835 num_tx_slices = sc->num_slices; 3836 #endif 3837 /* grab all TX locks to ensure no tx */ 3838 for (s = 0; s < num_tx_slices; s++) { 3839 ss = &sc->ss[s]; 3840 mtx_lock(&ss->tx.mtx); 3841 } 3842 mxge_close(sc, 1); 3843 } 3844 /* restore PCI configuration space */ 3845 dinfo = device_get_ivars(sc->dev); 3846 pci_cfg_restore(sc->dev, dinfo); 3847 3848 /* and redo any changes we made to our config space */ 3849 mxge_setup_cfg_space(sc); 3850 3851 /* reload f/w */ 3852 err = mxge_load_firmware(sc, 0); 3853 if (err) { 3854 device_printf(sc->dev, 3855 "Unable to re-load f/w\n"); 3856 } 3857 if (running) { 3858 if (!err) 3859 err = mxge_open(sc); 3860 /* release all TX locks */ 3861 for (s = 0; s < num_tx_slices; s++) { 3862 ss = &sc->ss[s]; 3863 #ifdef IFNET_BUF_RING 3864 mxge_start_locked(ss); 3865 #endif 3866 mtx_unlock(&ss->tx.mtx); 3867 } 3868 } 3869 sc->watchdog_resets++; 3870 } else { 3871 device_printf(sc->dev, 3872 "NIC did not reboot, not resetting\n"); 3873 err = 0; 3874 } 3875 if (err) { 3876 device_printf(sc->dev, "watchdog reset failed\n"); 3877 } else { 3878 if (sc->dying == 2) 3879 sc->dying = 0; 3880 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3881 } 3882 } 3883 3884 static void 3885 mxge_watchdog_task(void *arg, int pending) 3886 { 3887 mxge_softc_t *sc = arg; 3888 3889 3890 mtx_lock(&sc->driver_mtx); 3891 mxge_watchdog_reset(sc); 3892 mtx_unlock(&sc->driver_mtx); 3893 } 3894 3895 static void 3896 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice) 3897 { 3898 tx = &sc->ss[slice].tx; 3899 device_printf(sc->dev, "slice %d struck? ring state:\n", slice); 3900 device_printf(sc->dev, 3901 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3902 tx->req, tx->done, tx->queue_active); 3903 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3904 tx->activate, tx->deactivate); 3905 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3906 tx->pkt_done, 3907 be32toh(sc->ss->fw_stats->send_done_count)); 3908 } 3909 3910 static int 3911 mxge_watchdog(mxge_softc_t *sc) 3912 { 3913 mxge_tx_ring_t *tx; 3914 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3915 int i, err = 0; 3916 3917 /* see if we have outstanding transmits, which 3918 have been pending for more than mxge_ticks */ 3919 for (i = 0; 3920 #ifdef IFNET_BUF_RING 3921 (i < sc->num_slices) && (err == 0); 3922 #else 3923 (i < 1) && (err == 0); 3924 #endif 3925 i++) { 3926 tx = &sc->ss[i].tx; 3927 if (tx->req != tx->done && 3928 tx->watchdog_req != tx->watchdog_done && 3929 tx->done == tx->watchdog_done) { 3930 /* check for pause blocking before resetting */ 3931 if (tx->watchdog_rx_pause == rx_pause) { 3932 mxge_warn_stuck(sc, tx, i); 3933 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 3934 return (ENXIO); 3935 } 3936 else 3937 device_printf(sc->dev, "Flow control blocking " 3938 "xmits, check link partner\n"); 3939 } 3940 3941 tx->watchdog_req = tx->req; 3942 tx->watchdog_done = tx->done; 3943 tx->watchdog_rx_pause = rx_pause; 3944 } 3945 3946 if (sc->need_media_probe) 3947 mxge_media_probe(sc); 3948 return (err); 3949 } 3950 3951 static u_long 3952 mxge_update_stats(mxge_softc_t *sc) 3953 { 3954 struct mxge_slice_state *ss; 3955 u_long pkts = 0; 3956 u_long ipackets = 0; 3957 u_long opackets = 0; 3958 #ifdef IFNET_BUF_RING 3959 u_long obytes = 0; 3960 u_long omcasts = 0; 3961 u_long odrops = 0; 3962 #endif 3963 u_long oerrors = 0; 3964 int slice; 3965 3966 for (slice = 0; slice < sc->num_slices; slice++) { 3967 ss = &sc->ss[slice]; 3968 ipackets += ss->ipackets; 3969 opackets += ss->opackets; 3970 #ifdef IFNET_BUF_RING 3971 obytes += ss->obytes; 3972 omcasts += ss->omcasts; 3973 odrops += ss->tx.br->br_drops; 3974 #endif 3975 oerrors += ss->oerrors; 3976 } 3977 pkts = (ipackets - sc->ifp->if_ipackets); 3978 pkts += (opackets - sc->ifp->if_opackets); 3979 sc->ifp->if_ipackets = ipackets; 3980 sc->ifp->if_opackets = opackets; 3981 #ifdef IFNET_BUF_RING 3982 sc->ifp->if_obytes = obytes; 3983 sc->ifp->if_omcasts = omcasts; 3984 sc->ifp->if_snd.ifq_drops = odrops; 3985 #endif 3986 sc->ifp->if_oerrors = oerrors; 3987 return pkts; 3988 } 3989 3990 static void 3991 mxge_tick(void *arg) 3992 { 3993 mxge_softc_t *sc = arg; 3994 u_long pkts = 0; 3995 int err = 0; 3996 int running, ticks; 3997 uint16_t cmd; 3998 3999 ticks = mxge_ticks; 4000 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 4001 if (running) { 4002 /* aggregate stats from different slices */ 4003 pkts = mxge_update_stats(sc); 4004 if (!sc->watchdog_countdown) { 4005 err = mxge_watchdog(sc); 4006 sc->watchdog_countdown = 4; 4007 } 4008 sc->watchdog_countdown--; 4009 } 4010 if (pkts == 0) { 4011 /* ensure NIC did not suffer h/w fault while idle */ 4012 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 4013 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 4014 sc->dying = 2; 4015 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 4016 err = ENXIO; 4017 } 4018 /* look less often if NIC is idle */ 4019 ticks *= 4; 4020 } 4021 4022 if (err == 0) 4023 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 4024 4025 } 4026 4027 static int 4028 mxge_media_change(struct ifnet *ifp) 4029 { 4030 return EINVAL; 4031 } 4032 4033 static int 4034 mxge_change_mtu(mxge_softc_t *sc, int mtu) 4035 { 4036 struct ifnet *ifp = sc->ifp; 4037 int real_mtu, old_mtu; 4038 int err = 0; 4039 4040 4041 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 4042 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 4043 return EINVAL; 4044 mtx_lock(&sc->driver_mtx); 4045 old_mtu = ifp->if_mtu; 4046 ifp->if_mtu = mtu; 4047 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4048 mxge_close(sc, 0); 4049 err = mxge_open(sc); 4050 if (err != 0) { 4051 ifp->if_mtu = old_mtu; 4052 mxge_close(sc, 0); 4053 (void) mxge_open(sc); 4054 } 4055 } 4056 mtx_unlock(&sc->driver_mtx); 4057 return err; 4058 } 4059 4060 static void 4061 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 4062 { 4063 mxge_softc_t *sc = ifp->if_softc; 4064 4065 4066 if (sc == NULL) 4067 return; 4068 ifmr->ifm_status = IFM_AVALID; 4069 ifmr->ifm_active = IFM_ETHER | IFM_FDX; 4070 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 4071 ifmr->ifm_active |= sc->current_media; 4072 } 4073 4074 static int 4075 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 4076 { 4077 mxge_softc_t *sc = ifp->if_softc; 4078 struct ifreq *ifr = (struct ifreq *)data; 4079 int err, mask; 4080 4081 err = 0; 4082 switch (command) { 4083 case SIOCSIFADDR: 4084 case SIOCGIFADDR: 4085 err = ether_ioctl(ifp, command, data); 4086 break; 4087 4088 case SIOCSIFMTU: 4089 err = mxge_change_mtu(sc, ifr->ifr_mtu); 4090 break; 4091 4092 case SIOCSIFFLAGS: 4093 mtx_lock(&sc->driver_mtx); 4094 if (sc->dying) { 4095 mtx_unlock(&sc->driver_mtx); 4096 return EINVAL; 4097 } 4098 if (ifp->if_flags & IFF_UP) { 4099 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 4100 err = mxge_open(sc); 4101 } else { 4102 /* take care of promis can allmulti 4103 flag chages */ 4104 mxge_change_promisc(sc, 4105 ifp->if_flags & IFF_PROMISC); 4106 mxge_set_multicast_list(sc); 4107 } 4108 } else { 4109 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4110 mxge_close(sc, 0); 4111 } 4112 } 4113 mtx_unlock(&sc->driver_mtx); 4114 break; 4115 4116 case SIOCADDMULTI: 4117 case SIOCDELMULTI: 4118 mtx_lock(&sc->driver_mtx); 4119 mxge_set_multicast_list(sc); 4120 mtx_unlock(&sc->driver_mtx); 4121 break; 4122 4123 case SIOCSIFCAP: 4124 mtx_lock(&sc->driver_mtx); 4125 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 4126 if (mask & IFCAP_TXCSUM) { 4127 if (IFCAP_TXCSUM & ifp->if_capenable) { 4128 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 4129 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP 4130 | CSUM_TSO); 4131 } else { 4132 ifp->if_capenable |= IFCAP_TXCSUM; 4133 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 4134 } 4135 } else if (mask & IFCAP_RXCSUM) { 4136 if (IFCAP_RXCSUM & ifp->if_capenable) { 4137 ifp->if_capenable &= ~IFCAP_RXCSUM; 4138 sc->csum_flag = 0; 4139 } else { 4140 ifp->if_capenable |= IFCAP_RXCSUM; 4141 sc->csum_flag = 1; 4142 } 4143 } 4144 if (mask & IFCAP_TSO4) { 4145 if (IFCAP_TSO4 & ifp->if_capenable) { 4146 ifp->if_capenable &= ~IFCAP_TSO4; 4147 ifp->if_hwassist &= ~CSUM_TSO; 4148 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 4149 ifp->if_capenable |= IFCAP_TSO4; 4150 ifp->if_hwassist |= CSUM_TSO; 4151 } else { 4152 printf("mxge requires tx checksum offload" 4153 " be enabled to use TSO\n"); 4154 err = EINVAL; 4155 } 4156 } 4157 if (mask & IFCAP_LRO) { 4158 if (IFCAP_LRO & ifp->if_capenable) 4159 err = mxge_change_lro_locked(sc, 0); 4160 else 4161 err = mxge_change_lro_locked(sc, mxge_lro_cnt); 4162 } 4163 if (mask & IFCAP_VLAN_HWTAGGING) 4164 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 4165 if (mask & IFCAP_VLAN_HWTSO) 4166 ifp->if_capenable ^= IFCAP_VLAN_HWTSO; 4167 4168 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) || 4169 !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING)) 4170 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO; 4171 4172 mtx_unlock(&sc->driver_mtx); 4173 VLAN_CAPABILITIES(ifp); 4174 4175 break; 4176 4177 case SIOCGIFMEDIA: 4178 mtx_lock(&sc->driver_mtx); 4179 mxge_media_probe(sc); 4180 mtx_unlock(&sc->driver_mtx); 4181 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 4182 &sc->media, command); 4183 break; 4184 4185 default: 4186 err = ENOTTY; 4187 } 4188 return err; 4189 } 4190 4191 static void 4192 mxge_fetch_tunables(mxge_softc_t *sc) 4193 { 4194 4195 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); 4196 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 4197 &mxge_flow_control); 4198 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 4199 &mxge_intr_coal_delay); 4200 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 4201 &mxge_nvidia_ecrc_enable); 4202 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 4203 &mxge_force_firmware); 4204 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 4205 &mxge_deassert_wait); 4206 TUNABLE_INT_FETCH("hw.mxge.verbose", 4207 &mxge_verbose); 4208 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 4209 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt); 4210 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); 4211 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); 4212 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type); 4213 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu); 4214 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle); 4215 if (sc->lro_cnt != 0) 4216 mxge_lro_cnt = sc->lro_cnt; 4217 4218 if (bootverbose) 4219 mxge_verbose = 1; 4220 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 4221 mxge_intr_coal_delay = 30; 4222 if (mxge_ticks == 0) 4223 mxge_ticks = hz / 2; 4224 sc->pause = mxge_flow_control; 4225 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 4226 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) { 4227 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 4228 } 4229 if (mxge_initial_mtu > ETHERMTU_JUMBO || 4230 mxge_initial_mtu < ETHER_MIN_LEN) 4231 mxge_initial_mtu = ETHERMTU_JUMBO; 4232 4233 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE) 4234 mxge_throttle = MXGE_MAX_THROTTLE; 4235 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE) 4236 mxge_throttle = MXGE_MIN_THROTTLE; 4237 sc->throttle = mxge_throttle; 4238 } 4239 4240 4241 static void 4242 mxge_free_slices(mxge_softc_t *sc) 4243 { 4244 struct mxge_slice_state *ss; 4245 int i; 4246 4247 4248 if (sc->ss == NULL) 4249 return; 4250 4251 for (i = 0; i < sc->num_slices; i++) { 4252 ss = &sc->ss[i]; 4253 if (ss->fw_stats != NULL) { 4254 mxge_dma_free(&ss->fw_stats_dma); 4255 ss->fw_stats = NULL; 4256 #ifdef IFNET_BUF_RING 4257 if (ss->tx.br != NULL) { 4258 drbr_free(ss->tx.br, M_DEVBUF); 4259 ss->tx.br = NULL; 4260 } 4261 #endif 4262 mtx_destroy(&ss->tx.mtx); 4263 } 4264 if (ss->rx_done.entry != NULL) { 4265 mxge_dma_free(&ss->rx_done.dma); 4266 ss->rx_done.entry = NULL; 4267 } 4268 } 4269 free(sc->ss, M_DEVBUF); 4270 sc->ss = NULL; 4271 } 4272 4273 static int 4274 mxge_alloc_slices(mxge_softc_t *sc) 4275 { 4276 mxge_cmd_t cmd; 4277 struct mxge_slice_state *ss; 4278 size_t bytes; 4279 int err, i, max_intr_slots; 4280 4281 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4282 if (err != 0) { 4283 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4284 return err; 4285 } 4286 sc->rx_ring_size = cmd.data0; 4287 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); 4288 4289 bytes = sizeof (*sc->ss) * sc->num_slices; 4290 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO); 4291 if (sc->ss == NULL) 4292 return (ENOMEM); 4293 for (i = 0; i < sc->num_slices; i++) { 4294 ss = &sc->ss[i]; 4295 4296 ss->sc = sc; 4297 4298 /* allocate per-slice rx interrupt queues */ 4299 4300 bytes = max_intr_slots * sizeof (*ss->rx_done.entry); 4301 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096); 4302 if (err != 0) 4303 goto abort; 4304 ss->rx_done.entry = ss->rx_done.dma.addr; 4305 bzero(ss->rx_done.entry, bytes); 4306 4307 /* 4308 * allocate the per-slice firmware stats; stats 4309 * (including tx) are used used only on the first 4310 * slice for now 4311 */ 4312 #ifndef IFNET_BUF_RING 4313 if (i > 0) 4314 continue; 4315 #endif 4316 4317 bytes = sizeof (*ss->fw_stats); 4318 err = mxge_dma_alloc(sc, &ss->fw_stats_dma, 4319 sizeof (*ss->fw_stats), 64); 4320 if (err != 0) 4321 goto abort; 4322 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr; 4323 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name), 4324 "%s:tx(%d)", device_get_nameunit(sc->dev), i); 4325 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF); 4326 #ifdef IFNET_BUF_RING 4327 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK, 4328 &ss->tx.mtx); 4329 #endif 4330 } 4331 4332 return (0); 4333 4334 abort: 4335 mxge_free_slices(sc); 4336 return (ENOMEM); 4337 } 4338 4339 static void 4340 mxge_slice_probe(mxge_softc_t *sc) 4341 { 4342 mxge_cmd_t cmd; 4343 char *old_fw; 4344 int msix_cnt, status, max_intr_slots; 4345 4346 sc->num_slices = 1; 4347 /* 4348 * don't enable multiple slices if they are not enabled, 4349 * or if this is not an SMP system 4350 */ 4351 4352 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2) 4353 return; 4354 4355 /* see how many MSI-X interrupts are available */ 4356 msix_cnt = pci_msix_count(sc->dev); 4357 if (msix_cnt < 2) 4358 return; 4359 4360 /* now load the slice aware firmware see what it supports */ 4361 old_fw = sc->fw_name; 4362 if (old_fw == mxge_fw_aligned) 4363 sc->fw_name = mxge_fw_rss_aligned; 4364 else 4365 sc->fw_name = mxge_fw_rss_unaligned; 4366 status = mxge_load_firmware(sc, 0); 4367 if (status != 0) { 4368 device_printf(sc->dev, "Falling back to a single slice\n"); 4369 return; 4370 } 4371 4372 /* try to send a reset command to the card to see if it 4373 is alive */ 4374 memset(&cmd, 0, sizeof (cmd)); 4375 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 4376 if (status != 0) { 4377 device_printf(sc->dev, "failed reset\n"); 4378 goto abort_with_fw; 4379 } 4380 4381 /* get rx ring size */ 4382 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4383 if (status != 0) { 4384 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4385 goto abort_with_fw; 4386 } 4387 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t)); 4388 4389 /* tell it the size of the interrupt queues */ 4390 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot); 4391 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 4392 if (status != 0) { 4393 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 4394 goto abort_with_fw; 4395 } 4396 4397 /* ask the maximum number of slices it supports */ 4398 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 4399 if (status != 0) { 4400 device_printf(sc->dev, 4401 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); 4402 goto abort_with_fw; 4403 } 4404 sc->num_slices = cmd.data0; 4405 if (sc->num_slices > msix_cnt) 4406 sc->num_slices = msix_cnt; 4407 4408 if (mxge_max_slices == -1) { 4409 /* cap to number of CPUs in system */ 4410 if (sc->num_slices > mp_ncpus) 4411 sc->num_slices = mp_ncpus; 4412 } else { 4413 if (sc->num_slices > mxge_max_slices) 4414 sc->num_slices = mxge_max_slices; 4415 } 4416 /* make sure it is a power of two */ 4417 while (sc->num_slices & (sc->num_slices - 1)) 4418 sc->num_slices--; 4419 4420 if (mxge_verbose) 4421 device_printf(sc->dev, "using %d slices\n", 4422 sc->num_slices); 4423 4424 return; 4425 4426 abort_with_fw: 4427 sc->fw_name = old_fw; 4428 (void) mxge_load_firmware(sc, 0); 4429 } 4430 4431 static int 4432 mxge_add_msix_irqs(mxge_softc_t *sc) 4433 { 4434 size_t bytes; 4435 int count, err, i, rid; 4436 4437 rid = PCIR_BAR(2); 4438 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 4439 &rid, RF_ACTIVE); 4440 4441 if (sc->msix_table_res == NULL) { 4442 device_printf(sc->dev, "couldn't alloc MSIX table res\n"); 4443 return ENXIO; 4444 } 4445 4446 count = sc->num_slices; 4447 err = pci_alloc_msix(sc->dev, &count); 4448 if (err != 0) { 4449 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d" 4450 "err = %d \n", sc->num_slices, err); 4451 goto abort_with_msix_table; 4452 } 4453 if (count < sc->num_slices) { 4454 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n", 4455 count, sc->num_slices); 4456 device_printf(sc->dev, 4457 "Try setting hw.mxge.max_slices to %d\n", 4458 count); 4459 err = ENOSPC; 4460 goto abort_with_msix; 4461 } 4462 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices; 4463 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4464 if (sc->msix_irq_res == NULL) { 4465 err = ENOMEM; 4466 goto abort_with_msix; 4467 } 4468 4469 for (i = 0; i < sc->num_slices; i++) { 4470 rid = i + 1; 4471 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev, 4472 SYS_RES_IRQ, 4473 &rid, RF_ACTIVE); 4474 if (sc->msix_irq_res[i] == NULL) { 4475 device_printf(sc->dev, "couldn't allocate IRQ res" 4476 " for message %d\n", i); 4477 err = ENXIO; 4478 goto abort_with_res; 4479 } 4480 } 4481 4482 bytes = sizeof (*sc->msix_ih) * sc->num_slices; 4483 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4484 4485 for (i = 0; i < sc->num_slices; i++) { 4486 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i], 4487 INTR_TYPE_NET | INTR_MPSAFE, 4488 #if __FreeBSD_version > 700030 4489 NULL, 4490 #endif 4491 mxge_intr, &sc->ss[i], &sc->msix_ih[i]); 4492 if (err != 0) { 4493 device_printf(sc->dev, "couldn't setup intr for " 4494 "message %d\n", i); 4495 goto abort_with_intr; 4496 } 4497 bus_describe_intr(sc->dev, sc->msix_irq_res[i], 4498 sc->msix_ih[i], "s%d", i); 4499 } 4500 4501 if (mxge_verbose) { 4502 device_printf(sc->dev, "using %d msix IRQs:", 4503 sc->num_slices); 4504 for (i = 0; i < sc->num_slices; i++) 4505 printf(" %ld", rman_get_start(sc->msix_irq_res[i])); 4506 printf("\n"); 4507 } 4508 return (0); 4509 4510 abort_with_intr: 4511 for (i = 0; i < sc->num_slices; i++) { 4512 if (sc->msix_ih[i] != NULL) { 4513 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4514 sc->msix_ih[i]); 4515 sc->msix_ih[i] = NULL; 4516 } 4517 } 4518 free(sc->msix_ih, M_DEVBUF); 4519 4520 4521 abort_with_res: 4522 for (i = 0; i < sc->num_slices; i++) { 4523 rid = i + 1; 4524 if (sc->msix_irq_res[i] != NULL) 4525 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4526 sc->msix_irq_res[i]); 4527 sc->msix_irq_res[i] = NULL; 4528 } 4529 free(sc->msix_irq_res, M_DEVBUF); 4530 4531 4532 abort_with_msix: 4533 pci_release_msi(sc->dev); 4534 4535 abort_with_msix_table: 4536 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4537 sc->msix_table_res); 4538 4539 return err; 4540 } 4541 4542 static int 4543 mxge_add_single_irq(mxge_softc_t *sc) 4544 { 4545 int count, err, rid; 4546 4547 count = pci_msi_count(sc->dev); 4548 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) { 4549 rid = 1; 4550 } else { 4551 rid = 0; 4552 sc->legacy_irq = 1; 4553 } 4554 sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0, 4555 1, RF_SHAREABLE | RF_ACTIVE); 4556 if (sc->irq_res == NULL) { 4557 device_printf(sc->dev, "could not alloc interrupt\n"); 4558 return ENXIO; 4559 } 4560 if (mxge_verbose) 4561 device_printf(sc->dev, "using %s irq %ld\n", 4562 sc->legacy_irq ? "INTx" : "MSI", 4563 rman_get_start(sc->irq_res)); 4564 err = bus_setup_intr(sc->dev, sc->irq_res, 4565 INTR_TYPE_NET | INTR_MPSAFE, 4566 #if __FreeBSD_version > 700030 4567 NULL, 4568 #endif 4569 mxge_intr, &sc->ss[0], &sc->ih); 4570 if (err != 0) { 4571 bus_release_resource(sc->dev, SYS_RES_IRQ, 4572 sc->legacy_irq ? 0 : 1, sc->irq_res); 4573 if (!sc->legacy_irq) 4574 pci_release_msi(sc->dev); 4575 } 4576 return err; 4577 } 4578 4579 static void 4580 mxge_rem_msix_irqs(mxge_softc_t *sc) 4581 { 4582 int i, rid; 4583 4584 for (i = 0; i < sc->num_slices; i++) { 4585 if (sc->msix_ih[i] != NULL) { 4586 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4587 sc->msix_ih[i]); 4588 sc->msix_ih[i] = NULL; 4589 } 4590 } 4591 free(sc->msix_ih, M_DEVBUF); 4592 4593 for (i = 0; i < sc->num_slices; i++) { 4594 rid = i + 1; 4595 if (sc->msix_irq_res[i] != NULL) 4596 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4597 sc->msix_irq_res[i]); 4598 sc->msix_irq_res[i] = NULL; 4599 } 4600 free(sc->msix_irq_res, M_DEVBUF); 4601 4602 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4603 sc->msix_table_res); 4604 4605 pci_release_msi(sc->dev); 4606 return; 4607 } 4608 4609 static void 4610 mxge_rem_single_irq(mxge_softc_t *sc) 4611 { 4612 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 4613 bus_release_resource(sc->dev, SYS_RES_IRQ, 4614 sc->legacy_irq ? 0 : 1, sc->irq_res); 4615 if (!sc->legacy_irq) 4616 pci_release_msi(sc->dev); 4617 } 4618 4619 static void 4620 mxge_rem_irq(mxge_softc_t *sc) 4621 { 4622 if (sc->num_slices > 1) 4623 mxge_rem_msix_irqs(sc); 4624 else 4625 mxge_rem_single_irq(sc); 4626 } 4627 4628 static int 4629 mxge_add_irq(mxge_softc_t *sc) 4630 { 4631 int err; 4632 4633 if (sc->num_slices > 1) 4634 err = mxge_add_msix_irqs(sc); 4635 else 4636 err = mxge_add_single_irq(sc); 4637 4638 if (0 && err == 0 && sc->num_slices > 1) { 4639 mxge_rem_msix_irqs(sc); 4640 err = mxge_add_msix_irqs(sc); 4641 } 4642 return err; 4643 } 4644 4645 4646 static int 4647 mxge_attach(device_t dev) 4648 { 4649 mxge_softc_t *sc = device_get_softc(dev); 4650 struct ifnet *ifp; 4651 int err, rid; 4652 4653 sc->dev = dev; 4654 mxge_fetch_tunables(sc); 4655 4656 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc); 4657 sc->tq = taskqueue_create("mxge_taskq", M_WAITOK, 4658 taskqueue_thread_enqueue, &sc->tq); 4659 if (sc->tq == NULL) { 4660 err = ENOMEM; 4661 goto abort_with_nothing; 4662 } 4663 4664 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 4665 1, /* alignment */ 4666 0, /* boundary */ 4667 BUS_SPACE_MAXADDR, /* low */ 4668 BUS_SPACE_MAXADDR, /* high */ 4669 NULL, NULL, /* filter */ 4670 65536 + 256, /* maxsize */ 4671 MXGE_MAX_SEND_DESC, /* num segs */ 4672 65536, /* maxsegsize */ 4673 0, /* flags */ 4674 NULL, NULL, /* lock */ 4675 &sc->parent_dmat); /* tag */ 4676 4677 if (err != 0) { 4678 device_printf(sc->dev, "Err %d allocating parent dmat\n", 4679 err); 4680 goto abort_with_tq; 4681 } 4682 4683 ifp = sc->ifp = if_alloc(IFT_ETHER); 4684 if (ifp == NULL) { 4685 device_printf(dev, "can not if_alloc()\n"); 4686 err = ENOSPC; 4687 goto abort_with_parent_dmat; 4688 } 4689 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4690 4691 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", 4692 device_get_nameunit(dev)); 4693 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF); 4694 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), 4695 "%s:drv", device_get_nameunit(dev)); 4696 mtx_init(&sc->driver_mtx, sc->driver_mtx_name, 4697 MTX_NETWORK_LOCK, MTX_DEF); 4698 4699 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); 4700 4701 mxge_setup_cfg_space(sc); 4702 4703 /* Map the board into the kernel */ 4704 rid = PCIR_BARS; 4705 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, 4706 ~0, 1, RF_ACTIVE); 4707 if (sc->mem_res == NULL) { 4708 device_printf(dev, "could not map memory\n"); 4709 err = ENXIO; 4710 goto abort_with_lock; 4711 } 4712 sc->sram = rman_get_virtual(sc->mem_res); 4713 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4714 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4715 device_printf(dev, "impossible memory region size %ld\n", 4716 rman_get_size(sc->mem_res)); 4717 err = ENXIO; 4718 goto abort_with_mem_res; 4719 } 4720 4721 /* make NULL terminated copy of the EEPROM strings section of 4722 lanai SRAM */ 4723 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4724 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4725 rman_get_bushandle(sc->mem_res), 4726 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4727 sc->eeprom_strings, 4728 MXGE_EEPROM_STRINGS_SIZE - 2); 4729 err = mxge_parse_strings(sc); 4730 if (err != 0) 4731 goto abort_with_mem_res; 4732 4733 /* Enable write combining for efficient use of PCIe bus */ 4734 mxge_enable_wc(sc); 4735 4736 /* Allocate the out of band dma memory */ 4737 err = mxge_dma_alloc(sc, &sc->cmd_dma, 4738 sizeof (mxge_cmd_t), 64); 4739 if (err != 0) 4740 goto abort_with_mem_res; 4741 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 4742 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4743 if (err != 0) 4744 goto abort_with_cmd_dma; 4745 4746 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4747 if (err != 0) 4748 goto abort_with_zeropad_dma; 4749 4750 /* select & load the firmware */ 4751 err = mxge_select_firmware(sc); 4752 if (err != 0) 4753 goto abort_with_dmabench; 4754 sc->intr_coal_delay = mxge_intr_coal_delay; 4755 4756 mxge_slice_probe(sc); 4757 err = mxge_alloc_slices(sc); 4758 if (err != 0) 4759 goto abort_with_dmabench; 4760 4761 err = mxge_reset(sc, 0); 4762 if (err != 0) 4763 goto abort_with_slices; 4764 4765 err = mxge_alloc_rings(sc); 4766 if (err != 0) { 4767 device_printf(sc->dev, "failed to allocate rings\n"); 4768 goto abort_with_slices; 4769 } 4770 4771 err = mxge_add_irq(sc); 4772 if (err != 0) { 4773 device_printf(sc->dev, "failed to add irq\n"); 4774 goto abort_with_rings; 4775 } 4776 4777 if_initbaudrate(ifp, IF_Gbps(10)); 4778 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 4779 IFCAP_VLAN_MTU | IFCAP_LINKSTATE; 4780 #ifdef INET 4781 ifp->if_capabilities |= IFCAP_LRO; 4782 #endif 4783 4784 #ifdef MXGE_NEW_VLAN_API 4785 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; 4786 4787 /* Only FW 1.4.32 and newer can do TSO over vlans */ 4788 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 4789 sc->fw_ver_tiny >= 32) 4790 ifp->if_capabilities |= IFCAP_VLAN_HWTSO; 4791 #endif 4792 4793 sc->max_mtu = mxge_max_mtu(sc); 4794 if (sc->max_mtu >= 9000) 4795 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 4796 else 4797 device_printf(dev, "MTU limited to %d. Install " 4798 "latest firmware for 9000 byte jumbo support\n", 4799 sc->max_mtu - ETHER_HDR_LEN); 4800 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4801 ifp->if_capenable = ifp->if_capabilities; 4802 if (sc->lro_cnt == 0) 4803 ifp->if_capenable &= ~IFCAP_LRO; 4804 sc->csum_flag = 1; 4805 ifp->if_init = mxge_init; 4806 ifp->if_softc = sc; 4807 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4808 ifp->if_ioctl = mxge_ioctl; 4809 ifp->if_start = mxge_start; 4810 /* Initialise the ifmedia structure */ 4811 ifmedia_init(&sc->media, 0, mxge_media_change, 4812 mxge_media_status); 4813 mxge_media_init(sc); 4814 mxge_media_probe(sc); 4815 sc->dying = 0; 4816 ether_ifattach(ifp, sc->mac_addr); 4817 /* ether_ifattach sets mtu to ETHERMTU */ 4818 if (mxge_initial_mtu != ETHERMTU) 4819 mxge_change_mtu(sc, mxge_initial_mtu); 4820 4821 mxge_add_sysctls(sc); 4822 #ifdef IFNET_BUF_RING 4823 ifp->if_transmit = mxge_transmit; 4824 ifp->if_qflush = mxge_qflush; 4825 #endif 4826 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq", 4827 device_get_nameunit(sc->dev)); 4828 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 4829 return 0; 4830 4831 abort_with_rings: 4832 mxge_free_rings(sc); 4833 abort_with_slices: 4834 mxge_free_slices(sc); 4835 abort_with_dmabench: 4836 mxge_dma_free(&sc->dmabench_dma); 4837 abort_with_zeropad_dma: 4838 mxge_dma_free(&sc->zeropad_dma); 4839 abort_with_cmd_dma: 4840 mxge_dma_free(&sc->cmd_dma); 4841 abort_with_mem_res: 4842 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4843 abort_with_lock: 4844 pci_disable_busmaster(dev); 4845 mtx_destroy(&sc->cmd_mtx); 4846 mtx_destroy(&sc->driver_mtx); 4847 if_free(ifp); 4848 abort_with_parent_dmat: 4849 bus_dma_tag_destroy(sc->parent_dmat); 4850 abort_with_tq: 4851 if (sc->tq != NULL) { 4852 taskqueue_drain(sc->tq, &sc->watchdog_task); 4853 taskqueue_free(sc->tq); 4854 sc->tq = NULL; 4855 } 4856 abort_with_nothing: 4857 return err; 4858 } 4859 4860 static int 4861 mxge_detach(device_t dev) 4862 { 4863 mxge_softc_t *sc = device_get_softc(dev); 4864 4865 if (mxge_vlans_active(sc)) { 4866 device_printf(sc->dev, 4867 "Detach vlans before removing module\n"); 4868 return EBUSY; 4869 } 4870 mtx_lock(&sc->driver_mtx); 4871 sc->dying = 1; 4872 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 4873 mxge_close(sc, 0); 4874 mtx_unlock(&sc->driver_mtx); 4875 ether_ifdetach(sc->ifp); 4876 if (sc->tq != NULL) { 4877 taskqueue_drain(sc->tq, &sc->watchdog_task); 4878 taskqueue_free(sc->tq); 4879 sc->tq = NULL; 4880 } 4881 callout_drain(&sc->co_hdl); 4882 ifmedia_removeall(&sc->media); 4883 mxge_dummy_rdma(sc, 0); 4884 mxge_rem_sysctls(sc); 4885 mxge_rem_irq(sc); 4886 mxge_free_rings(sc); 4887 mxge_free_slices(sc); 4888 mxge_dma_free(&sc->dmabench_dma); 4889 mxge_dma_free(&sc->zeropad_dma); 4890 mxge_dma_free(&sc->cmd_dma); 4891 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4892 pci_disable_busmaster(dev); 4893 mtx_destroy(&sc->cmd_mtx); 4894 mtx_destroy(&sc->driver_mtx); 4895 if_free(sc->ifp); 4896 bus_dma_tag_destroy(sc->parent_dmat); 4897 return 0; 4898 } 4899 4900 static int 4901 mxge_shutdown(device_t dev) 4902 { 4903 return 0; 4904 } 4905 4906 /* 4907 This file uses Myri10GE driver indentation. 4908 4909 Local Variables: 4910 c-file-style:"linux" 4911 tab-width:8 4912 End: 4913 */ 4914