/******************************************************************************

Copyright (c) 2006-2009, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>

/* count xmits ourselves, rather than via drbr */
#define NO_SLOW_STATS
#include <net/if.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <net/zlib.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <machine/bus.h>
#include <machine/in_cksum.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>
#include <sys/smp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>	/* XXX for pci_cfg_restore */

#include <vm/vm.h>			/* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif

#include <dev/mxge/mxge_mcp.h>
#include <dev/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/mxge/if_mxge_var.h>
#ifdef IFNET_BUF_RING
#include <sys/buf_ring.h>
#endif

#include "opt_inet.h"

/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_lro_cnt = 8;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
static int mxge_always_promisc = 0;
static int mxge_initial_mtu = ETHERMTU_JUMBO;
static int mxge_throttle = 0;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";

static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),
	{0, 0}
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc, int down);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);

static int
mxge_probe(device_t dev)
{
	int rev;

	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		rev = pci_get_revid(dev);
		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n",
				      rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}

static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386) || defined(__amd64)
	vm_offset_t len;
	int err;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram,
			       len, PAT_WRITE_COMBINING);
	if (err != 0) {
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
			      err);
		sc->wc = 0;
	}
#endif
}

/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	if (error == 0) {
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}

static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}
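	/*
	 * Illustrative note (not in the original source): with this
	 * choice, a page-aligned allocation larger than one page gets
	 * a single segment with no boundary restriction, while every
	 * other allocation is confined within a 4KB boundary, so a
	 * single transfer never straddles a page.
	 */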
	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}

static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */

static int
mxge_parse_strings(mxge_softc_t *sc)
{
#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)

	char *ptr, *limit;
	int i, found_mac;

	ptr = sc->eeprom_strings;
	limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
	found_mac = 0;
	while (ptr < limit && *ptr != '\0') {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			ptr += 1;
			sc->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				ptr += 3;
				if ((ptr + 2) > limit)
					goto abort;
				sc->mac_addr[i] = strtoul(ptr, NULL, 16);
				found_mac = 1;
			}
		} else if (memcmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strncpy(sc->product_code_string, ptr,
				sizeof (sc->product_code_string) - 1);
		} else if (memcmp(ptr, "SN=", 3) == 0) {
			ptr += 3;
			strncpy(sc->serial_number_string, ptr,
				sizeof (sc->serial_number_string) - 1);
		}
		MXGE_NEXT_STRING(ptr);
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}

#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that
	   doing config read/write beyond 0xff will access the config
	   space for the next larger function.  Uncomment this and
	   remove the hacky pmap_mapdev() way of accessing config
	   space when FreeBSD grows support for extended pcie config
	   space access.
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves.  This is because on
	 * opteron/nvidia class machines the 0xe0000000 mapping is
	 * handled by the nvidia chipset, which means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	off = base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);

	if (va == NULL) {
		device_printf(sc->dev, "pmap_mapdev failed\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif

static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";

	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits are the time in 0.5us ticks that the
	 * transfers took to complete.
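	 *
	 * Worked example (illustrative numbers, not from the original
	 * driver): if cmd.data0 came back as 0x01000800 with
	 * len == 4096, then 0x0100 == 256 transfers completed in
	 * 0x0800 == 2048 ticks (1024us), and the read_dma computation
	 * below yields (256 * 4096 * 2) / 2048 == 1024 MB/s.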
	 */

	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}

/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary.  Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB.  If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */

static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
				      pectl);
			sc->tx_boundary = 2048;
		}
	}

	/*
	 * load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0) {
		return status;
	}

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen.
	 */

	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS)
		device_printf(dev, "Falling back to ethp! "
			      "Please install up to date fw\n");
	return status;
}
" 593 "Please install up to date fw\n"); 594 return status; 595 } 596 597 static int 598 mxge_select_firmware(mxge_softc_t *sc) 599 { 600 int aligned = 0; 601 int force_firmware = mxge_force_firmware; 602 603 if (sc->throttle) 604 force_firmware = sc->throttle; 605 606 if (force_firmware != 0) { 607 if (force_firmware == 1) 608 aligned = 1; 609 else 610 aligned = 0; 611 if (mxge_verbose) 612 device_printf(sc->dev, 613 "Assuming %s completions (forced)\n", 614 aligned ? "aligned" : "unaligned"); 615 goto abort; 616 } 617 618 /* if the PCIe link width is 4 or less, we can use the aligned 619 firmware and skip any checks */ 620 if (sc->link_width != 0 && sc->link_width <= 4) { 621 device_printf(sc->dev, 622 "PCIe x%d Link, expect reduced performance\n", 623 sc->link_width); 624 aligned = 1; 625 goto abort; 626 } 627 628 if (0 == mxge_firmware_probe(sc)) 629 return 0; 630 631 abort: 632 if (aligned) { 633 sc->fw_name = mxge_fw_aligned; 634 sc->tx_boundary = 4096; 635 } else { 636 sc->fw_name = mxge_fw_unaligned; 637 sc->tx_boundary = 2048; 638 } 639 return (mxge_load_firmware(sc, 0)); 640 } 641 642 union qualhack 643 { 644 const char *ro_char; 645 char *rw_char; 646 }; 647 648 static int 649 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 650 { 651 652 653 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 654 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 655 be32toh(hdr->mcp_type)); 656 return EIO; 657 } 658 659 /* save firmware version for sysctl */ 660 strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version)); 661 if (mxge_verbose) 662 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 663 664 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 665 &sc->fw_ver_minor, &sc->fw_ver_tiny); 666 667 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 668 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 669 device_printf(sc->dev, "Found firmware version %s\n", 670 sc->fw_version); 671 device_printf(sc->dev, "Driver needs %d.%d\n", 672 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 673 return EINVAL; 674 } 675 return 0; 676 677 } 678 679 static void * 680 z_alloc(void *nil, u_int items, u_int size) 681 { 682 void *ptr; 683 684 ptr = malloc(items * size, M_TEMP, M_NOWAIT); 685 return ptr; 686 } 687 688 static void 689 z_free(void *nil, void *ptr) 690 { 691 free(ptr, M_TEMP); 692 } 693 694 695 static int 696 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 697 { 698 z_stream zs; 699 char *inflate_buffer; 700 const struct firmware *fw; 701 const mcp_gen_header_t *hdr; 702 unsigned hdr_offset; 703 int status; 704 unsigned int i; 705 char dummy; 706 size_t fw_len; 707 708 fw = firmware_get(sc->fw_name); 709 if (fw == NULL) { 710 device_printf(sc->dev, "Could not find firmware image %s\n", 711 sc->fw_name); 712 return ENOENT; 713 } 714 715 716 717 /* setup zlib and decompress f/w */ 718 bzero(&zs, sizeof (zs)); 719 zs.zalloc = z_alloc; 720 zs.zfree = z_free; 721 status = inflateInit(&zs); 722 if (status != Z_OK) { 723 status = EIO; 724 goto abort_with_fw; 725 } 726 727 /* the uncompressed size is stored as the firmware version, 728 which would otherwise go unused */ 729 fw_len = (size_t) fw->version; 730 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); 731 if (inflate_buffer == NULL) 732 goto abort_with_zs; 733 zs.avail_in = fw->datasize; 734 zs.next_in = __DECONST(char *, fw->data); 735 zs.avail_out = fw_len; 736 zs.next_out = inflate_buffer; 737 status = inflate(&zs, Z_FINISH); 738 if (status != Z_STREAM_END) { 739 device_printf(sc->dev, "zlib %d\n", status); 
		status = EIO;
		goto abort_with_buffer;
	}

	/* check id */
	hdr_offset = htobe32(*(const uint32_t *)
			     (inflate_buffer + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
		device_printf(sc->dev, "Bad firmware file\n");
		status = EIO;
		goto abort_with_buffer;
	}
	hdr = (const void*)(inflate_buffer + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_buffer;

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < fw_len; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
			      inflate_buffer + i,
			      min(256U, (unsigned)(fw_len - i)));
		wmb();
		dummy = *sc->sram;
		wmb();
	}

	*limit = fw_len;
	status = 0;
abort_with_buffer:
	free(inflate_buffer, M_TEMP);
abort_with_zs:
	inflateEnd(&zs);
abort_with_fw:
	firmware_put(fw, FIRMWARE_UNLOAD);
	return status;
}

/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high);	/* dummy addr MSW */
	buf[4] = htobe32(dma_low);	/* dummy addr LSW */
	buf[5] = htobe32(enable);	/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)\n",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}
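/*
 * Illustrative fragment (not part of the original driver): this is
 * the typical calling pattern for mxge_send_cmd(), mirroring how the
 * driver queries the firmware elsewhere (e.g. for the receive ring
 * size).  The caller fills in cmd.data0..data2 as inputs, and on
 * success the firmware's 32-bit reply comes back in cmd.data0.
 */
#if 0
	mxge_cmd_t cmd;
	int rx_ring_entries;

	cmd.data0 = 0;
	if (mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd) == 0)
		rx_ring_entries = cmd.data0 / sizeof (mcp_dma_addr_t);
#endif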
"enable" : "disable"), confirm, 831 *confirm); 832 } 833 return; 834 } 835 836 static int 837 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 838 { 839 mcp_cmd_t *buf; 840 char buf_bytes[sizeof(*buf) + 8]; 841 volatile mcp_cmd_response_t *response = sc->cmd; 842 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 843 uint32_t dma_low, dma_high; 844 int err, sleep_total = 0; 845 846 /* ensure buf is aligned to 8 bytes */ 847 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 848 849 buf->data0 = htobe32(data->data0); 850 buf->data1 = htobe32(data->data1); 851 buf->data2 = htobe32(data->data2); 852 buf->cmd = htobe32(cmd); 853 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 854 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 855 856 buf->response_addr.low = htobe32(dma_low); 857 buf->response_addr.high = htobe32(dma_high); 858 mtx_lock(&sc->cmd_mtx); 859 response->result = 0xffffffff; 860 wmb(); 861 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 862 863 /* wait up to 20ms */ 864 err = EAGAIN; 865 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 866 bus_dmamap_sync(sc->cmd_dma.dmat, 867 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 868 wmb(); 869 switch (be32toh(response->result)) { 870 case 0: 871 data->data0 = be32toh(response->data); 872 err = 0; 873 break; 874 case 0xffffffff: 875 DELAY(1000); 876 break; 877 case MXGEFW_CMD_UNKNOWN: 878 err = ENOSYS; 879 break; 880 case MXGEFW_CMD_ERROR_UNALIGNED: 881 err = E2BIG; 882 break; 883 case MXGEFW_CMD_ERROR_BUSY: 884 err = EBUSY; 885 break; 886 default: 887 device_printf(sc->dev, 888 "mxge: command %d " 889 "failed, result = %d\n", 890 cmd, be32toh(response->result)); 891 err = ENXIO; 892 break; 893 } 894 if (err != EAGAIN) 895 break; 896 } 897 if (err == EAGAIN) 898 device_printf(sc->dev, "mxge: command %d timed out" 899 "result = %d\n", 900 cmd, be32toh(response->result)); 901 mtx_unlock(&sc->cmd_mtx); 902 return err; 903 } 904 905 static int 906 mxge_adopt_running_firmware(mxge_softc_t *sc) 907 { 908 struct mcp_gen_header *hdr; 909 const size_t bytes = sizeof (struct mcp_gen_header); 910 size_t hdr_offset; 911 int status; 912 913 /* find running firmware header */ 914 hdr_offset = htobe32(*(volatile uint32_t *) 915 (sc->sram + MCP_HEADER_PTR_OFFSET)); 916 917 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 918 device_printf(sc->dev, 919 "Running firmware has bad header offset (%d)\n", 920 (int)hdr_offset); 921 return EIO; 922 } 923 924 /* copy header of running firmware from SRAM to host memory to 925 * validate firmware */ 926 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 927 if (hdr == NULL) { 928 device_printf(sc->dev, "could not malloc firmware hdr\n"); 929 return ENOMEM; 930 } 931 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 932 rman_get_bushandle(sc->mem_res), 933 hdr_offset, (char *)hdr, bytes); 934 status = mxge_validate_firmware(sc, hdr); 935 free(hdr, M_DEVBUF); 936 937 /* 938 * check to see if adopted firmware has bug where adopting 939 * it will cause broadcasts to be filtered unless the NIC 940 * is kept in ALLMULTI mode 941 */ 942 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 943 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 944 sc->adopted_rx_filter_bug = 1; 945 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 946 "working around rx filter bug\n", 947 sc->fw_ver_major, sc->fw_ver_minor, 948 sc->fw_ver_tiny); 949 } 950 951 return status; 952 } 953 954 955 static int 956 mxge_load_firmware(mxge_softc_t *sc, int adopt) 957 { 958 volatile 
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
				      "Using firmware currently running on NIC"
				      ".  For optimal\n");
			device_printf(sc->dev,
				      "performance consider loading optimized "
				      "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
					/* where the code starts*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8);	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "handoff failed (%p = 0x%x)\n",
			      confirm, *confirm);

		return ENXIO;
	}
	return 0;
}

static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}

static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set promisc mode\n");
	}
}

static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */

	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */

	if_maddr_rlock(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		      &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		      &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "Failed "
				      "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
				      "%d\n", err);
			/* abort, leaving multicast filtering off */
			if_maddr_runlock(ifp);
			return;
		}
	}
	if_maddr_runlock(ifp);
	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}

static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}

static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */
	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts  */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);

	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed to set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lro_bad_csum = 0;
		ss->lro_queued = 0;
		ss->lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			bzero(ss->fw_stats, sizeof *ss->fw_stats);
		}
	}
	sc->rdma_tags_available = 15;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	if (sc->throttle) {
		cmd.data0 = sc->throttle;
		if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
				  &cmd)) {
			device_printf(sc->dev,
				      "can't enable throttle\n");
		}
	}
	return status;
}
static int
mxge_change_throttle(SYSCTL_HANDLER_ARGS)
{
	mxge_cmd_t cmd;
	mxge_softc_t *sc;
	int err;
	unsigned int throttle;

	sc = arg1;
	throttle = sc->throttle;
	err = sysctl_handle_int(oidp, &throttle, arg2, req);
	if (err != 0) {
		return err;
	}

	if (throttle == sc->throttle)
		return 0;

	if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	cmd.data0 = throttle;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
	if (err == 0)
		sc->throttle = throttle;
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0) {
		return err;
	}
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0) {
		return err;
	}
	if (enabled == sc->pause)
		return 0;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_pause(sc, enabled);
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
{
	struct ifnet *ifp;
	int err = 0;

	ifp = sc->ifp;
	if (lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	else
		ifp->if_capenable |= IFCAP_LRO;
	sc->lro_cnt = lro_cnt;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		mxge_close(sc, 0);
		err = mxge_open(sc);
	}
	return err;
}

static int
mxge_change_lro(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int lro_cnt;
	int err;

	sc = arg1;
	lro_cnt = sc->lro_cnt;
	err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
	if (err != 0)
		return err;

	if (lro_cnt == sc->lro_cnt)
		return 0;

	if (lro_cnt > 128)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_lro_locked(sc, lro_cnt);
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}

static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int slice;

	if (sc->slice_sysctl_tree == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		if (ss == NULL || ss->sysctl_tree == NULL)
			continue;
		sysctl_ctx_free(&ss->sysctl_ctx);
		ss->sysctl_tree = NULL;
	}
	sysctl_ctx_free(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = NULL;
}
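/*
 * Note (illustrative, not in the original source): the OIDs created
 * below hang off the device's sysctl tree, so they appear as, e.g.,
 * dev.mxge.0.intr_coal_delay and dev.mxge.0.slice.0.rx_small_cnt.
 */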
static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
	fw = sc->ss[0].fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "firmware_version",
			  CTLFLAG_RD, &sc->fw_version,
			  0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "serial_number",
			  CTLFLAG_RD, &sc->serial_number_string,
			  0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "product_code",
			  CTLFLAG_RD, &sc->product_code_string,
			  0, "product_code");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "pcie_link_width",
		       CTLFLAG_RD, &sc->link_width,
		       0, "PCIe link width");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_boundary",
		       CTLFLAG_RD, &sc->tx_boundary,
		       0, "tx_boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_combine",
		       CTLFLAG_RD, &sc->wc,
		       0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_dma_MBs",
		       CTLFLAG_RD, &sc->read_dma,
		       0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_dma_MBs",
		       CTLFLAG_RD, &sc->write_dma,
		       0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_write_dma_MBs",
		       CTLFLAG_RD, &sc->read_write_dma,
		       0, "DMA concurrent Read/Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "watchdog_resets",
		       CTLFLAG_RD, &sc->watchdog_resets,
		       0, "Number of times NIC was reset");

	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"intr_coal_delay",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_intr_coal,
			"I", "interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"throttle",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_throttle,
			"I", "transmit throttling");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"flow_control_enabled",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_flow_control,
			"I", "enable flow control (pause frames)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "deassert_wait",
		       CTLFLAG_RW, &mxge_deassert_wait,
		       0, "Wait for IRQ line to go low in ihandler");

	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"link_up",
			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
			0, mxge_handle_be32,
			"I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"rdma_tags_available",
			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
			0, mxge_handle_be32,
			"I", "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_crc32",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_crc32,
			0, mxge_handle_be32,
			"I", "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_phy",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_phy,
			0, mxge_handle_be32,
			"I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_error_or_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_link_error_or_filtered,
			0, mxge_handle_be32,
			"I", "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_overflow",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
			0, mxge_handle_be32,
			"I", "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_multicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_multicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_big_buffer",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_small_buffer",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_no_small_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_overrun",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
			0, mxge_handle_be32,
			"I", "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_pause",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_pause,
			0, mxge_handle_be32,
			"I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_runt",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
			0, mxge_handle_be32,
			"I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_unicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_unicast_filtered");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "verbose",
		       CTLFLAG_RW, &mxge_verbose,
		       0, "verbose printing");

	/* lro */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"lro_cnt",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_lro,
			"I", "number of lro merge queues");

	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree =
		SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
				"slice", CTLFLAG_RD, 0, "");

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		sprintf(slice_num, "%d", slice);
		ss->sysctl_tree =
			SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
					CTLFLAG_RD, 0, "");
		children = SYSCTL_CHILDREN(ss->sysctl_tree);
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_small_cnt",
			       CTLFLAG_RD, &ss->rx_small.cnt,
			       0, "rx_small_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_big_cnt",
			       CTLFLAG_RD, &ss->rx_big.cnt,
			       0, "rx_big_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
			       0, "number of lro merge queues flushed");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_queued", CTLFLAG_RD, &ss->lro_queued,
			       0, "number of frames appended to lro merge "
			       "queues");

#ifndef IFNET_BUF_RING
		/* only transmit from slice 0 for now */
		if (slice > 0)
			continue;
#endif
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_req",
			       CTLFLAG_RD, &ss->tx.req,
			       0, "tx_req");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_done",
			       CTLFLAG_RD, &ss->tx.done,
			       0, "tx_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_pkt_done",
			       CTLFLAG_RD, &ss->tx.pkt_done,
			       0, "tx_pkt_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_stall",
			       CTLFLAG_RD, &ss->tx.stall,
			       0, "tx_stall");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_wake",
			       CTLFLAG_RD, &ss->tx.wake,
			       0, "tx_wake");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_defrag",
			       CTLFLAG_RD, &ss->tx.defrag,
			       0, "tx_defrag");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_queue_active",
			       CTLFLAG_RD, &ss->tx.queue_active,
			       0, "tx_queue_active");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_activate",
			       CTLFLAG_RD, &ss->tx.activate,
			       0, "tx_activate");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_deactivate",
			       CTLFLAG_RD, &ss->tx.deactivate,
			       0, "tx_deactivate");
	}
}

/* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
   backwards one at a time and handle ring wraps */

static inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
			  mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
		wmb();
	}
}
/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */

static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
		int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}

#if IFCAP_TSO4

static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, int ip_off)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct ip *ip;
	struct tcphdr *tcp;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	/* ensure we have the ethernet, IP and TCP
	   header together in the first mbuf, copy
	   it to a scratch buffer if not */
	if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
		m_copydata(m, 0, ip_off + sizeof (*ip),
			   ss->scratch);
		ip = (struct ip *)(ss->scratch + ip_off);
	} else {
		ip = (struct ip *)(mtod(m, char *) + ip_off);
	}
	if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2)
			    + sizeof (*tcp))) {
		m_copydata(m, 0, ip_off + (ip->ip_hl << 2)
			   + sizeof (*tcp), ss->scratch);
		ip = (struct ip *)(ss->scratch + ip_off);
	}

	tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
	cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2));

	/* TSO implies checksum offload on this hardware */
	cksum_offset = ip_off + (ip->ip_hl << 2);
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */
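	/*
	 * Worked example (illustrative, not in the original source):
	 * for a frame with a 14-byte Ethernet header, a 20-byte IP
	 * header and a 32-byte TCP header, cum_len starts at -66.
	 * It turns non-negative exactly when the loop below crosses
	 * from TSO header bytes into payload, which is when the
	 * "header ends" branch fires and trims seglen to the
	 * remaining header bytes.
	 */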
	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces*/
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (__predict_false(cksum_offset > seglen))
				cksum_offset -= seglen;
			else
				cksum_offset = 0;
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req-rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	ss->oerrors++;
	if (!once) {
		printf("tx->max_desc exceeded via TSO!\n");
		printf("mss = %d, %ld, %d!\n", mss,
		       (long)seg - (long)tx->seg_list, tx->max_desc);
		once = 1;
	}
	return;

}

#endif /* IFCAP_TSO4 */

#ifdef MXGE_NEW_VLAN_API
/*
 * We reproduce the software vlan tag insertion from
 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
 * vlan tag insertion.  We need to advertise this in order to have the
 * vlan interface respect our csum offload flags.
 */
1989 */ 1990 static struct mbuf * 1991 mxge_vlan_tag_insert(struct mbuf *m) 1992 { 1993 struct ether_vlan_header *evl; 1994 1995 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT); 1996 if (__predict_false(m == NULL)) 1997 return NULL; 1998 if (m->m_len < sizeof(*evl)) { 1999 m = m_pullup(m, sizeof(*evl)); 2000 if (__predict_false(m == NULL)) 2001 return NULL; 2002 } 2003 /* 2004 * Transform the Ethernet header into an Ethernet header 2005 * with 802.1Q encapsulation. 2006 */ 2007 evl = mtod(m, struct ether_vlan_header *); 2008 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 2009 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 2010 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 2011 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 2012 m->m_flags &= ~M_VLANTAG; 2013 return m; 2014 } 2015 #endif /* MXGE_NEW_VLAN_API */ 2016 2017 static void 2018 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 2019 { 2020 mxge_softc_t *sc; 2021 mcp_kreq_ether_send_t *req; 2022 bus_dma_segment_t *seg; 2023 struct mbuf *m_tmp; 2024 struct ifnet *ifp; 2025 mxge_tx_ring_t *tx; 2026 struct ip *ip; 2027 int cnt, cum_len, err, i, idx, odd_flag, ip_off; 2028 uint16_t pseudo_hdr_offset; 2029 uint8_t flags, cksum_offset; 2030 2031 2032 sc = ss->sc; 2033 ifp = sc->ifp; 2034 tx = &ss->tx; 2035 2036 ip_off = sizeof (struct ether_header); 2037 #ifdef MXGE_NEW_VLAN_API 2038 if (m->m_flags & M_VLANTAG) { 2039 m = mxge_vlan_tag_insert(m); 2040 if (__predict_false(m == NULL)) 2041 goto drop; 2042 ip_off += ETHER_VLAN_ENCAP_LEN; 2043 } 2044 #endif 2045 /* (try to) map the frame for DMA */ 2046 idx = tx->req & tx->mask; 2047 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 2048 m, tx->seg_list, &cnt, 2049 BUS_DMA_NOWAIT); 2050 if (__predict_false(err == EFBIG)) { 2051 /* Too many segments in the chain. Try 2052 to defrag */ 2053 m_tmp = m_defrag(m, M_NOWAIT); 2054 if (m_tmp == NULL) { 2055 goto drop; 2056 } 2057 ss->tx.defrag++; 2058 m = m_tmp; 2059 err = bus_dmamap_load_mbuf_sg(tx->dmat, 2060 tx->info[idx].map, 2061 m, tx->seg_list, &cnt, 2062 BUS_DMA_NOWAIT); 2063 } 2064 if (__predict_false(err != 0)) { 2065 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 2066 " packet len = %d\n", err, m->m_pkthdr.len); 2067 goto drop; 2068 } 2069 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 2070 BUS_DMASYNC_PREWRITE); 2071 tx->info[idx].m = m; 2072 2073 #if IFCAP_TSO4 2074 /* TSO is different enough, we handle it in another routine */ 2075 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 2076 mxge_encap_tso(ss, m, cnt, ip_off); 2077 return; 2078 } 2079 #endif 2080 2081 req = tx->req_list; 2082 cksum_offset = 0; 2083 pseudo_hdr_offset = 0; 2084 flags = MXGEFW_FLAGS_NO_TSO; 2085 2086 /* checksum offloading? 
*/ 2087 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) { 2088 /* ensure ip header is in first mbuf, copy 2089 it to a scratch buffer if not */ 2090 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 2091 m_copydata(m, 0, ip_off + sizeof (*ip), 2092 ss->scratch); 2093 ip = (struct ip *)(ss->scratch + ip_off); 2094 } else { 2095 ip = (struct ip *)(mtod(m, char *) + ip_off); 2096 } 2097 cksum_offset = ip_off + (ip->ip_hl << 2); 2098 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2099 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2100 req->cksum_offset = cksum_offset; 2101 flags |= MXGEFW_FLAGS_CKSUM; 2102 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2103 } else { 2104 odd_flag = 0; 2105 } 2106 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2107 flags |= MXGEFW_FLAGS_SMALL; 2108 2109 /* convert segments into a request list */ 2110 cum_len = 0; 2111 seg = tx->seg_list; 2112 req->flags = MXGEFW_FLAGS_FIRST; 2113 for (i = 0; i < cnt; i++) { 2114 req->addr_low = 2115 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2116 req->addr_high = 2117 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2118 req->length = htobe16(seg->ds_len); 2119 req->cksum_offset = cksum_offset; 2120 if (cksum_offset > seg->ds_len) 2121 cksum_offset -= seg->ds_len; 2122 else 2123 cksum_offset = 0; 2124 req->pseudo_hdr_offset = pseudo_hdr_offset; 2125 req->pad = 0; /* complete solid 16-byte block */ 2126 req->rdma_count = 1; 2127 req->flags |= flags | ((cum_len & 1) * odd_flag); 2128 cum_len += seg->ds_len; 2129 seg++; 2130 req++; 2131 req->flags = 0; 2132 } 2133 req--; 2134 /* pad runts to 60 bytes */ 2135 if (cum_len < 60) { 2136 req++; 2137 req->addr_low = 2138 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2139 req->addr_high = 2140 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2141 req->length = htobe16(60 - cum_len); 2142 req->cksum_offset = 0; 2143 req->pseudo_hdr_offset = pseudo_hdr_offset; 2144 req->pad = 0; /* complete solid 16-byte block */ 2145 req->rdma_count = 1; 2146 req->flags |= flags | ((cum_len & 1) * odd_flag); 2147 cnt++; 2148 } 2149 2150 tx->req_list[0].rdma_count = cnt; 2151 #if 0 2152 /* print what the firmware will see */ 2153 for (i = 0; i < cnt; i++) { 2154 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2155 "cso:%d, flags:0x%x, rdma:%d\n", 2156 i, (int)ntohl(tx->req_list[i].addr_high), 2157 (int)ntohl(tx->req_list[i].addr_low), 2158 (int)ntohs(tx->req_list[i].length), 2159 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2160 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2161 tx->req_list[i].rdma_count); 2162 } 2163 printf("--------------\n"); 2164 #endif 2165 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2166 mxge_submit_req(tx, tx->req_list, cnt); 2167 #ifdef IFNET_BUF_RING 2168 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2169 /* tell the NIC to start polling this slice */ 2170 *tx->send_go = 1; 2171 tx->queue_active = 1; 2172 tx->activate++; 2173 wmb(); 2174 } 2175 #endif 2176 return; 2177 2178 drop: 2179 m_freem(m); 2180 ss->oerrors++; 2181 return; 2182 } 2183 2184 #ifdef IFNET_BUF_RING 2185 static void 2186 mxge_qflush(struct ifnet *ifp) 2187 { 2188 mxge_softc_t *sc = ifp->if_softc; 2189 mxge_tx_ring_t *tx; 2190 struct mbuf *m; 2191 int slice; 2192 2193 for (slice = 0; slice < sc->num_slices; slice++) { 2194 tx = &sc->ss[slice].tx; 2195 mtx_lock(&tx->mtx); 2196 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2197 m_freem(m); 2198 mtx_unlock(&tx->mtx); 2199 } 2200 if_qflush(ifp); 2201 } 2202 2203 static inline void 2204 mxge_start_locked(struct 
mxge_slice_state *ss) 2205 { 2206 mxge_softc_t *sc; 2207 struct mbuf *m; 2208 struct ifnet *ifp; 2209 mxge_tx_ring_t *tx; 2210 2211 sc = ss->sc; 2212 ifp = sc->ifp; 2213 tx = &ss->tx; 2214 2215 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2216 m = drbr_dequeue(ifp, tx->br); 2217 if (m == NULL) { 2218 return; 2219 } 2220 /* let BPF see it */ 2221 BPF_MTAP(ifp, m); 2222 2223 /* give it to the nic */ 2224 mxge_encap(ss, m); 2225 } 2226 /* ran out of transmit slots */ 2227 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) 2228 && (!drbr_empty(ifp, tx->br))) { 2229 ss->if_drv_flags |= IFF_DRV_OACTIVE; 2230 tx->stall++; 2231 } 2232 } 2233 2234 static int 2235 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2236 { 2237 mxge_softc_t *sc; 2238 struct ifnet *ifp; 2239 mxge_tx_ring_t *tx; 2240 int err; 2241 2242 sc = ss->sc; 2243 ifp = sc->ifp; 2244 tx = &ss->tx; 2245 2246 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 2247 IFF_DRV_RUNNING) { 2248 err = drbr_enqueue(ifp, tx->br, m); 2249 return (err); 2250 } 2251 2252 if (drbr_empty(ifp, tx->br) && 2253 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2254 /* let BPF see it */ 2255 BPF_MTAP(ifp, m); 2256 /* give it to the nic */ 2257 mxge_encap(ss, m); 2258 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2259 return (err); 2260 } 2261 if (!drbr_empty(ifp, tx->br)) 2262 mxge_start_locked(ss); 2263 return (0); 2264 } 2265 2266 static int 2267 mxge_transmit(struct ifnet *ifp, struct mbuf *m) 2268 { 2269 mxge_softc_t *sc = ifp->if_softc; 2270 struct mxge_slice_state *ss; 2271 mxge_tx_ring_t *tx; 2272 int err = 0; 2273 int slice; 2274 2275 slice = m->m_pkthdr.flowid; 2276 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ 2277 2278 ss = &sc->ss[slice]; 2279 tx = &ss->tx; 2280 2281 if (mtx_trylock(&tx->mtx)) { 2282 err = mxge_transmit_locked(ss, m); 2283 mtx_unlock(&tx->mtx); 2284 } else { 2285 err = drbr_enqueue(ifp, tx->br, m); 2286 } 2287 2288 return (err); 2289 } 2290 2291 #else 2292 2293 static inline void 2294 mxge_start_locked(struct mxge_slice_state *ss) 2295 { 2296 mxge_softc_t *sc; 2297 struct mbuf *m; 2298 struct ifnet *ifp; 2299 mxge_tx_ring_t *tx; 2300 2301 sc = ss->sc; 2302 ifp = sc->ifp; 2303 tx = &ss->tx; 2304 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2305 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 2306 if (m == NULL) { 2307 return; 2308 } 2309 /* let BPF see it */ 2310 BPF_MTAP(ifp, m); 2311 2312 /* give it to the nic */ 2313 mxge_encap(ss, m); 2314 } 2315 /* ran out of transmit slots */ 2316 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 2317 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2318 tx->stall++; 2319 } 2320 } 2321 #endif 2322 static void 2323 mxge_start(struct ifnet *ifp) 2324 { 2325 mxge_softc_t *sc = ifp->if_softc; 2326 struct mxge_slice_state *ss; 2327 2328 /* only use the first slice for now */ 2329 ss = &sc->ss[0]; 2330 mtx_lock(&ss->tx.mtx); 2331 mxge_start_locked(ss); 2332 mtx_unlock(&ss->tx.mtx); 2333 } 2334 2335 /* 2336 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2337 * at most 32 bytes at a time, so as to avoid involving the software 2338 * pio handler in the nic. 
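 * (Our reading of the firmware interface: an addr_low of 0xffffffff
 * marks a receive descriptor as not yet valid, so parking that value
 * in the first slot keeps the NIC from consuming a half-written
 * burst.)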
We re-write the first segment's low 2339 * DMA address to mark it valid only after we write the entire chunk 2340 * in a burst 2341 */ 2342 static inline void 2343 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2344 mcp_kreq_ether_recv_t *src) 2345 { 2346 uint32_t low; 2347 2348 low = src->addr_low; 2349 src->addr_low = 0xffffffff; 2350 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2351 wmb(); 2352 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2353 wmb(); 2354 src->addr_low = low; 2355 dst->addr_low = low; 2356 wmb(); 2357 } 2358 2359 static int 2360 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2361 { 2362 bus_dma_segment_t seg; 2363 struct mbuf *m; 2364 mxge_rx_ring_t *rx = &ss->rx_small; 2365 int cnt, err; 2366 2367 m = m_gethdr(M_DONTWAIT, MT_DATA); 2368 if (m == NULL) { 2369 rx->alloc_fail++; 2370 err = ENOBUFS; 2371 goto done; 2372 } 2373 m->m_len = MHLEN; 2374 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2375 &seg, &cnt, BUS_DMA_NOWAIT); 2376 if (err != 0) { 2377 m_free(m); 2378 goto done; 2379 } 2380 rx->info[idx].m = m; 2381 rx->shadow[idx].addr_low = 2382 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2383 rx->shadow[idx].addr_high = 2384 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2385 2386 done: 2387 if ((idx & 7) == 7) 2388 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2389 return err; 2390 } 2391 2392 static int 2393 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2394 { 2395 bus_dma_segment_t seg[3]; 2396 struct mbuf *m; 2397 mxge_rx_ring_t *rx = &ss->rx_big; 2398 int cnt, err, i; 2399 2400 if (rx->cl_size == MCLBYTES) 2401 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); 2402 else 2403 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2404 if (m == NULL) { 2405 rx->alloc_fail++; 2406 err = ENOBUFS; 2407 goto done; 2408 } 2409 m->m_len = rx->mlen; 2410 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2411 seg, &cnt, BUS_DMA_NOWAIT); 2412 if (err != 0) { 2413 m_free(m); 2414 goto done; 2415 } 2416 rx->info[idx].m = m; 2417 rx->shadow[idx].addr_low = 2418 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2419 rx->shadow[idx].addr_high = 2420 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2421 2422 #if MXGE_VIRT_JUMBOS 2423 for (i = 1; i < cnt; i++) { 2424 rx->shadow[idx + i].addr_low = 2425 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2426 rx->shadow[idx + i].addr_high = 2427 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2428 } 2429 #endif 2430 2431 done: 2432 for (i = 0; i < rx->nbufs; i++) { 2433 if ((idx & 7) == 7) { 2434 mxge_submit_8rx(&rx->lanai[idx - 7], 2435 &rx->shadow[idx - 7]); 2436 } 2437 idx++; 2438 } 2439 return err; 2440 } 2441 2442 /* 2443 * Myri10GE hardware checksums are not valid if the sender 2444 * padded the frame with non-zero padding. This is because 2445 * the firmware just does a simple 16-bit 1s complement 2446 * checksum across the entire frame, excluding the first 14 2447 * bytes. 
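 * (For example, a short UDP datagram that the sender padded out to
 * the 60-byte minimum with non-zero bytes gets those pad bytes summed
 * as well, so the hardware result cannot match a checksum computed
 * over just the real payload.)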
 * It is best to simply check the checksum and
 * tell the stack about it only if the checksum is good
 */

static inline uint16_t
mxge_rx_csum(struct mbuf *m, int csum)
{
	struct ether_header *eh;
	struct ip *ip;
	uint16_t c;

	eh = mtod(m, struct ether_header *);

	/* only deal with IPv4 TCP & UDP for now */
	if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
		return 1;
	ip = (struct ip *)(eh + 1);
	if (__predict_false(ip->ip_p != IPPROTO_TCP &&
			    ip->ip_p != IPPROTO_UDP))
		return 1;
#ifdef INET
	c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
		      htonl(ntohs(csum) + ntohs(ip->ip_len) +
			    - (ip->ip_hl << 2) + ip->ip_p));
#else
	c = 1;
#endif
	c ^= 0xffff;
	return (c);
}

static void
mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
{
	struct ether_vlan_header *evl;
	struct ether_header *eh;
	uint32_t partial;

	evl = mtod(m, struct ether_vlan_header *);
	eh = mtod(m, struct ether_header *);

	/*
	 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
	 * after what the firmware thought was the end of the ethernet
	 * header.
	 */

	/* put checksum into host byte order */
	*csum = ntohs(*csum);
	partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
	(*csum) += ~partial;
	(*csum) += ((*csum) < ~partial);
	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);

	/* restore checksum to network byte order;
	   later consumers expect this */
	*csum = htons(*csum);

	/* save the tag */
#ifdef MXGE_NEW_VLAN_API
	m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
#else
	{
		struct m_tag *mtag;
		mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
				   M_NOWAIT);
		if (mtag == NULL)
			return;
		VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
		m_tag_prepend(m, mtag);
	}

#endif
	m->m_flags |= M_VLANTAG;

	/*
	 * Remove the 802.1q header by copying the Ethernet
	 * addresses over it and adjusting the beginning of
	 * the data in the mbuf.  The encapsulated Ethernet
	 * type field is already in place.
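	 * (Hypothetical layout, with ETHER_VLAN_ENCAP_LEN == 4:
	 *	before:	[dst 6][src 6][0x8100 2][tag 2][type 2][payload]
	 *	after:	[dst 6][src 6][type 2][payload]
	 * the 12 address bytes are slid forward over the 4 encapsulation
	 * bytes by the bcopy below, and m_adj() then trims the now-stale
	 * 4 bytes from the front of the mbuf.)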
2528 */ 2529 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2530 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2531 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2532 } 2533 2534 2535 static inline void 2536 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2537 { 2538 mxge_softc_t *sc; 2539 struct ifnet *ifp; 2540 struct mbuf *m; 2541 struct ether_header *eh; 2542 mxge_rx_ring_t *rx; 2543 bus_dmamap_t old_map; 2544 int idx; 2545 uint16_t tcpudp_csum; 2546 2547 sc = ss->sc; 2548 ifp = sc->ifp; 2549 rx = &ss->rx_big; 2550 idx = rx->cnt & rx->mask; 2551 rx->cnt += rx->nbufs; 2552 /* save a pointer to the received mbuf */ 2553 m = rx->info[idx].m; 2554 /* try to replace the received mbuf */ 2555 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2556 /* drop the frame -- the old mbuf is re-cycled */ 2557 ifp->if_ierrors++; 2558 return; 2559 } 2560 2561 /* unmap the received buffer */ 2562 old_map = rx->info[idx].map; 2563 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2564 bus_dmamap_unload(rx->dmat, old_map); 2565 2566 /* swap the bus_dmamap_t's */ 2567 rx->info[idx].map = rx->extra_map; 2568 rx->extra_map = old_map; 2569 2570 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2571 * aligned */ 2572 m->m_data += MXGEFW_PAD; 2573 2574 m->m_pkthdr.rcvif = ifp; 2575 m->m_len = m->m_pkthdr.len = len; 2576 ss->ipackets++; 2577 eh = mtod(m, struct ether_header *); 2578 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2579 mxge_vlan_tag_remove(m, &csum); 2580 } 2581 /* if the checksum is valid, mark it in the mbuf header */ 2582 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2583 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2584 return; 2585 /* otherwise, it was a UDP frame, or a TCP frame which 2586 we could not do LRO on. 
Tell the stack that the 2587 checksum is good */ 2588 m->m_pkthdr.csum_data = 0xffff; 2589 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2590 } 2591 /* flowid only valid if RSS hashing is enabled */ 2592 if (sc->num_slices > 1) { 2593 m->m_pkthdr.flowid = (ss - sc->ss); 2594 m->m_flags |= M_FLOWID; 2595 } 2596 /* pass the frame up the stack */ 2597 (*ifp->if_input)(ifp, m); 2598 } 2599 2600 static inline void 2601 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2602 { 2603 mxge_softc_t *sc; 2604 struct ifnet *ifp; 2605 struct ether_header *eh; 2606 struct mbuf *m; 2607 mxge_rx_ring_t *rx; 2608 bus_dmamap_t old_map; 2609 int idx; 2610 uint16_t tcpudp_csum; 2611 2612 sc = ss->sc; 2613 ifp = sc->ifp; 2614 rx = &ss->rx_small; 2615 idx = rx->cnt & rx->mask; 2616 rx->cnt++; 2617 /* save a pointer to the received mbuf */ 2618 m = rx->info[idx].m; 2619 /* try to replace the received mbuf */ 2620 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2621 /* drop the frame -- the old mbuf is re-cycled */ 2622 ifp->if_ierrors++; 2623 return; 2624 } 2625 2626 /* unmap the received buffer */ 2627 old_map = rx->info[idx].map; 2628 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2629 bus_dmamap_unload(rx->dmat, old_map); 2630 2631 /* swap the bus_dmamap_t's */ 2632 rx->info[idx].map = rx->extra_map; 2633 rx->extra_map = old_map; 2634 2635 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2636 * aligned */ 2637 m->m_data += MXGEFW_PAD; 2638 2639 m->m_pkthdr.rcvif = ifp; 2640 m->m_len = m->m_pkthdr.len = len; 2641 ss->ipackets++; 2642 eh = mtod(m, struct ether_header *); 2643 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2644 mxge_vlan_tag_remove(m, &csum); 2645 } 2646 /* if the checksum is valid, mark it in the mbuf header */ 2647 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2648 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2649 return; 2650 /* otherwise, it was a UDP frame, or a TCP frame which 2651 we could not do LRO on. 
Tell the stack that the 2652 checksum is good */ 2653 m->m_pkthdr.csum_data = 0xffff; 2654 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2655 } 2656 /* flowid only valid if RSS hashing is enabled */ 2657 if (sc->num_slices > 1) { 2658 m->m_pkthdr.flowid = (ss - sc->ss); 2659 m->m_flags |= M_FLOWID; 2660 } 2661 /* pass the frame up the stack */ 2662 (*ifp->if_input)(ifp, m); 2663 } 2664 2665 static inline void 2666 mxge_clean_rx_done(struct mxge_slice_state *ss) 2667 { 2668 mxge_rx_done_t *rx_done = &ss->rx_done; 2669 int limit = 0; 2670 uint16_t length; 2671 uint16_t checksum; 2672 2673 2674 while (rx_done->entry[rx_done->idx].length != 0) { 2675 length = ntohs(rx_done->entry[rx_done->idx].length); 2676 rx_done->entry[rx_done->idx].length = 0; 2677 checksum = rx_done->entry[rx_done->idx].checksum; 2678 if (length <= (MHLEN - MXGEFW_PAD)) 2679 mxge_rx_done_small(ss, length, checksum); 2680 else 2681 mxge_rx_done_big(ss, length, checksum); 2682 rx_done->cnt++; 2683 rx_done->idx = rx_done->cnt & rx_done->mask; 2684 2685 /* limit potential for livelock */ 2686 if (__predict_false(++limit > rx_done->mask / 2)) 2687 break; 2688 } 2689 #ifdef INET 2690 while (!SLIST_EMPTY(&ss->lro_active)) { 2691 struct lro_entry *lro = SLIST_FIRST(&ss->lro_active); 2692 SLIST_REMOVE_HEAD(&ss->lro_active, next); 2693 mxge_lro_flush(ss, lro); 2694 } 2695 #endif 2696 } 2697 2698 2699 static inline void 2700 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2701 { 2702 struct ifnet *ifp; 2703 mxge_tx_ring_t *tx; 2704 struct mbuf *m; 2705 bus_dmamap_t map; 2706 int idx; 2707 int *flags; 2708 2709 tx = &ss->tx; 2710 ifp = ss->sc->ifp; 2711 while (tx->pkt_done != mcp_idx) { 2712 idx = tx->done & tx->mask; 2713 tx->done++; 2714 m = tx->info[idx].m; 2715 /* mbuf and DMA map only attached to the first 2716 segment per-mbuf */ 2717 if (m != NULL) { 2718 ss->obytes += m->m_pkthdr.len; 2719 if (m->m_flags & M_MCAST) 2720 ss->omcasts++; 2721 ss->opackets++; 2722 tx->info[idx].m = NULL; 2723 map = tx->info[idx].map; 2724 bus_dmamap_unload(tx->dmat, map); 2725 m_freem(m); 2726 } 2727 if (tx->info[idx].flag) { 2728 tx->info[idx].flag = 0; 2729 tx->pkt_done++; 2730 } 2731 } 2732 2733 /* If we have space, clear IFF_OACTIVE to tell the stack that 2734 its OK to send packets */ 2735 #ifdef IFNET_BUF_RING 2736 flags = &ss->if_drv_flags; 2737 #else 2738 flags = &ifp->if_drv_flags; 2739 #endif 2740 mtx_lock(&ss->tx.mtx); 2741 if ((*flags) & IFF_DRV_OACTIVE && 2742 tx->req - tx->done < (tx->mask + 1)/4) { 2743 *(flags) &= ~IFF_DRV_OACTIVE; 2744 ss->tx.wake++; 2745 mxge_start_locked(ss); 2746 } 2747 #ifdef IFNET_BUF_RING 2748 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { 2749 /* let the NIC stop polling this queue, since there 2750 * are no more transmits pending */ 2751 if (tx->req == tx->done) { 2752 *tx->send_stop = 1; 2753 tx->queue_active = 0; 2754 tx->deactivate++; 2755 wmb(); 2756 } 2757 } 2758 #endif 2759 mtx_unlock(&ss->tx.mtx); 2760 2761 } 2762 2763 static struct mxge_media_type mxge_xfp_media_types[] = 2764 { 2765 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2766 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2767 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2768 {0, (1 << 5), "10GBASE-ER"}, 2769 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2770 {0, (1 << 3), "10GBASE-SW"}, 2771 {0, (1 << 2), "10GBASE-LW"}, 2772 {0, (1 << 1), "10GBASE-EW"}, 2773 {0, (1 << 0), "Reserved"} 2774 }; 2775 static struct mxge_media_type mxge_sfp_media_types[] = 2776 { 2777 {0, (1 << 7), "Reserved"}, 2778 {IFM_10G_LRM, (1 << 6), 
"10GBASE-LRM"}, 2779 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2780 {IFM_10G_SR, (1 << 4), "10GBASE-SR"} 2781 }; 2782 2783 static void 2784 mxge_set_media(mxge_softc_t *sc, int type) 2785 { 2786 sc->media_flags |= type; 2787 ifmedia_add(&sc->media, sc->media_flags, 0, NULL); 2788 ifmedia_set(&sc->media, sc->media_flags); 2789 } 2790 2791 2792 /* 2793 * Determine the media type for a NIC. Some XFPs will identify 2794 * themselves only when their link is up, so this is initiated via a 2795 * link up interrupt. However, this can potentially take up to 2796 * several milliseconds, so it is run via the watchdog routine, rather 2797 * than in the interrupt handler itself. This need only be done 2798 * once, not each time the link is up. 2799 */ 2800 static void 2801 mxge_media_probe(mxge_softc_t *sc) 2802 { 2803 mxge_cmd_t cmd; 2804 char *cage_type; 2805 char *ptr; 2806 struct mxge_media_type *mxge_media_types = NULL; 2807 int i, err, ms, mxge_media_type_entries; 2808 uint32_t byte; 2809 2810 sc->need_media_probe = 0; 2811 2812 /* if we've already set a media type, we're done */ 2813 if (sc->media_flags != (IFM_ETHER | IFM_AUTO)) 2814 return; 2815 2816 /* 2817 * parse the product code to deterimine the interface type 2818 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2819 * after the 3rd dash in the driver's cached copy of the 2820 * EEPROM's product code string. 2821 */ 2822 ptr = sc->product_code_string; 2823 if (ptr == NULL) { 2824 device_printf(sc->dev, "Missing product code\n"); 2825 } 2826 2827 for (i = 0; i < 3; i++, ptr++) { 2828 ptr = index(ptr, '-'); 2829 if (ptr == NULL) { 2830 device_printf(sc->dev, 2831 "only %d dashes in PC?!?\n", i); 2832 return; 2833 } 2834 } 2835 if (*ptr == 'C') { 2836 /* -C is CX4 */ 2837 mxge_set_media(sc, IFM_10G_CX4); 2838 return; 2839 } 2840 else if (*ptr == 'Q') { 2841 /* -Q is Quad Ribbon Fiber */ 2842 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2843 /* FreeBSD has no media type for Quad ribbon fiber */ 2844 return; 2845 } 2846 2847 if (*ptr == 'R') { 2848 /* -R is XFP */ 2849 mxge_media_types = mxge_xfp_media_types; 2850 mxge_media_type_entries = 2851 sizeof (mxge_xfp_media_types) / 2852 sizeof (mxge_xfp_media_types[0]); 2853 byte = MXGE_XFP_COMPLIANCE_BYTE; 2854 cage_type = "XFP"; 2855 } 2856 2857 if (*ptr == 'S' || *(ptr +1) == 'S') { 2858 /* -S or -2S is SFP+ */ 2859 mxge_media_types = mxge_sfp_media_types; 2860 mxge_media_type_entries = 2861 sizeof (mxge_sfp_media_types) / 2862 sizeof (mxge_sfp_media_types[0]); 2863 cage_type = "SFP+"; 2864 byte = 3; 2865 } 2866 2867 if (mxge_media_types == NULL) { 2868 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2869 return; 2870 } 2871 2872 /* 2873 * At this point we know the NIC has an XFP cage, so now we 2874 * try to determine what is in the cage by using the 2875 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2876 * register. 
We read just one byte, which may take over 2877 * a millisecond 2878 */ 2879 2880 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2881 cmd.data1 = byte; 2882 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2883 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 2884 device_printf(sc->dev, "failed to read XFP\n"); 2885 } 2886 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 2887 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 2888 } 2889 if (err != MXGEFW_CMD_OK) { 2890 return; 2891 } 2892 2893 /* now we wait for the data to be cached */ 2894 cmd.data0 = byte; 2895 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2896 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 2897 DELAY(1000); 2898 cmd.data0 = byte; 2899 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2900 } 2901 if (err != MXGEFW_CMD_OK) { 2902 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 2903 cage_type, err, ms); 2904 return; 2905 } 2906 2907 if (cmd.data0 == mxge_media_types[0].bitmask) { 2908 if (mxge_verbose) 2909 device_printf(sc->dev, "%s:%s\n", cage_type, 2910 mxge_media_types[0].name); 2911 mxge_set_media(sc, IFM_10G_CX4); 2912 return; 2913 } 2914 for (i = 1; i < mxge_media_type_entries; i++) { 2915 if (cmd.data0 & mxge_media_types[i].bitmask) { 2916 if (mxge_verbose) 2917 device_printf(sc->dev, "%s:%s\n", 2918 cage_type, 2919 mxge_media_types[i].name); 2920 2921 mxge_set_media(sc, mxge_media_types[i].flag); 2922 return; 2923 } 2924 } 2925 device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type, 2926 cmd.data0); 2927 2928 return; 2929 } 2930 2931 static void 2932 mxge_intr(void *arg) 2933 { 2934 struct mxge_slice_state *ss = arg; 2935 mxge_softc_t *sc = ss->sc; 2936 mcp_irq_data_t *stats = ss->fw_stats; 2937 mxge_tx_ring_t *tx = &ss->tx; 2938 mxge_rx_done_t *rx_done = &ss->rx_done; 2939 uint32_t send_done_count; 2940 uint8_t valid; 2941 2942 2943 #ifndef IFNET_BUF_RING 2944 /* an interrupt on a non-zero slice is implicitly valid 2945 since MSI-X irqs are not shared */ 2946 if (ss != sc->ss) { 2947 mxge_clean_rx_done(ss); 2948 *ss->irq_claim = be32toh(3); 2949 return; 2950 } 2951 #endif 2952 2953 /* make sure the DMA has finished */ 2954 if (!stats->valid) { 2955 return; 2956 } 2957 valid = stats->valid; 2958 2959 if (sc->legacy_irq) { 2960 /* lower legacy IRQ */ 2961 *sc->irq_deassert = 0; 2962 if (!mxge_deassert_wait) 2963 /* don't wait for conf. 
that irq is low */ 2964 stats->valid = 0; 2965 } else { 2966 stats->valid = 0; 2967 } 2968 2969 /* loop while waiting for legacy irq deassertion */ 2970 do { 2971 /* check for transmit completes and receives */ 2972 send_done_count = be32toh(stats->send_done_count); 2973 while ((send_done_count != tx->pkt_done) || 2974 (rx_done->entry[rx_done->idx].length != 0)) { 2975 if (send_done_count != tx->pkt_done) 2976 mxge_tx_done(ss, (int)send_done_count); 2977 mxge_clean_rx_done(ss); 2978 send_done_count = be32toh(stats->send_done_count); 2979 } 2980 if (sc->legacy_irq && mxge_deassert_wait) 2981 wmb(); 2982 } while (*((volatile uint8_t *) &stats->valid)); 2983 2984 /* fw link & error stats meaningful only on the first slice */ 2985 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 2986 if (sc->link_state != stats->link_up) { 2987 sc->link_state = stats->link_up; 2988 if (sc->link_state) { 2989 if_link_state_change(sc->ifp, LINK_STATE_UP); 2990 if (mxge_verbose) 2991 device_printf(sc->dev, "link up\n"); 2992 } else { 2993 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 2994 if (mxge_verbose) 2995 device_printf(sc->dev, "link down\n"); 2996 } 2997 sc->need_media_probe = 1; 2998 } 2999 if (sc->rdma_tags_available != 3000 be32toh(stats->rdma_tags_available)) { 3001 sc->rdma_tags_available = 3002 be32toh(stats->rdma_tags_available); 3003 device_printf(sc->dev, "RDMA timed out! %d tags " 3004 "left\n", sc->rdma_tags_available); 3005 } 3006 3007 if (stats->link_down) { 3008 sc->down_cnt += stats->link_down; 3009 sc->link_state = 0; 3010 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3011 } 3012 } 3013 3014 /* check to see if we have rx token to pass back */ 3015 if (valid & 0x1) 3016 *ss->irq_claim = be32toh(3); 3017 *(ss->irq_claim + 1) = be32toh(3); 3018 } 3019 3020 static void 3021 mxge_init(void *arg) 3022 { 3023 } 3024 3025 3026 3027 static void 3028 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 3029 { 3030 struct lro_entry *lro_entry; 3031 int i; 3032 3033 while (!SLIST_EMPTY(&ss->lro_free)) { 3034 lro_entry = SLIST_FIRST(&ss->lro_free); 3035 SLIST_REMOVE_HEAD(&ss->lro_free, next); 3036 free(lro_entry, M_DEVBUF); 3037 } 3038 3039 for (i = 0; i <= ss->rx_big.mask; i++) { 3040 if (ss->rx_big.info[i].m == NULL) 3041 continue; 3042 bus_dmamap_unload(ss->rx_big.dmat, 3043 ss->rx_big.info[i].map); 3044 m_freem(ss->rx_big.info[i].m); 3045 ss->rx_big.info[i].m = NULL; 3046 } 3047 3048 for (i = 0; i <= ss->rx_small.mask; i++) { 3049 if (ss->rx_small.info[i].m == NULL) 3050 continue; 3051 bus_dmamap_unload(ss->rx_small.dmat, 3052 ss->rx_small.info[i].map); 3053 m_freem(ss->rx_small.info[i].m); 3054 ss->rx_small.info[i].m = NULL; 3055 } 3056 3057 /* transmit ring used only on the first slice */ 3058 if (ss->tx.info == NULL) 3059 return; 3060 3061 for (i = 0; i <= ss->tx.mask; i++) { 3062 ss->tx.info[i].flag = 0; 3063 if (ss->tx.info[i].m == NULL) 3064 continue; 3065 bus_dmamap_unload(ss->tx.dmat, 3066 ss->tx.info[i].map); 3067 m_freem(ss->tx.info[i].m); 3068 ss->tx.info[i].m = NULL; 3069 } 3070 } 3071 3072 static void 3073 mxge_free_mbufs(mxge_softc_t *sc) 3074 { 3075 int slice; 3076 3077 for (slice = 0; slice < sc->num_slices; slice++) 3078 mxge_free_slice_mbufs(&sc->ss[slice]); 3079 } 3080 3081 static void 3082 mxge_free_slice_rings(struct mxge_slice_state *ss) 3083 { 3084 int i; 3085 3086 3087 if (ss->rx_done.entry != NULL) 3088 mxge_dma_free(&ss->rx_done.dma); 3089 ss->rx_done.entry = NULL; 3090 3091 if (ss->tx.req_bytes != NULL) 3092 free(ss->tx.req_bytes, M_DEVBUF); 3093 
	ss->tx.req_bytes = NULL;

	if (ss->tx.seg_list != NULL)
		free(ss->tx.seg_list, M_DEVBUF);
	ss->tx.seg_list = NULL;

	if (ss->rx_small.shadow != NULL)
		free(ss->rx_small.shadow, M_DEVBUF);
	ss->rx_small.shadow = NULL;

	if (ss->rx_big.shadow != NULL)
		free(ss->rx_big.shadow, M_DEVBUF);
	ss->rx_big.shadow = NULL;

	if (ss->tx.info != NULL) {
		if (ss->tx.dmat != NULL) {
			for (i = 0; i <= ss->tx.mask; i++) {
				bus_dmamap_destroy(ss->tx.dmat,
						   ss->tx.info[i].map);
			}
			bus_dma_tag_destroy(ss->tx.dmat);
		}
		free(ss->tx.info, M_DEVBUF);
	}
	ss->tx.info = NULL;

	if (ss->rx_small.info != NULL) {
		if (ss->rx_small.dmat != NULL) {
			for (i = 0; i <= ss->rx_small.mask; i++) {
				bus_dmamap_destroy(ss->rx_small.dmat,
						   ss->rx_small.info[i].map);
			}
			bus_dmamap_destroy(ss->rx_small.dmat,
					   ss->rx_small.extra_map);
			bus_dma_tag_destroy(ss->rx_small.dmat);
		}
		free(ss->rx_small.info, M_DEVBUF);
	}
	ss->rx_small.info = NULL;

	if (ss->rx_big.info != NULL) {
		if (ss->rx_big.dmat != NULL) {
			for (i = 0; i <= ss->rx_big.mask; i++) {
				bus_dmamap_destroy(ss->rx_big.dmat,
						   ss->rx_big.info[i].map);
			}
			bus_dmamap_destroy(ss->rx_big.dmat,
					   ss->rx_big.extra_map);
			bus_dma_tag_destroy(ss->rx_big.dmat);
		}
		free(ss->rx_big.info, M_DEVBUF);
	}
	ss->rx_big.info = NULL;
}

static void
mxge_free_rings(mxge_softc_t *sc)
{
	int slice;

	for (slice = 0; slice < sc->num_slices; slice++)
		mxge_free_slice_rings(&sc->ss[slice]);
}

static int
mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
		       int tx_ring_entries)
{
	mxge_softc_t *sc = ss->sc;
	size_t bytes;
	int err, i;

	err = ENOMEM;

	/* allocate per-slice receive resources */

	ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
	ss->rx_done.mask = (2 * rx_ring_entries) - 1;

	/* allocate the rx shadow rings */
	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
	ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (ss->rx_small.shadow == NULL)
		return err;

	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
	ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (ss->rx_big.shadow == NULL)
		return err;

	/* allocate the rx host info rings */
	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
	ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (ss->rx_small.info == NULL)
		return err;

	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
	ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (ss->rx_big.info == NULL)
		return err;
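#if 0
	/*
	 * Not compiled: an illustration of the power-of-two ring
	 * indexing that the masks above enable.  With a hypothetical
	 * rx_ring_entries of 512, mask is 511 and a free-running
	 * counter maps to a slot with a single AND, wrapping
	 * automatically and never needing a reset:
	 */
	unsigned int mask = 511;	/* rx_ring_entries - 1 */
	unsigned int cnt = 513;		/* free-running counter */
	unsigned int idx = cnt & mask;	/* == 1 */
#endif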
	/* allocate the rx busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 MHLEN,			/* maxsize */
				 1,			/* num segs */
				 MHLEN,			/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->rx_small.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
			      err);
		return err;
	}

	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
#if MXGE_VIRT_JUMBOS
				 4096,			/* boundary */
#else
				 0,			/* boundary */
#endif
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 3*4096,		/* maxsize */
#if MXGE_VIRT_JUMBOS
				 3,			/* num segs */
				 4096,			/* maxsegsize*/
#else
				 1,			/* num segs */
				 MJUM9BYTES,		/* maxsegsize*/
#endif
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->rx_big.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
			      err);
		return err;
	}
	for (i = 0; i <= ss->rx_small.mask; i++) {
		err = bus_dmamap_create(ss->rx_small.dmat, 0,
					&ss->rx_small.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d rx_small dmamap\n",
				      err);
			return err;
		}
	}
	err = bus_dmamap_create(ss->rx_small.dmat, 0,
				&ss->rx_small.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
			      err);
		return err;
	}

	for (i = 0; i <= ss->rx_big.mask; i++) {
		err = bus_dmamap_create(ss->rx_big.dmat, 0,
					&ss->rx_big.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d rx_big dmamap\n",
				      err);
			return err;
		}
	}
	err = bus_dmamap_create(ss->rx_big.dmat, 0,
				&ss->rx_big.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
			      err);
		return err;
	}

	/* now allocate TX resources */

#ifndef IFNET_BUF_RING
	/* only use a single TX ring for now */
	if (ss != ss->sc->ss)
		return 0;
#endif

	ss->tx.mask = tx_ring_entries - 1;
	ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);


	/* allocate the tx request copy block; the extra 8 bytes leave
	   room to align req_list up to an 8-byte boundary below */
	bytes = 8 +
		sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
	ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
	if (ss->tx.req_bytes == NULL)
		return err;
	/* ensure req_list entries are aligned to 8 bytes */
	ss->tx.req_list = (mcp_kreq_ether_send_t *)
		((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);

	/* allocate the tx busdma segment list */
	bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
	ss->tx.seg_list = (bus_dma_segment_t *)
		malloc(bytes, M_DEVBUF, M_WAITOK);
	if (ss->tx.seg_list == NULL)
		return err;

	/* allocate the tx host info ring */
	bytes = tx_ring_entries * sizeof (*ss->tx.info);
	ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (ss->tx.info == NULL)
		return err;

	/* allocate the tx busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 sc->tx_boundary,	/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 ss->tx.max_desc - 2,	/* num segs */
				 sc->tx_boundary,	/* maxsegsz */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->tx.dmat);		/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating tx dmat\n",
			      err);
		return err;
	}

	/* now use these tags to setup dmamaps for each slot
	   in the ring */
	for (i = 0; i <= ss->tx.mask; i++) {
		err = bus_dmamap_create(ss->tx.dmat, 0,
					&ss->tx.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d tx dmamap\n",
				      err);
			return err;
		}
	}
	return 0;
3341 } 3342 3343 static int 3344 mxge_alloc_rings(mxge_softc_t *sc) 3345 { 3346 mxge_cmd_t cmd; 3347 int tx_ring_size; 3348 int tx_ring_entries, rx_ring_entries; 3349 int err, slice; 3350 3351 /* get ring sizes */ 3352 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3353 tx_ring_size = cmd.data0; 3354 if (err != 0) { 3355 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3356 goto abort; 3357 } 3358 3359 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3360 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3361 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 3362 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 3363 IFQ_SET_READY(&sc->ifp->if_snd); 3364 3365 for (slice = 0; slice < sc->num_slices; slice++) { 3366 err = mxge_alloc_slice_rings(&sc->ss[slice], 3367 rx_ring_entries, 3368 tx_ring_entries); 3369 if (err != 0) 3370 goto abort; 3371 } 3372 return 0; 3373 3374 abort: 3375 mxge_free_rings(sc); 3376 return err; 3377 3378 } 3379 3380 3381 static void 3382 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3383 { 3384 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3385 3386 if (bufsize < MCLBYTES) { 3387 /* easy, everything fits in a single buffer */ 3388 *big_buf_size = MCLBYTES; 3389 *cl_size = MCLBYTES; 3390 *nbufs = 1; 3391 return; 3392 } 3393 3394 if (bufsize < MJUMPAGESIZE) { 3395 /* still easy, everything still fits in a single buffer */ 3396 *big_buf_size = MJUMPAGESIZE; 3397 *cl_size = MJUMPAGESIZE; 3398 *nbufs = 1; 3399 return; 3400 } 3401 #if MXGE_VIRT_JUMBOS 3402 /* now we need to use virtually contiguous buffers */ 3403 *cl_size = MJUM9BYTES; 3404 *big_buf_size = 4096; 3405 *nbufs = mtu / 4096 + 1; 3406 /* needs to be a power of two, so round up */ 3407 if (*nbufs == 3) 3408 *nbufs = 4; 3409 #else 3410 *cl_size = MJUM9BYTES; 3411 *big_buf_size = MJUM9BYTES; 3412 *nbufs = 1; 3413 #endif 3414 } 3415 3416 static int 3417 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3418 { 3419 mxge_softc_t *sc; 3420 mxge_cmd_t cmd; 3421 bus_dmamap_t map; 3422 struct lro_entry *lro_entry; 3423 int err, i, slice; 3424 3425 3426 sc = ss->sc; 3427 slice = ss - sc->ss; 3428 3429 SLIST_INIT(&ss->lro_free); 3430 SLIST_INIT(&ss->lro_active); 3431 3432 for (i = 0; i < sc->lro_cnt; i++) { 3433 lro_entry = (struct lro_entry *) 3434 malloc(sizeof (*lro_entry), M_DEVBUF, 3435 M_NOWAIT | M_ZERO); 3436 if (lro_entry == NULL) { 3437 sc->lro_cnt = i; 3438 break; 3439 } 3440 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next); 3441 } 3442 /* get the lanai pointers to the send and receive rings */ 3443 3444 err = 0; 3445 #ifndef IFNET_BUF_RING 3446 /* We currently only send from the first slice */ 3447 if (slice == 0) { 3448 #endif 3449 cmd.data0 = slice; 3450 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3451 ss->tx.lanai = 3452 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3453 ss->tx.send_go = (volatile uint32_t *) 3454 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3455 ss->tx.send_stop = (volatile uint32_t *) 3456 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3457 #ifndef IFNET_BUF_RING 3458 } 3459 #endif 3460 cmd.data0 = slice; 3461 err |= mxge_send_cmd(sc, 3462 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3463 ss->rx_small.lanai = 3464 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3465 cmd.data0 = slice; 3466 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3467 ss->rx_big.lanai = 3468 (volatile mcp_kreq_ether_recv_t 
		*)(sc->sram + cmd.data0);

	if (err != 0) {
		device_printf(sc->dev,
			      "failed to get ring sizes or locations\n");
		return EIO;
	}

	/* stock receive rings */
	for (i = 0; i <= ss->rx_small.mask; i++) {
		map = ss->rx_small.info[i].map;
		err = mxge_get_buf_small(ss, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d smalls\n",
				      i, ss->rx_small.mask + 1);
			return ENOMEM;
		}
	}
	for (i = 0; i <= ss->rx_big.mask; i++) {
		ss->rx_big.shadow[i].addr_low = 0xffffffff;
		ss->rx_big.shadow[i].addr_high = 0xffffffff;
	}
	ss->rx_big.nbufs = nbufs;
	ss->rx_big.cl_size = cl_size;
	ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
		ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
	for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
		map = ss->rx_big.info[i].map;
		err = mxge_get_buf_big(ss, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d bigs\n",
				      i, ss->rx_big.mask + 1);
			return ENOMEM;
		}
	}
	return 0;
}

static int
mxge_open(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int err, big_bytes, nbufs, slice, cl_size, i;
	bus_addr_t bus;
	volatile uint8_t *itable;
	struct mxge_slice_state *ss;

	/* Copy the MAC address in case it was overridden */
	bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);

	err = mxge_reset(sc, 1);
	if (err != 0) {
		device_printf(sc->dev, "failed to reset\n");
		return EIO;
	}

	if (sc->num_slices > 1) {
		/* setup the indirection table */
		cmd.data0 = sc->num_slices;
		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
				    &cmd);

		err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
				     &cmd);
		if (err != 0) {
			device_printf(sc->dev,
				      "failed to setup rss tables\n");
			return err;
		}

		/* just enable an identity mapping */
		itable = sc->sram + cmd.data0;
		for (i = 0; i < sc->num_slices; i++)
			itable[i] = (uint8_t)i;

		cmd.data0 = 1;
		cmd.data1 = mxge_rss_hash_type;
		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "failed to enable slices\n");
			return err;
		}
	}


	mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);

	cmd.data0 = nbufs;
	err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			    &cmd);
	/* error is only meaningful if we're trying to set
	   MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
	if (err && nbufs > 1) {
		device_printf(sc->dev,
			      "Failed to set always-use-n to %d\n",
			      nbufs);
		return EIO;
	}
	/* Give the firmware the mtu and the big and small buffer
	   sizes.  The firmware wants the big buf size to be a power
Luckily, FreeBSD's clusters are powers of two */ 3569 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3570 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3571 cmd.data0 = MHLEN - MXGEFW_PAD; 3572 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3573 &cmd); 3574 cmd.data0 = big_bytes; 3575 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3576 3577 if (err != 0) { 3578 device_printf(sc->dev, "failed to setup params\n"); 3579 goto abort; 3580 } 3581 3582 /* Now give him the pointer to the stats block */ 3583 for (slice = 0; 3584 #ifdef IFNET_BUF_RING 3585 slice < sc->num_slices; 3586 #else 3587 slice < 1; 3588 #endif 3589 slice++) { 3590 ss = &sc->ss[slice]; 3591 cmd.data0 = 3592 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3593 cmd.data1 = 3594 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3595 cmd.data2 = sizeof(struct mcp_irq_data); 3596 cmd.data2 |= (slice << 16); 3597 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3598 } 3599 3600 if (err != 0) { 3601 bus = sc->ss->fw_stats_dma.bus_addr; 3602 bus += offsetof(struct mcp_irq_data, send_done_count); 3603 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3604 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3605 err = mxge_send_cmd(sc, 3606 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3607 &cmd); 3608 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3609 sc->fw_multicast_support = 0; 3610 } else { 3611 sc->fw_multicast_support = 1; 3612 } 3613 3614 if (err != 0) { 3615 device_printf(sc->dev, "failed to setup params\n"); 3616 goto abort; 3617 } 3618 3619 for (slice = 0; slice < sc->num_slices; slice++) { 3620 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3621 if (err != 0) { 3622 device_printf(sc->dev, "couldn't open slice %d\n", 3623 slice); 3624 goto abort; 3625 } 3626 } 3627 3628 /* Finally, start the firmware running */ 3629 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3630 if (err) { 3631 device_printf(sc->dev, "Couldn't bring up link\n"); 3632 goto abort; 3633 } 3634 #ifdef IFNET_BUF_RING 3635 for (slice = 0; slice < sc->num_slices; slice++) { 3636 ss = &sc->ss[slice]; 3637 ss->if_drv_flags |= IFF_DRV_RUNNING; 3638 ss->if_drv_flags &= ~IFF_DRV_OACTIVE; 3639 } 3640 #endif 3641 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3642 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3643 3644 return 0; 3645 3646 3647 abort: 3648 mxge_free_mbufs(sc); 3649 3650 return err; 3651 } 3652 3653 static int 3654 mxge_close(mxge_softc_t *sc, int down) 3655 { 3656 mxge_cmd_t cmd; 3657 int err, old_down_cnt; 3658 #ifdef IFNET_BUF_RING 3659 struct mxge_slice_state *ss; 3660 int slice; 3661 #endif 3662 3663 #ifdef IFNET_BUF_RING 3664 for (slice = 0; slice < sc->num_slices; slice++) { 3665 ss = &sc->ss[slice]; 3666 ss->if_drv_flags &= ~IFF_DRV_RUNNING; 3667 } 3668 #endif 3669 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3670 if (!down) { 3671 old_down_cnt = sc->down_cnt; 3672 wmb(); 3673 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3674 if (err) { 3675 device_printf(sc->dev, 3676 "Couldn't bring down link\n"); 3677 } 3678 if (old_down_cnt == sc->down_cnt) { 3679 /* wait for down irq */ 3680 DELAY(10 * sc->intr_coal_delay); 3681 } 3682 wmb(); 3683 if (old_down_cnt == sc->down_cnt) { 3684 device_printf(sc->dev, "never got down irq\n"); 3685 } 3686 } 3687 mxge_free_mbufs(sc); 3688 3689 return 0; 3690 } 3691 3692 static void 3693 mxge_setup_cfg_space(mxge_softc_t *sc) 3694 { 3695 device_t dev = sc->dev; 3696 int reg; 3697 uint16_t cmd, lnk, pectl; 3698 3699 /* find the PCIe link width and set max 
read request to 4KB */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		/* PCIe link status register */
		lnk = pci_read_config(dev, reg + 0x12, 2);
		sc->link_width = (lnk >> 4) & 0x3f;

		if (sc->pectl == 0) {
			/* PCIe device control: set the max read request
			   size field (bits 14:12) to 5 == 4096 bytes */
			pectl = pci_read_config(dev, reg + 0x8, 2);
			pectl = (pectl & ~0x7000) | (5 << 12);
			pci_write_config(dev, reg + 0x8, pectl, 2);
			sc->pectl = pectl;
		} else {
			/* restore saved pectl after watchdog reset */
			pci_write_config(dev, reg + 0x8, sc->pectl, 2);
		}
	}

	/* Enable DMA and Memory space access */
	pci_enable_busmaster(dev);
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	cmd |= PCIM_CMD_MEMEN;
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
}

static uint32_t
mxge_read_reboot(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	uint32_t vs;

	/* find the vendor specific offset */
	if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
		device_printf(sc->dev,
			      "could not find vendor specific offset\n");
		return (uint32_t)-1;
	}
	/* enable read32 mode */
	pci_write_config(dev, vs + 0x10, 0x3, 1);
	/* tell NIC which register to read */
	pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
	return (pci_read_config(dev, vs + 0x14, 4));
}
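/*
 * A note on the recovery logic below (our reading): a config-space
 * read of all-ones means the device has fallen off the bus entirely,
 * while a cleared busmaster bit means the NIC rebooted and came back
 * with config space at power-on defaults, which must be restored
 * before the NIC is usable again.
 */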
static void
mxge_watchdog_reset(mxge_softc_t *sc)
{
	struct pci_devinfo *dinfo;
	struct mxge_slice_state *ss;
	int err, running, s, num_tx_slices = 1;
	uint32_t reboot;
	uint16_t cmd;

	err = ENXIO;

	device_printf(sc->dev, "Watchdog reset!\n");

	/*
	 * check to see if the NIC rebooted.  If it did, then all of
	 * PCI config space has been reset, and things like the
	 * busmaster bit will be zero.  If this is the case, then we
	 * must restore PCI config space before the NIC can be used
	 * again
	 */
	cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
	if (cmd == 0xffff) {
		/*
		 * maybe the watchdog caught the NIC rebooting; wait
		 * up to 100ms for it to finish.  If it does not come
		 * back, then give up
		 */
		DELAY(1000*100);
		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
		if (cmd == 0xffff) {
			device_printf(sc->dev, "NIC disappeared!\n");
		}
	}
	if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
		/* print the reboot status */
		reboot = mxge_read_reboot(sc);
		device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
			      reboot);
		running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
		if (running) {

			/*
			 * quiesce NIC so that TX routines will not try to
			 * xmit after restoration of BAR
			 */

			/* Mark the link as down */
			if (sc->link_state) {
				sc->link_state = 0;
				if_link_state_change(sc->ifp,
						     LINK_STATE_DOWN);
			}
#ifdef IFNET_BUF_RING
			num_tx_slices = sc->num_slices;
#endif
			/* grab all TX locks to ensure no tx */
			for (s = 0; s < num_tx_slices; s++) {
				ss = &sc->ss[s];
				mtx_lock(&ss->tx.mtx);
			}
			mxge_close(sc, 1);
		}
		/* restore PCI configuration space */
		dinfo = device_get_ivars(sc->dev);
		pci_cfg_restore(sc->dev, dinfo);

		/* and redo any changes we made to our config space */
		mxge_setup_cfg_space(sc);

		/* reload f/w */
		err = mxge_load_firmware(sc, 0);
		if (err) {
			device_printf(sc->dev,
				      "Unable to re-load f/w\n");
		}
		if (running) {
			if (!err)
				err = mxge_open(sc);
			/* release all TX locks */
			for (s = 0; s < num_tx_slices; s++) {
				ss = &sc->ss[s];
#ifdef IFNET_BUF_RING
				mxge_start_locked(ss);
#endif
				mtx_unlock(&ss->tx.mtx);
			}
		}
		sc->watchdog_resets++;
	} else {
		device_printf(sc->dev,
			      "NIC did not reboot, not resetting\n");
		err = 0;
	}
	if (err) {
		device_printf(sc->dev, "watchdog reset failed\n");
	} else {
		if (sc->dying == 2)
			sc->dying = 0;
		callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	}
}

static void
mxge_watchdog_task(void *arg, int pending)
{
	mxge_softc_t *sc = arg;


	mtx_lock(&sc->driver_mtx);
	mxge_watchdog_reset(sc);
	mtx_unlock(&sc->driver_mtx);
}

static void
mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
{
	tx = &sc->ss[slice].tx;
	device_printf(sc->dev, "slice %d stuck? 
ring state:\n", slice); 3859 device_printf(sc->dev, 3860 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3861 tx->req, tx->done, tx->queue_active); 3862 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3863 tx->activate, tx->deactivate); 3864 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3865 tx->pkt_done, 3866 be32toh(sc->ss->fw_stats->send_done_count)); 3867 } 3868 3869 static int 3870 mxge_watchdog(mxge_softc_t *sc) 3871 { 3872 mxge_tx_ring_t *tx; 3873 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3874 int i, err = 0; 3875 3876 /* see if we have outstanding transmits, which 3877 have been pending for more than mxge_ticks */ 3878 for (i = 0; 3879 #ifdef IFNET_BUF_RING 3880 (i < sc->num_slices) && (err == 0); 3881 #else 3882 (i < 1) && (err == 0); 3883 #endif 3884 i++) { 3885 tx = &sc->ss[i].tx; 3886 if (tx->req != tx->done && 3887 tx->watchdog_req != tx->watchdog_done && 3888 tx->done == tx->watchdog_done) { 3889 /* check for pause blocking before resetting */ 3890 if (tx->watchdog_rx_pause == rx_pause) { 3891 mxge_warn_stuck(sc, tx, i); 3892 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 3893 return (ENXIO); 3894 } 3895 else 3896 device_printf(sc->dev, "Flow control blocking " 3897 "xmits, check link partner\n"); 3898 } 3899 3900 tx->watchdog_req = tx->req; 3901 tx->watchdog_done = tx->done; 3902 tx->watchdog_rx_pause = rx_pause; 3903 } 3904 3905 if (sc->need_media_probe) 3906 mxge_media_probe(sc); 3907 return (err); 3908 } 3909 3910 static u_long 3911 mxge_update_stats(mxge_softc_t *sc) 3912 { 3913 struct mxge_slice_state *ss; 3914 u_long pkts = 0; 3915 u_long ipackets = 0; 3916 u_long opackets = 0; 3917 #ifdef IFNET_BUF_RING 3918 u_long obytes = 0; 3919 u_long omcasts = 0; 3920 u_long odrops = 0; 3921 #endif 3922 u_long oerrors = 0; 3923 int slice; 3924 3925 for (slice = 0; slice < sc->num_slices; slice++) { 3926 ss = &sc->ss[slice]; 3927 ipackets += ss->ipackets; 3928 opackets += ss->opackets; 3929 #ifdef IFNET_BUF_RING 3930 obytes += ss->obytes; 3931 omcasts += ss->omcasts; 3932 odrops += ss->tx.br->br_drops; 3933 #endif 3934 oerrors += ss->oerrors; 3935 } 3936 pkts = (ipackets - sc->ifp->if_ipackets); 3937 pkts += (opackets - sc->ifp->if_opackets); 3938 sc->ifp->if_ipackets = ipackets; 3939 sc->ifp->if_opackets = opackets; 3940 #ifdef IFNET_BUF_RING 3941 sc->ifp->if_obytes = obytes; 3942 sc->ifp->if_omcasts = omcasts; 3943 sc->ifp->if_snd.ifq_drops = odrops; 3944 #endif 3945 sc->ifp->if_oerrors = oerrors; 3946 return pkts; 3947 } 3948 3949 static void 3950 mxge_tick(void *arg) 3951 { 3952 mxge_softc_t *sc = arg; 3953 u_long pkts = 0; 3954 int err = 0; 3955 int running, ticks; 3956 uint16_t cmd; 3957 3958 ticks = mxge_ticks; 3959 mtx_lock(&sc->driver_mtx); 3960 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 3961 mtx_unlock(&sc->driver_mtx); 3962 if (running) { 3963 /* aggregate stats from different slices */ 3964 pkts = mxge_update_stats(sc); 3965 if (!sc->watchdog_countdown) { 3966 err = mxge_watchdog(sc); 3967 sc->watchdog_countdown = 4; 3968 } 3969 sc->watchdog_countdown--; 3970 } 3971 if (pkts == 0) { 3972 /* ensure NIC did not suffer h/w fault while idle */ 3973 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3974 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3975 sc->dying = 2; 3976 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 3977 err = ENXIO; 3978 } 3979 /* look less often if NIC is idle */ 3980 ticks *= 4; 3981 } 3982 3983 if (err == 0) 3984 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 3985 3986 } 3987 3988 static int 3989 
mxge_media_change(struct ifnet *ifp)
{
	return EINVAL;
}

static int
mxge_change_mtu(mxge_softc_t *sc, int mtu)
{
	struct ifnet *ifp = sc->ifp;
	int real_mtu, old_mtu;
	int err = 0;


	real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	if ((real_mtu > sc->max_mtu) || real_mtu < 60)
		return EINVAL;
	mtx_lock(&sc->driver_mtx);
	old_mtu = ifp->if_mtu;
	ifp->if_mtu = mtu;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		mxge_close(sc, 0);
		err = mxge_open(sc);
		if (err != 0) {
			ifp->if_mtu = old_mtu;
			mxge_close(sc, 0);
			(void) mxge_open(sc);
		}
	}
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static void
mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	mxge_softc_t *sc = ifp->if_softc;


	if (sc == NULL)
		return;
	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
	ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
	ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0;
}

static int
mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int err, mask;

	err = 0;
	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		err = ether_ioctl(ifp, command, data);
		break;

	case SIOCSIFMTU:
		err = mxge_change_mtu(sc, ifr->ifr_mtu);
		break;

	case SIOCSIFFLAGS:
		mtx_lock(&sc->driver_mtx);
		if (sc->dying) {
			mtx_unlock(&sc->driver_mtx);
			return EINVAL;
		}
		if (ifp->if_flags & IFF_UP) {
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				err = mxge_open(sc);
			} else {
				/* take care of promisc and allmulti
				   flag changes */
				mxge_change_promisc(sc,
						    ifp->if_flags & IFF_PROMISC);
				mxge_set_multicast_list(sc);
			}
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				mxge_close(sc, 0);
			}
		}
		mtx_unlock(&sc->driver_mtx);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		mtx_lock(&sc->driver_mtx);
		mxge_set_multicast_list(sc);
		mtx_unlock(&sc->driver_mtx);
		break;

	case SIOCSIFCAP:
		mtx_lock(&sc->driver_mtx);
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
						      | CSUM_TSO);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM;
				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
			}
		} else if (mask & IFCAP_RXCSUM) {
			if (IFCAP_RXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_RXCSUM;
				sc->csum_flag = 0;
			} else {
				ifp->if_capenable |= IFCAP_RXCSUM;
				sc->csum_flag = 1;
			}
		}
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				ifp->if_hwassist &= ~CSUM_TSO;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				printf("mxge requires tx checksum offload"
				       " be enabled to use TSO\n");
				err = EINVAL;
			}
		}
		if (mask & IFCAP_LRO) {
			if (IFCAP_LRO & ifp->if_capenable)
				err = mxge_change_lro_locked(sc, 0);
			else
				err = mxge_change_lro_locked(sc, mxge_lro_cnt);
		}
		if (mask & IFCAP_VLAN_HWTAGGING)
4125 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 4126 mtx_unlock(&sc->driver_mtx); 4127 VLAN_CAPABILITIES(ifp); 4128 4129 break; 4130 4131 case SIOCGIFMEDIA: 4132 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 4133 &sc->media, command); 4134 break; 4135 4136 default: 4137 err = ENOTTY; 4138 } 4139 return err; 4140 } 4141 4142 static void 4143 mxge_fetch_tunables(mxge_softc_t *sc) 4144 { 4145 4146 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); 4147 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 4148 &mxge_flow_control); 4149 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 4150 &mxge_intr_coal_delay); 4151 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 4152 &mxge_nvidia_ecrc_enable); 4153 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 4154 &mxge_force_firmware); 4155 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 4156 &mxge_deassert_wait); 4157 TUNABLE_INT_FETCH("hw.mxge.verbose", 4158 &mxge_verbose); 4159 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 4160 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt); 4161 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); 4162 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); 4163 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type); 4164 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu); 4165 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle); 4166 if (sc->lro_cnt != 0) 4167 mxge_lro_cnt = sc->lro_cnt; 4168 4169 if (bootverbose) 4170 mxge_verbose = 1; 4171 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 4172 mxge_intr_coal_delay = 30; 4173 if (mxge_ticks == 0) 4174 mxge_ticks = hz / 2; 4175 sc->pause = mxge_flow_control; 4176 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 4177 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) { 4178 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT; 4179 } 4180 if (mxge_initial_mtu > ETHERMTU_JUMBO || 4181 mxge_initial_mtu < ETHER_MIN_LEN) 4182 mxge_initial_mtu = ETHERMTU_JUMBO; 4183 4184 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE) 4185 mxge_throttle = MXGE_MAX_THROTTLE; 4186 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE) 4187 mxge_throttle = MXGE_MIN_THROTTLE; 4188 sc->throttle = mxge_throttle; 4189 } 4190 4191 4192 static void 4193 mxge_free_slices(mxge_softc_t *sc) 4194 { 4195 struct mxge_slice_state *ss; 4196 int i; 4197 4198 4199 if (sc->ss == NULL) 4200 return; 4201 4202 for (i = 0; i < sc->num_slices; i++) { 4203 ss = &sc->ss[i]; 4204 if (ss->fw_stats != NULL) { 4205 mxge_dma_free(&ss->fw_stats_dma); 4206 ss->fw_stats = NULL; 4207 #ifdef IFNET_BUF_RING 4208 if (ss->tx.br != NULL) { 4209 drbr_free(ss->tx.br, M_DEVBUF); 4210 ss->tx.br = NULL; 4211 } 4212 #endif 4213 mtx_destroy(&ss->tx.mtx); 4214 } 4215 if (ss->rx_done.entry != NULL) { 4216 mxge_dma_free(&ss->rx_done.dma); 4217 ss->rx_done.entry = NULL; 4218 } 4219 } 4220 free(sc->ss, M_DEVBUF); 4221 sc->ss = NULL; 4222 } 4223 4224 static int 4225 mxge_alloc_slices(mxge_softc_t *sc) 4226 { 4227 mxge_cmd_t cmd; 4228 struct mxge_slice_state *ss; 4229 size_t bytes; 4230 int err, i, max_intr_slots; 4231 4232 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4233 if (err != 0) { 4234 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4235 return err; 4236 } 4237 sc->rx_ring_size = cmd.data0; 4238 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); 4239 4240 bytes = sizeof (*sc->ss) * sc->num_slices; 4241 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO); 4242 if (sc->ss == NULL) 4243 return (ENOMEM); 4244 for (i = 0; i < 
static int
mxge_alloc_slices(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct mxge_slice_state *ss;
	size_t bytes;
	int err, i, max_intr_slots;

	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		return err;
	}
	sc->rx_ring_size = cmd.data0;
	max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));

	bytes = sizeof (*sc->ss) * sc->num_slices;
	sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sc->ss == NULL)
		return (ENOMEM);
	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];

		ss->sc = sc;

		/* allocate per-slice rx interrupt queues */
		bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
		err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
		if (err != 0)
			goto abort;
		ss->rx_done.entry = ss->rx_done.dma.addr;
		bzero(ss->rx_done.entry, bytes);

		/*
		 * allocate the per-slice firmware stats; stats
		 * (including tx) are used only on the first
		 * slice for now
		 */
#ifndef IFNET_BUF_RING
		if (i > 0)
			continue;
#endif

		bytes = sizeof (*ss->fw_stats);
		err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
				     bytes, 64);
		if (err != 0)
			goto abort;
		ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
		snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
			 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
		mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
#ifdef IFNET_BUF_RING
		ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
					   &ss->tx.mtx);
#endif
	}

	return (0);

abort:
	mxge_free_slices(sc);
	return (ENOMEM);
}

static void
mxge_slice_probe(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	char *old_fw;
	int msix_cnt, status, max_intr_slots;

	sc->num_slices = 1;
	/*
	 * don't probe for multiple slices unless the tunable allows it,
	 * and this is an SMP system
	 */
	if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
		return;

	/* see how many MSI-X interrupts are available */
	msix_cnt = pci_msix_count(sc->dev);
	if (msix_cnt < 2)
		return;

	/* now load the slice-aware firmware and see what it supports */
	old_fw = sc->fw_name;
	if (old_fw == mxge_fw_aligned)
		sc->fw_name = mxge_fw_rss_aligned;
	else
		sc->fw_name = mxge_fw_rss_unaligned;
	status = mxge_load_firmware(sc, 0);
	if (status != 0) {
		device_printf(sc->dev, "Falling back to a single slice\n");
		return;
	}

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		goto abort_with_fw;
	}

	/* get rx ring size */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		goto abort_with_fw;
	}
	max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));

	/* tell it the size of the interrupt queues */
	cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
		goto abort_with_fw;
	}

	/* ask for the maximum number of slices it supports */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
	if (status != 0) {
		device_printf(sc->dev,
			      "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
		goto abort_with_fw;
	}
	sc->num_slices = cmd.data0;
	if (sc->num_slices > msix_cnt)
		sc->num_slices = msix_cnt;
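
	/*
	 * The final slice count is the firmware's maximum, capped by
	 * the available MSI-X vectors and either the CPU count
	 * (mxge_max_slices == -1) or the tunable itself, then rounded
	 * down to a power of two, presumably because the firmware
	 * distributes flows to slices by masking the RSS hash.
	 */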
	if (mxge_max_slices == -1) {
		/* cap to number of CPUs in system */
		if (sc->num_slices > mp_ncpus)
			sc->num_slices = mp_ncpus;
	} else {
		if (sc->num_slices > mxge_max_slices)
			sc->num_slices = mxge_max_slices;
	}
	/* make sure it is a power of two */
	while (sc->num_slices & (sc->num_slices - 1))
		sc->num_slices--;

	if (mxge_verbose)
		device_printf(sc->dev, "using %d slices\n",
			      sc->num_slices);

	return;

abort_with_fw:
	sc->fw_name = old_fw;
	(void) mxge_load_firmware(sc, 0);
}

static int
mxge_add_msix_irqs(mxge_softc_t *sc)
{
	size_t bytes;
	int count, err, i, rid;

	rid = PCIR_BAR(2);
	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
						    &rid, RF_ACTIVE);

	if (sc->msix_table_res == NULL) {
		device_printf(sc->dev, "couldn't alloc MSIX table res\n");
		return ENXIO;
	}

	count = sc->num_slices;
	err = pci_alloc_msix(sc->dev, &count);
	if (err != 0) {
		device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d, "
			      "err = %d\n", sc->num_slices, err);
		goto abort_with_msix_table;
	}
	if (count < sc->num_slices) {
		device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
			      sc->num_slices, count);
		device_printf(sc->dev,
			      "Try setting hw.mxge.max_slices to %d\n",
			      count);
		err = ENOSPC;
		goto abort_with_msix;
	}
	bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
	sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_irq_res == NULL) {
		err = ENOMEM;
		goto abort_with_msix;
	}

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
							     SYS_RES_IRQ,
							     &rid, RF_ACTIVE);
		if (sc->msix_irq_res[i] == NULL) {
			device_printf(sc->dev, "couldn't allocate IRQ res"
				      " for message %d\n", i);
			err = ENXIO;
			goto abort_with_res;
		}
	}

	bytes = sizeof (*sc->msix_ih) * sc->num_slices;
	sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_ih == NULL) {
		err = ENOMEM;
		goto abort_with_res;
	}

	for (i = 0; i < sc->num_slices; i++) {
		err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
				     INTR_TYPE_NET | INTR_MPSAFE,
#if __FreeBSD_version > 700030
				     NULL,
#endif
				     mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
		if (err != 0) {
			device_printf(sc->dev, "couldn't setup intr for "
				      "message %d\n", i);
			goto abort_with_intr;
		}
	}

	if (mxge_verbose) {
		device_printf(sc->dev, "using %d msix IRQs:",
			      sc->num_slices);
		for (i = 0; i < sc->num_slices; i++)
			printf(" %ld", rman_get_start(sc->msix_irq_res[i]));
		printf("\n");
	}
	return (0);

abort_with_intr:
	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	free(sc->msix_ih, M_DEVBUF);

abort_with_res:
	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	free(sc->msix_irq_res, M_DEVBUF);

abort_with_msix:
	pci_release_msi(sc->dev);

abort_with_msix_table:
	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	return err;
}
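
/*
 * Single-interrupt fallback used when only one slice is active:
 * prefer a lone MSI message (rid 1), else fall back to the legacy
 * INTx line (rid 0).  sc->legacy_irq records which one was chosen so
 * teardown can release the matching rid.
 */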
static int
mxge_add_single_irq(mxge_softc_t *sc)
{
	int count, err, rid;

	count = pci_msi_count(sc->dev);
	if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
		rid = 1;
	} else {
		rid = 0;
		sc->legacy_irq = 1;
	}
	sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0,
					 1, RF_SHAREABLE | RF_ACTIVE);
	if (sc->irq_res == NULL) {
		device_printf(sc->dev, "could not alloc interrupt\n");
		return ENXIO;
	}
	if (mxge_verbose)
		device_printf(sc->dev, "using %s irq %ld\n",
			      sc->legacy_irq ? "INTx" : "MSI",
			      rman_get_start(sc->irq_res));
	err = bus_setup_intr(sc->dev, sc->irq_res,
			     INTR_TYPE_NET | INTR_MPSAFE,
#if __FreeBSD_version > 700030
			     NULL,
#endif
			     mxge_intr, &sc->ss[0], &sc->ih);
	if (err != 0) {
		bus_release_resource(sc->dev, SYS_RES_IRQ,
				     sc->legacy_irq ? 0 : 1, sc->irq_res);
		if (!sc->legacy_irq)
			pci_release_msi(sc->dev);
	}
	return err;
}

static void
mxge_rem_msix_irqs(mxge_softc_t *sc)
{
	int i, rid;

	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	free(sc->msix_ih, M_DEVBUF);

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	free(sc->msix_irq_res, M_DEVBUF);

	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	pci_release_msi(sc->dev);
}

static void
mxge_rem_single_irq(mxge_softc_t *sc)
{
	bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
	bus_release_resource(sc->dev, SYS_RES_IRQ,
			     sc->legacy_irq ? 0 : 1, sc->irq_res);
	if (!sc->legacy_irq)
		pci_release_msi(sc->dev);
}

static void
mxge_rem_irq(mxge_softc_t *sc)
{
	if (sc->num_slices > 1)
		mxge_rem_msix_irqs(sc);
	else
		mxge_rem_single_irq(sc);
}

static int
mxge_add_irq(mxge_softc_t *sc)
{
	int err;

	if (sc->num_slices > 1)
		err = mxge_add_msix_irqs(sc);
	else
		err = mxge_add_single_irq(sc);

	/* dead code: the "0 &&" disables this re-add of the MSI-X
	   vectors, apparently left in to exercise the teardown path */
	if (0 && err == 0 && sc->num_slices > 1) {
		mxge_rem_msix_irqs(sc);
		err = mxge_add_msix_irqs(sc);
	}
	return err;
}
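
/*
 * Attach: resources are acquired in dependency order (taskqueue,
 * parent DMA tag, ifnet, locks, BAR mapping, command/scratch DMA,
 * firmware, slices, rings, interrupts), and the abort_with_* ladder
 * at the bottom unwinds them in reverse on failure.
 */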
static int
mxge_attach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp;
	int err, rid;

	sc->dev = dev;
	mxge_fetch_tunables(sc);

	TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
	sc->tq = taskqueue_create_fast("mxge_taskq", M_WAITOK,
				       taskqueue_thread_enqueue,
				       &sc->tq);
	if (sc->tq == NULL) {
		err = ENOMEM;
		goto abort_with_nothing;
	}
	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
				device_get_nameunit(sc->dev));

	err = bus_dma_tag_create(NULL,			/* parent */
				 1,			/* alignment */
				 0,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 MXGE_MAX_SEND_DESC,	/* num segs */
				 65536,			/* maxsegsize */
				 0,			/* flags */
				 NULL, NULL,		/* lock */
				 &sc->parent_dmat);	/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating parent dmat\n",
			      err);
		goto abort_with_tq;
	}

	ifp = sc->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "can not if_alloc()\n");
		err = ENOSPC;
		goto abort_with_parent_dmat;
	}
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
		 device_get_nameunit(dev));
	mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
	snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
		 "%s:drv", device_get_nameunit(dev));
	mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
		 MTX_NETWORK_LOCK, MTX_DEF);

	callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);

	mxge_setup_cfg_space(sc);

	/* Map the board into the kernel */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
					 ~0, 1, RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto abort_with_lock;
	}
	sc->sram = rman_get_virtual(sc->mem_res);
	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
	if (sc->sram_size > rman_get_size(sc->mem_res)) {
		device_printf(dev, "impossible memory region size %ld\n",
			      rman_get_size(sc->mem_res));
		err = ENXIO;
		goto abort_with_mem_res;
	}

	/* make a NUL-terminated copy of the EEPROM strings section of
	   Lanai SRAM */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
				sc->eeprom_strings,
				MXGE_EEPROM_STRINGS_SIZE - 2);
	err = mxge_parse_strings(sc);
	if (err != 0)
		goto abort_with_mem_res;

	/* Enable write combining for efficient use of PCIe bus */
	mxge_enable_wc(sc);

	/* Allocate the out of band dma memory */
	err = mxge_dma_alloc(sc, &sc->cmd_dma,
			     sizeof (mxge_cmd_t), 64);
	if (err != 0)
		goto abort_with_mem_res;
	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0)
		goto abort_with_cmd_dma;

	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0)
		goto abort_with_zeropad_dma;

	/* select & load the firmware */
	err = mxge_select_firmware(sc);
	if (err != 0)
		goto abort_with_dmabench;
	sc->intr_coal_delay = mxge_intr_coal_delay;

	mxge_slice_probe(sc);
	err = mxge_alloc_slices(sc);
	if (err != 0)
		goto abort_with_dmabench;

	err = mxge_reset(sc, 0);
	if (err != 0)
		goto abort_with_slices;

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to allocate rings\n");
		/* free the slices too, not just the dma areas */
		goto abort_with_slices;
	}

	err = mxge_add_irq(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to add irq\n");
		goto abort_with_rings;
	}

	ifp->if_baudrate = IF_Gbps(10UL);
	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
		IFCAP_VLAN_MTU;
#ifdef INET
	ifp->if_capabilities |= IFCAP_LRO;
#endif

#ifdef MXGE_NEW_VLAN_API
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
#endif

	sc->max_mtu = mxge_max_mtu(sc);
	if (sc->max_mtu >= 9000)
		ifp->if_capabilities |= IFCAP_JUMBO_MTU;
	else
		device_printf(dev, "MTU limited to %d. Install "
			      "latest firmware for 9000 byte jumbo support\n",
			      sc->max_mtu - ETHER_HDR_LEN);
	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
	ifp->if_capenable = ifp->if_capabilities;
	if (sc->lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	sc->csum_flag = 1;
	ifp->if_init = mxge_init;
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = mxge_ioctl;
	ifp->if_start = mxge_start;
	/* Initialise the ifmedia structure */
	ifmedia_init(&sc->media, 0, mxge_media_change,
		     mxge_media_status);
	mxge_set_media(sc, IFM_ETHER | IFM_AUTO);
	mxge_media_probe(sc);
	sc->dying = 0;
	ether_ifattach(ifp, sc->mac_addr);
	/* ether_ifattach sets mtu to ETHERMTU */
	if (mxge_initial_mtu != ETHERMTU)
		mxge_change_mtu(sc, mxge_initial_mtu);

	mxge_add_sysctls(sc);
#ifdef IFNET_BUF_RING
	ifp->if_transmit = mxge_transmit;
	ifp->if_qflush = mxge_qflush;
#endif
	callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	return 0;

abort_with_rings:
	mxge_free_rings(sc);
abort_with_slices:
	mxge_free_slices(sc);
abort_with_dmabench:
	mxge_dma_free(&sc->dmabench_dma);
abort_with_zeropad_dma:
	mxge_dma_free(&sc->zeropad_dma);
abort_with_cmd_dma:
	mxge_dma_free(&sc->cmd_dma);
abort_with_mem_res:
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
abort_with_lock:
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(ifp);
abort_with_parent_dmat:
	bus_dma_tag_destroy(sc->parent_dmat);
abort_with_tq:
	if (sc->tq != NULL) {
		taskqueue_drain(sc->tq, &sc->watchdog_task);
		taskqueue_free(sc->tq);
		sc->tq = NULL;
	}
abort_with_nothing:
	return err;
}
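
/*
 * Detach: refuse while vlan interfaces are still attached, mark the
 * device dying under the driver lock so racing ioctls bail out, then
 * tear everything down in roughly the reverse of the attach order.
 */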
Install " 4745 "latest firmware for 9000 byte jumbo support\n", 4746 sc->max_mtu - ETHER_HDR_LEN); 4747 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4748 ifp->if_capenable = ifp->if_capabilities; 4749 if (sc->lro_cnt == 0) 4750 ifp->if_capenable &= ~IFCAP_LRO; 4751 sc->csum_flag = 1; 4752 ifp->if_init = mxge_init; 4753 ifp->if_softc = sc; 4754 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4755 ifp->if_ioctl = mxge_ioctl; 4756 ifp->if_start = mxge_start; 4757 /* Initialise the ifmedia structure */ 4758 ifmedia_init(&sc->media, 0, mxge_media_change, 4759 mxge_media_status); 4760 mxge_set_media(sc, IFM_ETHER | IFM_AUTO); 4761 mxge_media_probe(sc); 4762 sc->dying = 0; 4763 ether_ifattach(ifp, sc->mac_addr); 4764 /* ether_ifattach sets mtu to ETHERMTU */ 4765 if (mxge_initial_mtu != ETHERMTU) 4766 mxge_change_mtu(sc, mxge_initial_mtu); 4767 4768 mxge_add_sysctls(sc); 4769 #ifdef IFNET_BUF_RING 4770 ifp->if_transmit = mxge_transmit; 4771 ifp->if_qflush = mxge_qflush; 4772 #endif 4773 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 4774 return 0; 4775 4776 abort_with_rings: 4777 mxge_free_rings(sc); 4778 abort_with_slices: 4779 mxge_free_slices(sc); 4780 abort_with_dmabench: 4781 mxge_dma_free(&sc->dmabench_dma); 4782 abort_with_zeropad_dma: 4783 mxge_dma_free(&sc->zeropad_dma); 4784 abort_with_cmd_dma: 4785 mxge_dma_free(&sc->cmd_dma); 4786 abort_with_mem_res: 4787 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4788 abort_with_lock: 4789 pci_disable_busmaster(dev); 4790 mtx_destroy(&sc->cmd_mtx); 4791 mtx_destroy(&sc->driver_mtx); 4792 if_free(ifp); 4793 abort_with_parent_dmat: 4794 bus_dma_tag_destroy(sc->parent_dmat); 4795 abort_with_tq: 4796 if (sc->tq != NULL) { 4797 taskqueue_drain(sc->tq, &sc->watchdog_task); 4798 taskqueue_free(sc->tq); 4799 sc->tq = NULL; 4800 } 4801 abort_with_nothing: 4802 return err; 4803 } 4804 4805 static int 4806 mxge_detach(device_t dev) 4807 { 4808 mxge_softc_t *sc = device_get_softc(dev); 4809 4810 if (mxge_vlans_active(sc)) { 4811 device_printf(sc->dev, 4812 "Detach vlans before removing module\n"); 4813 return EBUSY; 4814 } 4815 mtx_lock(&sc->driver_mtx); 4816 sc->dying = 1; 4817 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 4818 mxge_close(sc, 0); 4819 mtx_unlock(&sc->driver_mtx); 4820 ether_ifdetach(sc->ifp); 4821 if (sc->tq != NULL) { 4822 taskqueue_drain(sc->tq, &sc->watchdog_task); 4823 taskqueue_free(sc->tq); 4824 sc->tq = NULL; 4825 } 4826 callout_drain(&sc->co_hdl); 4827 ifmedia_removeall(&sc->media); 4828 mxge_dummy_rdma(sc, 0); 4829 mxge_rem_sysctls(sc); 4830 mxge_rem_irq(sc); 4831 mxge_free_rings(sc); 4832 mxge_free_slices(sc); 4833 mxge_dma_free(&sc->dmabench_dma); 4834 mxge_dma_free(&sc->zeropad_dma); 4835 mxge_dma_free(&sc->cmd_dma); 4836 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4837 pci_disable_busmaster(dev); 4838 mtx_destroy(&sc->cmd_mtx); 4839 mtx_destroy(&sc->driver_mtx); 4840 if_free(sc->ifp); 4841 bus_dma_tag_destroy(sc->parent_dmat); 4842 return 0; 4843 } 4844 4845 static int 4846 mxge_shutdown(device_t dev) 4847 { 4848 return 0; 4849 } 4850 4851 /* 4852 This file uses Myri10GE driver indentation. 4853 4854 Local Variables: 4855 c-file-style:"linux" 4856 tab-width:8 4857 End: 4858 */ 4859