/******************************************************************************

Copyright (c) 2006-2009, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>

/* count xmits ourselves, rather than via drbr */
#define NO_SLOW_STATS
#include <net/if.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <net/zlib.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <machine/bus.h>
#include <machine/in_cksum.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>
#include <sys/smp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */

#include <vm/vm.h>		/* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif

#include <dev/mxge/mxge_mcp.h>
#include <dev/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/mxge/if_mxge_var.h>
#ifdef IFNET_BUF_RING
#include <sys/buf_ring.h>
#endif

#include "opt_inet.h"

/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_lro_cnt = 8;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;

static int mxge_always_promisc = 0;
static int mxge_initial_mtu = ETHERMTU_JUMBO;
static int mxge_throttle = 0;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";

static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),
	{0, 0}
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc, int down);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);

static int
mxge_probe(device_t dev)
{
	int rev;

	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		rev = pci_get_revid(dev);
		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n", rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}

static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386) || defined(__amd64)
	vm_offset_t len;
	int err;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram,
			       len, PAT_WRITE_COMBINING);
	if (err != 0) {
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
			      err);
		sc->wc = 0;
	}
#endif
}

/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	if (error == 0) {
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}

static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}

static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */

static int
mxge_parse_strings(mxge_softc_t *sc)
{
#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)

	char *ptr, *limit;
	int i, found_mac;

	ptr = sc->eeprom_strings;
	limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
	found_mac = 0;
	while (ptr < limit && *ptr != '\0') {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			ptr += 1;
			sc->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				ptr += 3;
				if ((ptr + 2) > limit)
					goto abort;
				sc->mac_addr[i] = strtoul(ptr, NULL, 16);
				found_mac = 1;
			}
		} else if (memcmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strncpy(sc->product_code_string, ptr,
				sizeof (sc->product_code_string) - 1);
		} else if (memcmp(ptr, "SN=", 3) == 0) {
			ptr += 3;
			strncpy(sc->serial_number_string, ptr,
				sizeof (sc->serial_number_string) - 1);
		}
		MXGE_NEXT_STRING(ptr);
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}

#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function.  Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access.
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves.  This is because on
	 * opteron/nvidia class machines the 0xe0000000 mapping is
	 * handled by the nvidia chipset; that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	off =  base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);

	if (va == NULL) {
		device_printf(sc->dev, "pmap_mapdev failed\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif
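
/*
 * The benchmark result comes back packed into cmd.data0: the upper 16
 * bits are the number of transfers completed and the lower 16 bits
 * are the elapsed time in 0.5us ticks, so
 * (transfers * len) / (ticks * 0.5us) = (transfers * len * 2) / ticks
 * gives bandwidth in bytes/us, i.e. roughly MB/s.  For example
 * (illustrative numbers), 256 transfers of 4096 bytes in 1024 ticks
 * (512us) works out to 256 * 4096 * 2 / 1024 = 2048 MB/s.
 */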

static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";

	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return are the number of transfers completed.
	 * The lower 16 bits are the time in 0.5us ticks that the
	 * transfers took to complete.
	 */

	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}

/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary.  Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB.  If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */

static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
				      pectl);
			sc->tx_boundary = 2048;
		}
	}

	/*
	 * load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0) {
		return status;
	}

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen.
	 */

	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS)
		device_printf(dev, "Falling back to ethp! "
			      "Please install up to date fw\n");
	return status;
}

static int
mxge_select_firmware(mxge_softc_t *sc)
{
	int aligned = 0;
	int force_firmware = mxge_force_firmware;

	if (sc->throttle)
		force_firmware = sc->throttle;

	if (force_firmware != 0) {
		if (force_firmware == 1)
			aligned = 1;
		else
			aligned = 0;
		if (mxge_verbose)
			device_printf(sc->dev,
				      "Assuming %s completions (forced)\n",
				      aligned ? "aligned" : "unaligned");
		goto abort;
	}

	/* if the PCIe link width is 4 or less, we can use the aligned
	   firmware and skip any checks */
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev,
			      "PCIe x%d Link, expect reduced performance\n",
			      sc->link_width);
		aligned = 1;
		goto abort;
	}

	if (0 == mxge_firmware_probe(sc))
		return 0;

abort:
	if (aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx_boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
	}
	return (mxge_load_firmware(sc, 0));
}

union qualhack
{
	const char *ro_char;
	char *rw_char;
};

static int
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{

	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
			      be32toh(hdr->mcp_type));
		return EIO;
	}

	/* save firmware version for sysctl */
	strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
	if (mxge_verbose)
		device_printf(sc->dev, "firmware id: %s\n", hdr->version);

	sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
	       &sc->fw_ver_minor, &sc->fw_ver_tiny);

	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
	      && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		device_printf(sc->dev, "Found firmware version %s\n",
			      sc->fw_version);
		device_printf(sc->dev, "Driver needs %d.%d\n",
			      MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
		return EINVAL;
	}
	return 0;

}

static void *
z_alloc(void *nil, u_int items, u_int size)
{
	void *ptr;

	ptr = malloc(items * size, M_TEMP, M_NOWAIT);
	return ptr;
}

static void
z_free(void *nil, void *ptr)
{
	free(ptr, M_TEMP);
}

static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
	z_stream zs;
	char *inflate_buffer;
	const struct firmware *fw;
	const mcp_gen_header_t *hdr;
	unsigned hdr_offset;
	int status;
	unsigned int i;
	char dummy;
	size_t fw_len;

	fw = firmware_get(sc->fw_name);
	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n",
			      sc->fw_name);
		return ENOENT;
	}

	/* setup zlib and decompress f/w */
	bzero(&zs, sizeof (zs));
	zs.zalloc = z_alloc;
	zs.zfree = z_free;
	status = inflateInit(&zs);
	if (status != Z_OK) {
		status = EIO;
		goto abort_with_fw;
	}

	/* the uncompressed size is stored as the firmware version,
	   which would otherwise go unused */
	fw_len = (size_t) fw->version;
	inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
	if (inflate_buffer == NULL) {
		status = ENOMEM;
		goto abort_with_zs;
	}
	zs.avail_in = fw->datasize;
	zs.next_in = __DECONST(char *, fw->data);
	zs.avail_out = fw_len;
	zs.next_out = inflate_buffer;
	status = inflate(&zs, Z_FINISH);
	if (status != Z_STREAM_END) {
		device_printf(sc->dev, "zlib %d\n", status);
		status = EIO;
		goto abort_with_buffer;
	}

	/* check id */
	hdr_offset = htobe32(*(const uint32_t *)
			     (inflate_buffer + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
		device_printf(sc->dev, "Bad firmware file");
		status = EIO;
		goto abort_with_buffer;
	}
	hdr = (const void*)(inflate_buffer + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_buffer;

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < fw_len; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
			      inflate_buffer + i,
			      min(256U, (unsigned)(fw_len - i)));
		wmb();
		dummy = *sc->sram;
		wmb();
	}

	*limit = fw_len;
	status = 0;
abort_with_buffer:
	free(inflate_buffer, M_TEMP);
abort_with_zs:
	inflateEnd(&zs);
abort_with_fw:
	firmware_put(fw, FIRMWARE_UNLOAD);
	return status;
}

/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);		/* confirm addr MSW */
	buf[1] = htobe32(dma_low);		/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);		/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high);		/* dummy addr MSW */
	buf[4] = htobe32(dma_low);		/* dummy addr LSW */
	buf[5] = htobe32(enable);		/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}

static int
mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof(*buf) + 8];
	volatile mcp_cmd_response_t *response = sc->cmd;
	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
	uint32_t dma_low, dma_high;
	int err, sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	buf->data0 = htobe32(data->data0);
	buf->data1 = htobe32(data->data1);
	buf->data2 = htobe32(data->data2);
	buf->cmd = htobe32(cmd);
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf->response_addr.low = htobe32(dma_low);
	buf->response_addr.high = htobe32(dma_high);
	mtx_lock(&sc->cmd_mtx);
	response->result = 0xffffffff;
	wmb();
	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	err = EAGAIN;
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
		wmb();
		switch (be32toh(response->result)) {
		case 0:
			data->data0 = be32toh(response->data);
			err = 0;
			break;
		case 0xffffffff:
			DELAY(1000);
			break;
		case MXGEFW_CMD_UNKNOWN:
			err = ENOSYS;
			break;
		case MXGEFW_CMD_ERROR_UNALIGNED:
			err = E2BIG;
			break;
		case MXGEFW_CMD_ERROR_BUSY:
			err = EBUSY;
			break;
		default:
			device_printf(sc->dev,
				      "mxge: command %d "
				      "failed, result = %d\n",
				      cmd, be32toh(response->result));
			err = ENXIO;
			break;
		}
		if (err != EAGAIN)
			break;
	}
	if (err == EAGAIN)
		device_printf(sc->dev, "mxge: command %d timed out, "
			      "result = %d\n",
			      cmd, be32toh(response->result));
	mtx_unlock(&sc->cmd_mtx);
	return err;
}

static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not malloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	free(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}

	return status;
}

static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
				      "Using firmware currently running on NIC"
				      ".  For optimal\n");
			device_printf(sc->dev,
				      "performance consider loading optimized "
				      "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
	/* where the code starts*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8);	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev,"handoff failed (%p = 0x%x)",
			      confirm, *confirm);

		return ENXIO;
	}
	return 0;
}
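
/*
 * Tell the firmware the station address, packed big-endian into the
 * two command words.  For example (illustrative address), MAC
 * 00:60:dd:aa:bb:cc is sent as data0 = 0x0060ddaa and
 * data1 = 0x0000bbcc.
 */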

static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}

static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set promisc mode\n");
	}
}

static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */

	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */

	if_maddr_rlock(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		      &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		      &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "Failed "
				      "MXGEFW_JOIN_MULTICAST_GROUP, "
				      "error status: %d\n", err);
			/* abort, leaving multicast filtering off */
			if_maddr_runlock(ifp);
			return;
		}
	}
	if_maddr_runlock(ifp);
	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}

static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}
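
/*
 * (Re)initialize firmware state: issue MXGEFW_CMD_RESET, restart the
 * dummy RDMAs, size the interrupt queues, configure the RSS slices
 * when more than one is in use, fetch the interrupt ack/deassert
 * offsets, run the DMA benchmark, and zero all per-slice counters
 * before reapplying the MAC/pause/promisc/multicast settings.
 */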

static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts  */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);

	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lro_bad_csum = 0;
		ss->lro_queued = 0;
		ss->lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			bzero(ss->fw_stats, sizeof *ss->fw_stats);
		}
	}
	sc->rdma_tags_available = 15;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	if (sc->throttle) {
		cmd.data0 = sc->throttle;
		if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
				  &cmd)) {
			device_printf(sc->dev,
				      "can't enable throttle\n");
		}
	}
	return status;
}
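
/*
 * The sysctl handlers below all share one shape: snapshot the current
 * value, let sysctl_handle_int() apply any userland update, validate
 * the result, then commit it to the NIC under driver_mtx.  The
 * throttle handler, for instance, rejects values outside
 * [MXGE_MIN_THROTTLE, MXGE_MAX_THROTTLE] before sending
 * MXGEFW_CMD_SET_THROTTLE_FACTOR.
 */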

static int
mxge_change_throttle(SYSCTL_HANDLER_ARGS)
{
	mxge_cmd_t cmd;
	mxge_softc_t *sc;
	int err;
	unsigned int throttle;

	sc = arg1;
	throttle = sc->throttle;
	err = sysctl_handle_int(oidp, &throttle, arg2, req);
	if (err != 0) {
		return err;
	}

	if (throttle == sc->throttle)
		return 0;

	if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	cmd.data0 = throttle;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
	if (err == 0)
		sc->throttle = throttle;
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0) {
		return err;
	}
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0) {
		return err;
	}
	if (enabled == sc->pause)
		return 0;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_pause(sc, enabled);
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
{
	struct ifnet *ifp;
	int err = 0;

	ifp = sc->ifp;
	if (lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	else
		ifp->if_capenable |= IFCAP_LRO;
	sc->lro_cnt = lro_cnt;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		mxge_close(sc, 0);
		err = mxge_open(sc);
	}
	return err;
}

static int
mxge_change_lro(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int lro_cnt;
	int err;

	sc = arg1;
	lro_cnt = sc->lro_cnt;
	err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
	if (err != 0)
		return err;

	if (lro_cnt == sc->lro_cnt)
		return 0;

	if (lro_cnt > 128)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_lro_locked(sc, lro_cnt);
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}

static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int slice;

	if (sc->slice_sysctl_tree == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		if (ss == NULL || ss->sysctl_tree == NULL)
			continue;
		sysctl_ctx_free(&ss->sysctl_ctx);
		ss->sysctl_tree = NULL;
	}
	sysctl_ctx_free(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = NULL;
}
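
/*
 * Everything registered below lands under the device's sysctl tree,
 * so (for example, assuming unit 0) the interrupt coalescing delay
 * can be inspected or tuned from userland with something like
 * "sysctl dev.mxge.0.intr_coal_delay=30".
 */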

static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
	fw = sc->ss[0].fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "firmware_version",
			  CTLFLAG_RD, &sc->fw_version,
			  0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "serial_number",
			  CTLFLAG_RD, &sc->serial_number_string,
			  0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "product_code",
			  CTLFLAG_RD, &sc->product_code_string,
			  0, "product code");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "pcie_link_width",
		       CTLFLAG_RD, &sc->link_width,
		       0, "PCIe link width");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_boundary",
		       CTLFLAG_RD, &sc->tx_boundary,
		       0, "tx boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_combine",
		       CTLFLAG_RD, &sc->wc,
		       0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_dma_MBs",
		       CTLFLAG_RD, &sc->read_dma,
		       0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_dma_MBs",
		       CTLFLAG_RD, &sc->write_dma,
		       0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_write_dma_MBs",
		       CTLFLAG_RD, &sc->read_write_dma,
		       0, "DMA concurrent Read/Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "watchdog_resets",
		       CTLFLAG_RD, &sc->watchdog_resets,
		       0, "Number of times NIC was reset");

	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"intr_coal_delay",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_intr_coal,
			"I", "interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"throttle",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_throttle,
			"I", "transmit throttling");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"flow_control_enabled",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_flow_control,
			"I", "enable flow control");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "deassert_wait",
		       CTLFLAG_RW, &mxge_deassert_wait,
		       0, "Wait for IRQ line to go low in ihandler");

	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"link_up",
			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
			0, mxge_handle_be32,
			"I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"rdma_tags_available",
			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
			0, mxge_handle_be32,
			"I", "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_crc32",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_crc32,
			0, mxge_handle_be32,
			"I", "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_phy",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_phy,
			0, mxge_handle_be32,
			"I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_error_or_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_link_error_or_filtered,
			0, mxge_handle_be32,
			"I", "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_overflow",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
			0, mxge_handle_be32,
			"I", "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_multicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_multicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_big_buffer",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_small_buffer",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_no_small_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_overrun",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
			0, mxge_handle_be32,
			"I", "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_pause",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_pause,
			0, mxge_handle_be32,
			"I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_runt",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
			0, mxge_handle_be32,
			"I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_unicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_unicast_filtered");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "verbose",
		       CTLFLAG_RW, &mxge_verbose,
		       0, "verbose printing");

	/* lro */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"lro_cnt",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_lro,
			"I", "number of lro merge queues");

	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree =
		SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
				"slice", CTLFLAG_RD, 0, "");

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		sprintf(slice_num, "%d", slice);
		ss->sysctl_tree =
			SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
					CTLFLAG_RD, 0, "");
		children = SYSCTL_CHILDREN(ss->sysctl_tree);
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_small_cnt",
			       CTLFLAG_RD, &ss->rx_small.cnt,
			       0, "rx_small_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_big_cnt",
			       CTLFLAG_RD, &ss->rx_big.cnt,
			       0, "rx_big_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
			       0, "number of lro merge queues flushed");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_queued", CTLFLAG_RD, &ss->lro_queued,
			       0, "number of frames appended to lro merge "
			       "queues");

#ifndef IFNET_BUF_RING
		/* only transmit from slice 0 for now */
		if (slice > 0)
			continue;
#endif
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_req",
			       CTLFLAG_RD, &ss->tx.req,
			       0, "tx_req");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_done",
			       CTLFLAG_RD, &ss->tx.done,
			       0, "tx_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_pkt_done",
			       CTLFLAG_RD, &ss->tx.pkt_done,
			       0, "tx_pkt_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_stall",
			       CTLFLAG_RD, &ss->tx.stall,
			       0, "tx_stall");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_wake",
			       CTLFLAG_RD, &ss->tx.wake,
			       0, "tx_wake");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_defrag",
			       CTLFLAG_RD, &ss->tx.defrag,
			       0, "tx_defrag");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_queue_active",
			       CTLFLAG_RD, &ss->tx.queue_active,
			       0, "tx_queue_active");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_activate",
			       CTLFLAG_RD, &ss->tx.activate,
			       0, "tx_activate");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_deactivate",
			       CTLFLAG_RD, &ss->tx.deactivate,
			       0, "tx_deactivate");
	}
}

/* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
   backwards one at a time and handle ring wraps */

static inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
			  mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
		wmb();
	}
}
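
/*
 * Example of the backwards copy (illustrative numbers): with
 * mask = 255, req = 254 and cnt = 4, the loop above writes slots
 * (254+3)&255 = 1, then 0, then 255, leaving slot 254 -- the first
 * request, whose valid flags the caller holds back -- to be written
 * last by mxge_submit_req().
 */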

/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */

static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
		int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}
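
/*
 * A worked TSO example (illustrative numbers): with a 14-byte
 * Ethernet header, a 20-byte IP header and a 20-byte TCP header,
 * cum_len in mxge_encap_tso() starts at -54.  It climbs toward zero
 * while the header bytes are emitted, hits zero exactly where the
 * payload begins, and from then on every crossing of a multiple of
 * mss forces a TSO_CHOP/FIRST boundary so the firmware can cut the
 * stream into mss-sized frames.
 */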

#if IFCAP_TSO4

static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, int ip_off)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct ip *ip;
	struct tcphdr *tcp;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	/* ensure we have the ethernet, IP and TCP
	   header together in the first mbuf, copy
	   it to a scratch buffer if not */
	if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
		m_copydata(m, 0, ip_off + sizeof (*ip),
			   ss->scratch);
		ip = (struct ip *)(ss->scratch + ip_off);
	} else {
		ip = (struct ip *)(mtod(m, char *) + ip_off);
	}
	if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2)
			    + sizeof (*tcp))) {
		m_copydata(m, 0, ip_off + (ip->ip_hl << 2)
			   + sizeof (*tcp), ss->scratch);
		ip = (struct ip *)(ss->scratch + ip_off);
	}

	tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
	cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2));

	/* TSO implies checksum offload on this hardware */
	cksum_offset = ip_off + (ip->ip_hl << 2);
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */

	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces*/
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (__predict_false(cksum_offset > seglen))
				cksum_offset -= seglen;
			else
				cksum_offset = 0;
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req-rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	ss->oerrors++;
	if (!once) {
		printf("tx->max_desc exceeded via TSO!\n");
		printf("mss = %d, %ld, %d!\n", mss,
		       (long)seg - (long)tx->seg_list, tx->max_desc);
		once = 1;
	}
	return;

}

#endif /* IFCAP_TSO4 */

#ifdef MXGE_NEW_VLAN_API
/*
 * We reproduce the software vlan tag insertion from
 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
 * vlan tag insertion.  We need to advertise this in order to have the
 * vlan interface respect our csum offload flags.
 */

static struct mbuf *
mxge_vlan_tag_insert(struct mbuf *m)
{
	struct ether_vlan_header *evl;

	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT);
	if (__predict_false(m == NULL))
		return NULL;
	if (m->m_len < sizeof(*evl)) {
		m = m_pullup(m, sizeof(*evl));
		if (__predict_false(m == NULL))
			return NULL;
	}
	/*
	 * Transform the Ethernet header into an Ethernet header
	 * with 802.1Q encapsulation.
	 */
	evl = mtod(m, struct ether_vlan_header *);
	bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
	      (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
	evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
	evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
	m->m_flags &= ~M_VLANTAG;
	return m;
}
#endif /* MXGE_NEW_VLAN_API */

static void
mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
{
	mxge_softc_t *sc;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct mbuf *m_tmp;
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;
	struct ip *ip;
	int cnt, cum_len, err, i, idx, odd_flag, ip_off;
	uint16_t pseudo_hdr_offset;
	uint8_t flags, cksum_offset;

	sc = ss->sc;
	ifp = sc->ifp;
	tx = &ss->tx;

	ip_off = sizeof (struct ether_header);
#ifdef MXGE_NEW_VLAN_API
	if (m->m_flags & M_VLANTAG) {
		m = mxge_vlan_tag_insert(m);
		if (__predict_false(m == NULL))
			goto drop;
		ip_off += ETHER_VLAN_ENCAP_LEN;
	}
#endif
	/* (try to) map the frame for DMA */
	idx = tx->req & tx->mask;
	err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
				      m, tx->seg_list, &cnt,
				      BUS_DMA_NOWAIT);
	if (__predict_false(err == EFBIG)) {
		/* Too many segments in the chain.  Try
		   to defrag */
		m_tmp = m_defrag(m, M_NOWAIT);
		if (m_tmp == NULL) {
			goto drop;
		}
		ss->tx.defrag++;
		m = m_tmp;
		err = bus_dmamap_load_mbuf_sg(tx->dmat,
					      tx->info[idx].map,
					      m, tx->seg_list, &cnt,
					      BUS_DMA_NOWAIT);
	}
	if (__predict_false(err != 0)) {
		device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
			      " packet len = %d\n", err, m->m_pkthdr.len);
		goto drop;
	}
	bus_dmamap_sync(tx->dmat, tx->info[idx].map,
			BUS_DMASYNC_PREWRITE);
	tx->info[idx].m = m;

#if IFCAP_TSO4
	/* TSO is different enough, we handle it in another routine */
	if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
		mxge_encap_tso(ss, m, cnt, ip_off);
		return;
	}
#endif

	req = tx->req_list;
	cksum_offset = 0;
	pseudo_hdr_offset = 0;
	flags = MXGEFW_FLAGS_NO_TSO;
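
	/* For a plain TCP packet with no VLAN tag and no IP options
	 * (illustrative case), ip_off is 14, so the cksum_offset
	 * computed below is 14 + 20 = 34 (the start of the TCP header)
	 * and pseudo_hdr_offset is 34 + csum_data, where csum_data is
	 * the checksum field's offset within the transport header
	 * (16 for TCP, giving 50). */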
*/ 2087 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) { 2088 /* ensure ip header is in first mbuf, copy 2089 it to a scratch buffer if not */ 2090 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 2091 m_copydata(m, 0, ip_off + sizeof (*ip), 2092 ss->scratch); 2093 ip = (struct ip *)(ss->scratch + ip_off); 2094 } else { 2095 ip = (struct ip *)(mtod(m, char *) + ip_off); 2096 } 2097 cksum_offset = ip_off + (ip->ip_hl << 2); 2098 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2099 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2100 req->cksum_offset = cksum_offset; 2101 flags |= MXGEFW_FLAGS_CKSUM; 2102 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2103 } else { 2104 odd_flag = 0; 2105 } 2106 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2107 flags |= MXGEFW_FLAGS_SMALL; 2108 2109 /* convert segments into a request list */ 2110 cum_len = 0; 2111 seg = tx->seg_list; 2112 req->flags = MXGEFW_FLAGS_FIRST; 2113 for (i = 0; i < cnt; i++) { 2114 req->addr_low = 2115 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2116 req->addr_high = 2117 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2118 req->length = htobe16(seg->ds_len); 2119 req->cksum_offset = cksum_offset; 2120 if (cksum_offset > seg->ds_len) 2121 cksum_offset -= seg->ds_len; 2122 else 2123 cksum_offset = 0; 2124 req->pseudo_hdr_offset = pseudo_hdr_offset; 2125 req->pad = 0; /* complete solid 16-byte block */ 2126 req->rdma_count = 1; 2127 req->flags |= flags | ((cum_len & 1) * odd_flag); 2128 cum_len += seg->ds_len; 2129 seg++; 2130 req++; 2131 req->flags = 0; 2132 } 2133 req--; 2134 /* pad runts to 60 bytes */ 2135 if (cum_len < 60) { 2136 req++; 2137 req->addr_low = 2138 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2139 req->addr_high = 2140 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2141 req->length = htobe16(60 - cum_len); 2142 req->cksum_offset = 0; 2143 req->pseudo_hdr_offset = pseudo_hdr_offset; 2144 req->pad = 0; /* complete solid 16-byte block */ 2145 req->rdma_count = 1; 2146 req->flags |= flags | ((cum_len & 1) * odd_flag); 2147 cnt++; 2148 } 2149 2150 tx->req_list[0].rdma_count = cnt; 2151 #if 0 2152 /* print what the firmware will see */ 2153 for (i = 0; i < cnt; i++) { 2154 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2155 "cso:%d, flags:0x%x, rdma:%d\n", 2156 i, (int)ntohl(tx->req_list[i].addr_high), 2157 (int)ntohl(tx->req_list[i].addr_low), 2158 (int)ntohs(tx->req_list[i].length), 2159 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2160 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2161 tx->req_list[i].rdma_count); 2162 } 2163 printf("--------------\n"); 2164 #endif 2165 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2166 mxge_submit_req(tx, tx->req_list, cnt); 2167 #ifdef IFNET_BUF_RING 2168 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2169 /* tell the NIC to start polling this slice */ 2170 *tx->send_go = 1; 2171 tx->queue_active = 1; 2172 tx->activate++; 2173 wmb(); 2174 } 2175 #endif 2176 return; 2177 2178 drop: 2179 m_freem(m); 2180 ss->oerrors++; 2181 return; 2182 } 2183 2184 #ifdef IFNET_BUF_RING 2185 static void 2186 mxge_qflush(struct ifnet *ifp) 2187 { 2188 mxge_softc_t *sc = ifp->if_softc; 2189 mxge_tx_ring_t *tx; 2190 struct mbuf *m; 2191 int slice; 2192 2193 for (slice = 0; slice < sc->num_slices; slice++) { 2194 tx = &sc->ss[slice].tx; 2195 mtx_lock(&tx->mtx); 2196 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2197 m_freem(m); 2198 mtx_unlock(&tx->mtx); 2199 } 2200 if_qflush(ifp); 2201 } 2202 2203 static inline void 2204 mxge_start_locked(struct 
mxge_slice_state *ss) 2205 { 2206 mxge_softc_t *sc; 2207 struct mbuf *m; 2208 struct ifnet *ifp; 2209 mxge_tx_ring_t *tx; 2210 2211 sc = ss->sc; 2212 ifp = sc->ifp; 2213 tx = &ss->tx; 2214 2215 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2216 m = drbr_dequeue(ifp, tx->br); 2217 if (m == NULL) { 2218 return; 2219 } 2220 /* let BPF see it */ 2221 BPF_MTAP(ifp, m); 2222 2223 /* give it to the nic */ 2224 mxge_encap(ss, m); 2225 } 2226 /* ran out of transmit slots */ 2227 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) 2228 && (!drbr_empty(ifp, tx->br))) { 2229 ss->if_drv_flags |= IFF_DRV_OACTIVE; 2230 tx->stall++; 2231 } 2232 } 2233 2234 static int 2235 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2236 { 2237 mxge_softc_t *sc; 2238 struct ifnet *ifp; 2239 mxge_tx_ring_t *tx; 2240 int err; 2241 2242 sc = ss->sc; 2243 ifp = sc->ifp; 2244 tx = &ss->tx; 2245 2246 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 2247 IFF_DRV_RUNNING) { 2248 err = drbr_enqueue(ifp, tx->br, m); 2249 return (err); 2250 } 2251 2252 if (!drbr_needs_enqueue(ifp, tx->br) && 2253 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2254 /* let BPF see it */ 2255 BPF_MTAP(ifp, m); 2256 /* give it to the nic */ 2257 mxge_encap(ss, m); 2258 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2259 return (err); 2260 } 2261 if (!drbr_empty(ifp, tx->br)) 2262 mxge_start_locked(ss); 2263 return (0); 2264 } 2265 2266 static int 2267 mxge_transmit(struct ifnet *ifp, struct mbuf *m) 2268 { 2269 mxge_softc_t *sc = ifp->if_softc; 2270 struct mxge_slice_state *ss; 2271 mxge_tx_ring_t *tx; 2272 int err = 0; 2273 int slice; 2274 2275 slice = m->m_pkthdr.flowid; 2276 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ 2277 2278 ss = &sc->ss[slice]; 2279 tx = &ss->tx; 2280 2281 if (mtx_trylock(&tx->mtx)) { 2282 err = mxge_transmit_locked(ss, m); 2283 mtx_unlock(&tx->mtx); 2284 } else { 2285 err = drbr_enqueue(ifp, tx->br, m); 2286 } 2287 2288 return (err); 2289 } 2290 2291 #else 2292 2293 static inline void 2294 mxge_start_locked(struct mxge_slice_state *ss) 2295 { 2296 mxge_softc_t *sc; 2297 struct mbuf *m; 2298 struct ifnet *ifp; 2299 mxge_tx_ring_t *tx; 2300 2301 sc = ss->sc; 2302 ifp = sc->ifp; 2303 tx = &ss->tx; 2304 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2305 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 2306 if (m == NULL) { 2307 return; 2308 } 2309 /* let BPF see it */ 2310 BPF_MTAP(ifp, m); 2311 2312 /* give it to the nic */ 2313 mxge_encap(ss, m); 2314 } 2315 /* ran out of transmit slots */ 2316 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 2317 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2318 tx->stall++; 2319 } 2320 } 2321 #endif 2322 static void 2323 mxge_start(struct ifnet *ifp) 2324 { 2325 mxge_softc_t *sc = ifp->if_softc; 2326 struct mxge_slice_state *ss; 2327 2328 /* only use the first slice for now */ 2329 ss = &sc->ss[0]; 2330 mtx_lock(&ss->tx.mtx); 2331 mxge_start_locked(ss); 2332 mtx_unlock(&ss->tx.mtx); 2333 } 2334 2335 /* 2336 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2337 * at most 32 bytes at a time, so as to avoid involving the software 2338 * pio handler in the nic. 
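 * Each mcp_kreq_ether_recv_t is 8 bytes, so the two mxge_pio_copy()
 * calls below move an 8-entry chunk as a pair of 32-byte bursts.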
We re-write the first segment's low 2339 * DMA address to mark it valid only after we write the entire chunk 2340 * in a burst 2341 */ 2342 static inline void 2343 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2344 mcp_kreq_ether_recv_t *src) 2345 { 2346 uint32_t low; 2347 2348 low = src->addr_low; 2349 src->addr_low = 0xffffffff; 2350 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2351 wmb(); 2352 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2353 wmb(); 2354 src->addr_low = low; 2355 dst->addr_low = low; 2356 wmb(); 2357 } 2358 2359 static int 2360 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2361 { 2362 bus_dma_segment_t seg; 2363 struct mbuf *m; 2364 mxge_rx_ring_t *rx = &ss->rx_small; 2365 int cnt, err; 2366 2367 m = m_gethdr(M_DONTWAIT, MT_DATA); 2368 if (m == NULL) { 2369 rx->alloc_fail++; 2370 err = ENOBUFS; 2371 goto done; 2372 } 2373 m->m_len = MHLEN; 2374 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2375 &seg, &cnt, BUS_DMA_NOWAIT); 2376 if (err != 0) { 2377 m_free(m); 2378 goto done; 2379 } 2380 rx->info[idx].m = m; 2381 rx->shadow[idx].addr_low = 2382 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2383 rx->shadow[idx].addr_high = 2384 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2385 2386 done: 2387 if ((idx & 7) == 7) 2388 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2389 return err; 2390 } 2391 2392 static int 2393 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2394 { 2395 bus_dma_segment_t seg[3]; 2396 struct mbuf *m; 2397 mxge_rx_ring_t *rx = &ss->rx_big; 2398 int cnt, err, i; 2399 2400 if (rx->cl_size == MCLBYTES) 2401 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); 2402 else 2403 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2404 if (m == NULL) { 2405 rx->alloc_fail++; 2406 err = ENOBUFS; 2407 goto done; 2408 } 2409 m->m_len = rx->mlen; 2410 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2411 seg, &cnt, BUS_DMA_NOWAIT); 2412 if (err != 0) { 2413 m_free(m); 2414 goto done; 2415 } 2416 rx->info[idx].m = m; 2417 rx->shadow[idx].addr_low = 2418 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2419 rx->shadow[idx].addr_high = 2420 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2421 2422 #if MXGE_VIRT_JUMBOS 2423 for (i = 1; i < cnt; i++) { 2424 rx->shadow[idx + i].addr_low = 2425 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2426 rx->shadow[idx + i].addr_high = 2427 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2428 } 2429 #endif 2430 2431 done: 2432 for (i = 0; i < rx->nbufs; i++) { 2433 if ((idx & 7) == 7) { 2434 mxge_submit_8rx(&rx->lanai[idx - 7], 2435 &rx->shadow[idx - 7]); 2436 } 2437 idx++; 2438 } 2439 return err; 2440 } 2441 2442 /* 2443 * Myri10GE hardware checksums are not valid if the sender 2444 * padded the frame with non-zero padding. This is because 2445 * the firmware just does a simple 16-bit 1s complement 2446 * checksum across the entire frame, excluding the first 14 2447 * bytes. 
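 * A frame padded with non-zero bytes therefore makes the firmware's
 * sum disagree with the packet's TCP/UDP checksum, so the
 * verification below simply fails for it and the stack falls back
 * to software checksumming.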
It is best to simply check the checksum and 2448 * tell the stack about it only if the checksum is good 2449 */ 2450 2451 static inline uint16_t 2452 mxge_rx_csum(struct mbuf *m, int csum) 2453 { 2454 struct ether_header *eh; 2455 struct ip *ip; 2456 uint16_t c; 2457 2458 eh = mtod(m, struct ether_header *); 2459 2460 /* only deal with IPv4 TCP & UDP for now */ 2461 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP))) 2462 return 1; 2463 ip = (struct ip *)(eh + 1); 2464 if (__predict_false(ip->ip_p != IPPROTO_TCP && 2465 ip->ip_p != IPPROTO_UDP)) 2466 return 1; 2467 #ifdef INET 2468 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2469 htonl(ntohs(csum) + ntohs(ip->ip_len) + 2470 - (ip->ip_hl << 2) + ip->ip_p)); 2471 #else 2472 c = 1; 2473 #endif 2474 c ^= 0xffff; 2475 return (c); 2476 } 2477 2478 static void 2479 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2480 { 2481 struct ether_vlan_header *evl; 2482 struct ether_header *eh; 2483 uint32_t partial; 2484 2485 evl = mtod(m, struct ether_vlan_header *); 2486 eh = mtod(m, struct ether_header *); 2487 2488 /* 2489 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes 2490 * after what the firmware thought was the end of the ethernet 2491 * header. 2492 */ 2493 2494 /* put checksum into host byte order */ 2495 *csum = ntohs(*csum); 2496 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2497 (*csum) += ~partial; 2498 (*csum) += ((*csum) < ~partial); 2499 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2500 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2501 2502 /* restore checksum to network byte order; 2503 later consumers expect this */ 2504 *csum = htons(*csum); 2505 2506 /* save the tag */ 2507 #ifdef MXGE_NEW_VLAN_API 2508 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); 2509 #else 2510 { 2511 struct m_tag *mtag; 2512 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), 2513 M_NOWAIT); 2514 if (mtag == NULL) 2515 return; 2516 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); 2517 m_tag_prepend(m, mtag); 2518 } 2519 2520 #endif 2521 m->m_flags |= M_VLANTAG; 2522 2523 /* 2524 * Remove the 802.1q header by copying the Ethernet 2525 * addresses over it and adjusting the beginning of 2526 * the data in the mbuf. The encapsulated Ethernet 2527 * type field is already in place.
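 * (This mirrors the insertion path above: the 12 bytes of MAC
 * addresses slide forward over the 4-byte 802.1Q shim, then m_adj()
 * trims the slack from the front of the mbuf.)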
2528 */ 2529 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2530 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2531 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2532 } 2533 2534 2535 static inline void 2536 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2537 { 2538 mxge_softc_t *sc; 2539 struct ifnet *ifp; 2540 struct mbuf *m; 2541 struct ether_header *eh; 2542 mxge_rx_ring_t *rx; 2543 bus_dmamap_t old_map; 2544 int idx; 2545 uint16_t tcpudp_csum; 2546 2547 sc = ss->sc; 2548 ifp = sc->ifp; 2549 rx = &ss->rx_big; 2550 idx = rx->cnt & rx->mask; 2551 rx->cnt += rx->nbufs; 2552 /* save a pointer to the received mbuf */ 2553 m = rx->info[idx].m; 2554 /* try to replace the received mbuf */ 2555 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2556 /* drop the frame -- the old mbuf is re-cycled */ 2557 ifp->if_ierrors++; 2558 return; 2559 } 2560 2561 /* unmap the received buffer */ 2562 old_map = rx->info[idx].map; 2563 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2564 bus_dmamap_unload(rx->dmat, old_map); 2565 2566 /* swap the bus_dmamap_t's */ 2567 rx->info[idx].map = rx->extra_map; 2568 rx->extra_map = old_map; 2569 2570 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2571 * aligned */ 2572 m->m_data += MXGEFW_PAD; 2573 2574 m->m_pkthdr.rcvif = ifp; 2575 m->m_len = m->m_pkthdr.len = len; 2576 ss->ipackets++; 2577 eh = mtod(m, struct ether_header *); 2578 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2579 mxge_vlan_tag_remove(m, &csum); 2580 } 2581 /* if the checksum is valid, mark it in the mbuf header */ 2582 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2583 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2584 return; 2585 /* otherwise, it was a UDP frame, or a TCP frame which 2586 we could not do LRO on. 
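(mxge_lro_rx() returns 0 only when it merged the segment into an LRO aggregate; any other return lands here.)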
Tell the stack that the 2587 checksum is good */ 2588 m->m_pkthdr.csum_data = 0xffff; 2589 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2590 } 2591 /* flowid only valid if RSS hashing is enabled */ 2592 if (sc->num_slices > 1) { 2593 m->m_pkthdr.flowid = (ss - sc->ss); 2594 m->m_flags |= M_FLOWID; 2595 } 2596 /* pass the frame up the stack */ 2597 (*ifp->if_input)(ifp, m); 2598 } 2599 2600 static inline void 2601 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2602 { 2603 mxge_softc_t *sc; 2604 struct ifnet *ifp; 2605 struct ether_header *eh; 2606 struct mbuf *m; 2607 mxge_rx_ring_t *rx; 2608 bus_dmamap_t old_map; 2609 int idx; 2610 uint16_t tcpudp_csum; 2611 2612 sc = ss->sc; 2613 ifp = sc->ifp; 2614 rx = &ss->rx_small; 2615 idx = rx->cnt & rx->mask; 2616 rx->cnt++; 2617 /* save a pointer to the received mbuf */ 2618 m = rx->info[idx].m; 2619 /* try to replace the received mbuf */ 2620 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2621 /* drop the frame -- the old mbuf is re-cycled */ 2622 ifp->if_ierrors++; 2623 return; 2624 } 2625 2626 /* unmap the received buffer */ 2627 old_map = rx->info[idx].map; 2628 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2629 bus_dmamap_unload(rx->dmat, old_map); 2630 2631 /* swap the bus_dmamap_t's */ 2632 rx->info[idx].map = rx->extra_map; 2633 rx->extra_map = old_map; 2634 2635 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2636 * aligned */ 2637 m->m_data += MXGEFW_PAD; 2638 2639 m->m_pkthdr.rcvif = ifp; 2640 m->m_len = m->m_pkthdr.len = len; 2641 ss->ipackets++; 2642 eh = mtod(m, struct ether_header *); 2643 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2644 mxge_vlan_tag_remove(m, &csum); 2645 } 2646 /* if the checksum is valid, mark it in the mbuf header */ 2647 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2648 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2649 return; 2650 /* otherwise, it was a UDP frame, or a TCP frame which 2651 we could not do LRO on. 
Tell the stack that the 2652 checksum is good */ 2653 m->m_pkthdr.csum_data = 0xffff; 2654 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2655 } 2656 /* flowid only valid if RSS hashing is enabled */ 2657 if (sc->num_slices > 1) { 2658 m->m_pkthdr.flowid = (ss - sc->ss); 2659 m->m_flags |= M_FLOWID; 2660 } 2661 /* pass the frame up the stack */ 2662 (*ifp->if_input)(ifp, m); 2663 } 2664 2665 static inline void 2666 mxge_clean_rx_done(struct mxge_slice_state *ss) 2667 { 2668 mxge_rx_done_t *rx_done = &ss->rx_done; 2669 int limit = 0; 2670 uint16_t length; 2671 uint16_t checksum; 2672 2673 2674 while (rx_done->entry[rx_done->idx].length != 0) { 2675 length = ntohs(rx_done->entry[rx_done->idx].length); 2676 rx_done->entry[rx_done->idx].length = 0; 2677 checksum = rx_done->entry[rx_done->idx].checksum; 2678 if (length <= (MHLEN - MXGEFW_PAD)) 2679 mxge_rx_done_small(ss, length, checksum); 2680 else 2681 mxge_rx_done_big(ss, length, checksum); 2682 rx_done->cnt++; 2683 rx_done->idx = rx_done->cnt & rx_done->mask; 2684 2685 /* limit potential for livelock */ 2686 if (__predict_false(++limit > rx_done->mask / 2)) 2687 break; 2688 } 2689 #ifdef INET 2690 while (!SLIST_EMPTY(&ss->lro_active)) { 2691 struct lro_entry *lro = SLIST_FIRST(&ss->lro_active); 2692 SLIST_REMOVE_HEAD(&ss->lro_active, next); 2693 mxge_lro_flush(ss, lro); 2694 } 2695 #endif 2696 } 2697 2698 2699 static inline void 2700 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2701 { 2702 struct ifnet *ifp; 2703 mxge_tx_ring_t *tx; 2704 struct mbuf *m; 2705 bus_dmamap_t map; 2706 int idx; 2707 int *flags; 2708 2709 tx = &ss->tx; 2710 ifp = ss->sc->ifp; 2711 while (tx->pkt_done != mcp_idx) { 2712 idx = tx->done & tx->mask; 2713 tx->done++; 2714 m = tx->info[idx].m; 2715 /* mbuf and DMA map only attached to the first 2716 segment per-mbuf */ 2717 if (m != NULL) { 2718 ss->obytes += m->m_pkthdr.len; 2719 if (m->m_flags & M_MCAST) 2720 ss->omcasts++; 2721 ss->opackets++; 2722 tx->info[idx].m = NULL; 2723 map = tx->info[idx].map; 2724 bus_dmamap_unload(tx->dmat, map); 2725 m_freem(m); 2726 } 2727 if (tx->info[idx].flag) { 2728 tx->info[idx].flag = 0; 2729 tx->pkt_done++; 2730 } 2731 } 2732 2733 /* If we have space, clear IFF_OACTIVE to tell the stack that 2734 it's OK to send packets */ 2735 #ifdef IFNET_BUF_RING 2736 flags = &ss->if_drv_flags; 2737 #else 2738 flags = &ifp->if_drv_flags; 2739 #endif 2740 mtx_lock(&ss->tx.mtx); 2741 if ((*flags) & IFF_DRV_OACTIVE && 2742 tx->req - tx->done < (tx->mask + 1)/4) { 2743 *(flags) &= ~IFF_DRV_OACTIVE; 2744 ss->tx.wake++; 2745 mxge_start_locked(ss); 2746 } 2747 #ifdef IFNET_BUF_RING 2748 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { 2749 /* let the NIC stop polling this queue, since there 2750 * are no more transmits pending */ 2751 if (tx->req == tx->done) { 2752 *tx->send_stop = 1; 2753 tx->queue_active = 0; 2754 tx->deactivate++; 2755 wmb(); 2756 } 2757 } 2758 #endif 2759 mtx_unlock(&ss->tx.mtx); 2760 2761 } 2762 2763 static struct mxge_media_type mxge_xfp_media_types[] = 2764 { 2765 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2766 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2767 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2768 {0, (1 << 5), "10GBASE-ER"}, 2769 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2770 {0, (1 << 3), "10GBASE-SW"}, 2771 {0, (1 << 2), "10GBASE-LW"}, 2772 {0, (1 << 1), "10GBASE-EW"}, 2773 {0, (1 << 0), "Reserved"} 2774 }; 2775 static struct mxge_media_type mxge_sfp_media_types[] = 2776 { 2777 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"}, 2778 {0, (1 << 7),
"Reserved"}, 2779 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2780 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2781 {IFM_10G_SR, (1 << 4), "10GBASE-SR"} 2782 }; 2783 2784 static void 2785 mxge_set_media(mxge_softc_t *sc, int type) 2786 { 2787 sc->media_flags |= type; 2788 ifmedia_add(&sc->media, sc->media_flags, 0, NULL); 2789 ifmedia_set(&sc->media, sc->media_flags); 2790 } 2791 2792 2793 /* 2794 * Determine the media type for a NIC. Some XFPs will identify 2795 * themselves only when their link is up, so this is initiated via a 2796 * link up interrupt. However, this can potentially take up to 2797 * several milliseconds, so it is run via the watchdog routine, rather 2798 * than in the interrupt handler itself. This need only be done 2799 * once, not each time the link is up. 2800 */ 2801 static void 2802 mxge_media_probe(mxge_softc_t *sc) 2803 { 2804 mxge_cmd_t cmd; 2805 char *cage_type; 2806 char *ptr; 2807 struct mxge_media_type *mxge_media_types = NULL; 2808 int i, err, ms, mxge_media_type_entries; 2809 uint32_t byte; 2810 2811 sc->need_media_probe = 0; 2812 2813 /* if we've already set a media type, we're done */ 2814 if (sc->media_flags != (IFM_ETHER | IFM_AUTO)) 2815 return; 2816 2817 /* 2818 * parse the product code to deterimine the interface type 2819 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2820 * after the 3rd dash in the driver's cached copy of the 2821 * EEPROM's product code string. 2822 */ 2823 ptr = sc->product_code_string; 2824 if (ptr == NULL) { 2825 device_printf(sc->dev, "Missing product code\n"); 2826 } 2827 2828 for (i = 0; i < 3; i++, ptr++) { 2829 ptr = index(ptr, '-'); 2830 if (ptr == NULL) { 2831 device_printf(sc->dev, 2832 "only %d dashes in PC?!?\n", i); 2833 return; 2834 } 2835 } 2836 if (*ptr == 'C') { 2837 /* -C is CX4 */ 2838 mxge_set_media(sc, IFM_10G_CX4); 2839 return; 2840 } 2841 else if (*ptr == 'Q') { 2842 /* -Q is Quad Ribbon Fiber */ 2843 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2844 /* FreeBSD has no media type for Quad ribbon fiber */ 2845 return; 2846 } 2847 2848 if (*ptr == 'R') { 2849 /* -R is XFP */ 2850 mxge_media_types = mxge_xfp_media_types; 2851 mxge_media_type_entries = 2852 sizeof (mxge_xfp_media_types) / 2853 sizeof (mxge_xfp_media_types[0]); 2854 byte = MXGE_XFP_COMPLIANCE_BYTE; 2855 cage_type = "XFP"; 2856 } 2857 2858 if (*ptr == 'S' || *(ptr +1) == 'S') { 2859 /* -S or -2S is SFP+ */ 2860 mxge_media_types = mxge_sfp_media_types; 2861 mxge_media_type_entries = 2862 sizeof (mxge_sfp_media_types) / 2863 sizeof (mxge_sfp_media_types[0]); 2864 cage_type = "SFP+"; 2865 byte = 3; 2866 } 2867 2868 if (mxge_media_types == NULL) { 2869 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2870 return; 2871 } 2872 2873 /* 2874 * At this point we know the NIC has an XFP cage, so now we 2875 * try to determine what is in the cage by using the 2876 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2877 * register. 
We read just one byte, which may take over 2878 * a millisecond 2879 */ 2880 2881 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2882 cmd.data1 = byte; 2883 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2884 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 2885 device_printf(sc->dev, "failed to read XFP\n"); 2886 } 2887 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 2888 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 2889 } 2890 if (err != MXGEFW_CMD_OK) { 2891 return; 2892 } 2893 2894 /* now we wait for the data to be cached */ 2895 cmd.data0 = byte; 2896 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2897 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 2898 DELAY(1000); 2899 cmd.data0 = byte; 2900 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2901 } 2902 if (err != MXGEFW_CMD_OK) { 2903 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 2904 cage_type, err, ms); 2905 return; 2906 } 2907 2908 if (cmd.data0 == mxge_media_types[0].bitmask) { 2909 if (mxge_verbose) 2910 device_printf(sc->dev, "%s:%s\n", cage_type, 2911 mxge_media_types[0].name); 2912 mxge_set_media(sc, mxge_media_types[0].flag); 2913 return; 2914 } 2915 for (i = 1; i < mxge_media_type_entries; i++) { 2916 if (cmd.data0 & mxge_media_types[i].bitmask) { 2917 if (mxge_verbose) 2918 device_printf(sc->dev, "%s:%s\n", 2919 cage_type, 2920 mxge_media_types[i].name); 2921 2922 mxge_set_media(sc, mxge_media_types[i].flag); 2923 return; 2924 } 2925 } 2926 device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type, 2927 cmd.data0); 2928 2929 return; 2930 } 2931 2932 static void 2933 mxge_intr(void *arg) 2934 { 2935 struct mxge_slice_state *ss = arg; 2936 mxge_softc_t *sc = ss->sc; 2937 mcp_irq_data_t *stats = ss->fw_stats; 2938 mxge_tx_ring_t *tx = &ss->tx; 2939 mxge_rx_done_t *rx_done = &ss->rx_done; 2940 uint32_t send_done_count; 2941 uint8_t valid; 2942 2943 2944 #ifndef IFNET_BUF_RING 2945 /* an interrupt on a non-zero slice is implicitly valid 2946 since MSI-X irqs are not shared */ 2947 if (ss != sc->ss) { 2948 mxge_clean_rx_done(ss); 2949 *ss->irq_claim = be32toh(3); 2950 return; 2951 } 2952 #endif 2953 2954 /* make sure the DMA has finished */ 2955 if (!stats->valid) { 2956 return; 2957 } 2958 valid = stats->valid; 2959 2960 if (sc->legacy_irq) { 2961 /* lower legacy IRQ */ 2962 *sc->irq_deassert = 0; 2963 if (!mxge_deassert_wait) 2964 /* don't wait for conf. 
that irq is low */ 2965 stats->valid = 0; 2966 } else { 2967 stats->valid = 0; 2968 } 2969 2970 /* loop while waiting for legacy irq deassertion */ 2971 do { 2972 /* check for transmit completes and receives */ 2973 send_done_count = be32toh(stats->send_done_count); 2974 while ((send_done_count != tx->pkt_done) || 2975 (rx_done->entry[rx_done->idx].length != 0)) { 2976 if (send_done_count != tx->pkt_done) 2977 mxge_tx_done(ss, (int)send_done_count); 2978 mxge_clean_rx_done(ss); 2979 send_done_count = be32toh(stats->send_done_count); 2980 } 2981 if (sc->legacy_irq && mxge_deassert_wait) 2982 wmb(); 2983 } while (*((volatile uint8_t *) &stats->valid)); 2984 2985 /* fw link & error stats meaningful only on the first slice */ 2986 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 2987 if (sc->link_state != stats->link_up) { 2988 sc->link_state = stats->link_up; 2989 if (sc->link_state) { 2990 if_link_state_change(sc->ifp, LINK_STATE_UP); 2991 if (mxge_verbose) 2992 device_printf(sc->dev, "link up\n"); 2993 } else { 2994 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 2995 if (mxge_verbose) 2996 device_printf(sc->dev, "link down\n"); 2997 } 2998 sc->need_media_probe = 1; 2999 } 3000 if (sc->rdma_tags_available != 3001 be32toh(stats->rdma_tags_available)) { 3002 sc->rdma_tags_available = 3003 be32toh(stats->rdma_tags_available); 3004 device_printf(sc->dev, "RDMA timed out! %d tags " 3005 "left\n", sc->rdma_tags_available); 3006 } 3007 3008 if (stats->link_down) { 3009 sc->down_cnt += stats->link_down; 3010 sc->link_state = 0; 3011 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3012 } 3013 } 3014 3015 /* check to see if we have rx token to pass back */ 3016 if (valid & 0x1) 3017 *ss->irq_claim = be32toh(3); 3018 *(ss->irq_claim + 1) = be32toh(3); 3019 } 3020 3021 static void 3022 mxge_init(void *arg) 3023 { 3024 } 3025 3026 3027 3028 static void 3029 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 3030 { 3031 struct lro_entry *lro_entry; 3032 int i; 3033 3034 while (!SLIST_EMPTY(&ss->lro_free)) { 3035 lro_entry = SLIST_FIRST(&ss->lro_free); 3036 SLIST_REMOVE_HEAD(&ss->lro_free, next); 3037 free(lro_entry, M_DEVBUF); 3038 } 3039 3040 for (i = 0; i <= ss->rx_big.mask; i++) { 3041 if (ss->rx_big.info[i].m == NULL) 3042 continue; 3043 bus_dmamap_unload(ss->rx_big.dmat, 3044 ss->rx_big.info[i].map); 3045 m_freem(ss->rx_big.info[i].m); 3046 ss->rx_big.info[i].m = NULL; 3047 } 3048 3049 for (i = 0; i <= ss->rx_small.mask; i++) { 3050 if (ss->rx_small.info[i].m == NULL) 3051 continue; 3052 bus_dmamap_unload(ss->rx_small.dmat, 3053 ss->rx_small.info[i].map); 3054 m_freem(ss->rx_small.info[i].m); 3055 ss->rx_small.info[i].m = NULL; 3056 } 3057 3058 /* transmit ring used only on the first slice */ 3059 if (ss->tx.info == NULL) 3060 return; 3061 3062 for (i = 0; i <= ss->tx.mask; i++) { 3063 ss->tx.info[i].flag = 0; 3064 if (ss->tx.info[i].m == NULL) 3065 continue; 3066 bus_dmamap_unload(ss->tx.dmat, 3067 ss->tx.info[i].map); 3068 m_freem(ss->tx.info[i].m); 3069 ss->tx.info[i].m = NULL; 3070 } 3071 } 3072 3073 static void 3074 mxge_free_mbufs(mxge_softc_t *sc) 3075 { 3076 int slice; 3077 3078 for (slice = 0; slice < sc->num_slices; slice++) 3079 mxge_free_slice_mbufs(&sc->ss[slice]); 3080 } 3081 3082 static void 3083 mxge_free_slice_rings(struct mxge_slice_state *ss) 3084 { 3085 int i; 3086 3087 3088 if (ss->rx_done.entry != NULL) 3089 mxge_dma_free(&ss->rx_done.dma); 3090 ss->rx_done.entry = NULL; 3091 3092 if (ss->tx.req_bytes != NULL) 3093 free(ss->tx.req_bytes, M_DEVBUF); 3094 
ss->tx.req_bytes = NULL; 3095 3096 if (ss->tx.seg_list != NULL) 3097 free(ss->tx.seg_list, M_DEVBUF); 3098 ss->tx.seg_list = NULL; 3099 3100 if (ss->rx_small.shadow != NULL) 3101 free(ss->rx_small.shadow, M_DEVBUF); 3102 ss->rx_small.shadow = NULL; 3103 3104 if (ss->rx_big.shadow != NULL) 3105 free(ss->rx_big.shadow, M_DEVBUF); 3106 ss->rx_big.shadow = NULL; 3107 3108 if (ss->tx.info != NULL) { 3109 if (ss->tx.dmat != NULL) { 3110 for (i = 0; i <= ss->tx.mask; i++) { 3111 bus_dmamap_destroy(ss->tx.dmat, 3112 ss->tx.info[i].map); 3113 } 3114 bus_dma_tag_destroy(ss->tx.dmat); 3115 } 3116 free(ss->tx.info, M_DEVBUF); 3117 } 3118 ss->tx.info = NULL; 3119 3120 if (ss->rx_small.info != NULL) { 3121 if (ss->rx_small.dmat != NULL) { 3122 for (i = 0; i <= ss->rx_small.mask; i++) { 3123 bus_dmamap_destroy(ss->rx_small.dmat, 3124 ss->rx_small.info[i].map); 3125 } 3126 bus_dmamap_destroy(ss->rx_small.dmat, 3127 ss->rx_small.extra_map); 3128 bus_dma_tag_destroy(ss->rx_small.dmat); 3129 } 3130 free(ss->rx_small.info, M_DEVBUF); 3131 } 3132 ss->rx_small.info = NULL; 3133 3134 if (ss->rx_big.info != NULL) { 3135 if (ss->rx_big.dmat != NULL) { 3136 for (i = 0; i <= ss->rx_big.mask; i++) { 3137 bus_dmamap_destroy(ss->rx_big.dmat, 3138 ss->rx_big.info[i].map); 3139 } 3140 bus_dmamap_destroy(ss->rx_big.dmat, 3141 ss->rx_big.extra_map); 3142 bus_dma_tag_destroy(ss->rx_big.dmat); 3143 } 3144 free(ss->rx_big.info, M_DEVBUF); 3145 } 3146 ss->rx_big.info = NULL; 3147 } 3148 3149 static void 3150 mxge_free_rings(mxge_softc_t *sc) 3151 { 3152 int slice; 3153 3154 for (slice = 0; slice < sc->num_slices; slice++) 3155 mxge_free_slice_rings(&sc->ss[slice]); 3156 } 3157 3158 static int 3159 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3160 int tx_ring_entries) 3161 { 3162 mxge_softc_t *sc = ss->sc; 3163 size_t bytes; 3164 int err, i; 3165 3166 err = ENOMEM; 3167 3168 /* allocate per-slice receive resources */ 3169 3170 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 3171 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 3172 3173 /* allocate the rx shadow rings */ 3174 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 3175 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3176 if (ss->rx_small.shadow == NULL) 3177 return err; 3178 3179 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 3180 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3181 if (ss->rx_big.shadow == NULL) 3182 return err; 3183 3184 /* allocate the rx host info rings */ 3185 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 3186 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3187 if (ss->rx_small.info == NULL) 3188 return err; 3189 3190 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 3191 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3192 if (ss->rx_big.info == NULL) 3193 return err; 3194 3195 /* allocate the rx busdma resources */ 3196 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3197 1, /* alignment */ 3198 4096, /* boundary */ 3199 BUS_SPACE_MAXADDR, /* low */ 3200 BUS_SPACE_MAXADDR, /* high */ 3201 NULL, NULL, /* filter */ 3202 MHLEN, /* maxsize */ 3203 1, /* num segs */ 3204 MHLEN, /* maxsegsize */ 3205 BUS_DMA_ALLOCNOW, /* flags */ 3206 NULL, NULL, /* lock */ 3207 &ss->rx_small.dmat); /* tag */ 3208 if (err != 0) { 3209 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3210 err); 3211 return err; 3212 } 3213 3214 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3215 1, /* alignment */ 3216 #if MXGE_VIRT_JUMBOS 
3217 4096, /* boundary */ 3218 #else 3219 0, /* boundary */ 3220 #endif 3221 BUS_SPACE_MAXADDR, /* low */ 3222 BUS_SPACE_MAXADDR, /* high */ 3223 NULL, NULL, /* filter */ 3224 3*4096, /* maxsize */ 3225 #if MXGE_VIRT_JUMBOS 3226 3, /* num segs */ 3227 4096, /* maxsegsize*/ 3228 #else 3229 1, /* num segs */ 3230 MJUM9BYTES, /* maxsegsize*/ 3231 #endif 3232 BUS_DMA_ALLOCNOW, /* flags */ 3233 NULL, NULL, /* lock */ 3234 &ss->rx_big.dmat); /* tag */ 3235 if (err != 0) { 3236 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3237 err); 3238 return err; 3239 } 3240 for (i = 0; i <= ss->rx_small.mask; i++) { 3241 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3242 &ss->rx_small.info[i].map); 3243 if (err != 0) { 3244 device_printf(sc->dev, "Err %d rx_small dmamap\n", 3245 err); 3246 return err; 3247 } 3248 } 3249 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3250 &ss->rx_small.extra_map); 3251 if (err != 0) { 3252 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 3253 err); 3254 return err; 3255 } 3256 3257 for (i = 0; i <= ss->rx_big.mask; i++) { 3258 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3259 &ss->rx_big.info[i].map); 3260 if (err != 0) { 3261 device_printf(sc->dev, "Err %d rx_big dmamap\n", 3262 err); 3263 return err; 3264 } 3265 } 3266 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3267 &ss->rx_big.extra_map); 3268 if (err != 0) { 3269 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 3270 err); 3271 return err; 3272 } 3273 3274 /* now allocate TX resources */ 3275 3276 #ifndef IFNET_BUF_RING 3277 /* only use a single TX ring for now */ 3278 if (ss != ss->sc->ss) 3279 return 0; 3280 #endif 3281 3282 ss->tx.mask = tx_ring_entries - 1; 3283 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3284 3285 3286 /* allocate the tx request copy block */ 3287 bytes = 8 + 3288 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3289 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 3290 if (ss->tx.req_bytes == NULL) 3291 return err; 3292 /* ensure req_list entries are aligned to 8 bytes */ 3293 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3294 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL); 3295 3296 /* allocate the tx busdma segment list */ 3297 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3298 ss->tx.seg_list = (bus_dma_segment_t *) 3299 malloc(bytes, M_DEVBUF, M_WAITOK); 3300 if (ss->tx.seg_list == NULL) 3301 return err; 3302 3303 /* allocate the tx host info ring */ 3304 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3305 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3306 if (ss->tx.info == NULL) 3307 return err; 3308 3309 /* allocate the tx busdma resources */ 3310 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3311 1, /* alignment */ 3312 sc->tx_boundary, /* boundary */ 3313 BUS_SPACE_MAXADDR, /* low */ 3314 BUS_SPACE_MAXADDR, /* high */ 3315 NULL, NULL, /* filter */ 3316 65536 + 256, /* maxsize */ 3317 ss->tx.max_desc - 2, /* num segs */ 3318 sc->tx_boundary, /* maxsegsz */ 3319 BUS_DMA_ALLOCNOW, /* flags */ 3320 NULL, NULL, /* lock */ 3321 &ss->tx.dmat); /* tag */ 3322 3323 if (err != 0) { 3324 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3325 err); 3326 return err; 3327 } 3328 3329 /* now use these tags to setup dmamaps for each slot 3330 in the ring */ 3331 for (i = 0; i <= ss->tx.mask; i++) { 3332 err = bus_dmamap_create(ss->tx.dmat, 0, 3333 &ss->tx.info[i].map); 3334 if (err != 0) { 3335 device_printf(sc->dev, "Err %d tx dmamap\n", 3336 err); 3337 return err; 3338 } 3339 } 3340 return 0; 3341 3342 } 3343 3344 static int
3345 mxge_alloc_rings(mxge_softc_t *sc) 3346 { 3347 mxge_cmd_t cmd; 3348 int tx_ring_size; 3349 int tx_ring_entries, rx_ring_entries; 3350 int err, slice; 3351 3352 /* get ring sizes */ 3353 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3354 tx_ring_size = cmd.data0; 3355 if (err != 0) { 3356 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3357 goto abort; 3358 } 3359 3360 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3361 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3362 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 3363 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 3364 IFQ_SET_READY(&sc->ifp->if_snd); 3365 3366 for (slice = 0; slice < sc->num_slices; slice++) { 3367 err = mxge_alloc_slice_rings(&sc->ss[slice], 3368 rx_ring_entries, 3369 tx_ring_entries); 3370 if (err != 0) 3371 goto abort; 3372 } 3373 return 0; 3374 3375 abort: 3376 mxge_free_rings(sc); 3377 return err; 3378 3379 } 3380 3381 3382 static void 3383 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3384 { 3385 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3386 3387 if (bufsize < MCLBYTES) { 3388 /* easy, everything fits in a single buffer */ 3389 *big_buf_size = MCLBYTES; 3390 *cl_size = MCLBYTES; 3391 *nbufs = 1; 3392 return; 3393 } 3394 3395 if (bufsize < MJUMPAGESIZE) { 3396 /* still easy, everything still fits in a single buffer */ 3397 *big_buf_size = MJUMPAGESIZE; 3398 *cl_size = MJUMPAGESIZE; 3399 *nbufs = 1; 3400 return; 3401 } 3402 #if MXGE_VIRT_JUMBOS 3403 /* now we need to use virtually contiguous buffers */ 3404 *cl_size = MJUM9BYTES; 3405 *big_buf_size = 4096; 3406 *nbufs = mtu / 4096 + 1; 3407 /* needs to be a power of two, so round up */ 3408 if (*nbufs == 3) 3409 *nbufs = 4; 3410 #else 3411 *cl_size = MJUM9BYTES; 3412 *big_buf_size = MJUM9BYTES; 3413 *nbufs = 1; 3414 #endif 3415 } 3416 3417 static int 3418 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3419 { 3420 mxge_softc_t *sc; 3421 mxge_cmd_t cmd; 3422 bus_dmamap_t map; 3423 struct lro_entry *lro_entry; 3424 int err, i, slice; 3425 3426 3427 sc = ss->sc; 3428 slice = ss - sc->ss; 3429 3430 SLIST_INIT(&ss->lro_free); 3431 SLIST_INIT(&ss->lro_active); 3432 3433 for (i = 0; i < sc->lro_cnt; i++) { 3434 lro_entry = (struct lro_entry *) 3435 malloc(sizeof (*lro_entry), M_DEVBUF, 3436 M_NOWAIT | M_ZERO); 3437 if (lro_entry == NULL) { 3438 sc->lro_cnt = i; 3439 break; 3440 } 3441 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next); 3442 } 3443 /* get the lanai pointers to the send and receive rings */ 3444 3445 err = 0; 3446 #ifndef IFNET_BUF_RING 3447 /* We currently only send from the first slice */ 3448 if (slice == 0) { 3449 #endif 3450 cmd.data0 = slice; 3451 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3452 ss->tx.lanai = 3453 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3454 ss->tx.send_go = (volatile uint32_t *) 3455 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3456 ss->tx.send_stop = (volatile uint32_t *) 3457 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3458 #ifndef IFNET_BUF_RING 3459 } 3460 #endif 3461 cmd.data0 = slice; 3462 err |= mxge_send_cmd(sc, 3463 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3464 ss->rx_small.lanai = 3465 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3466 cmd.data0 = slice; 3467 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3468 ss->rx_big.lanai = 3469 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3470 
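/* the lanai pointers fetched above are firmware-supplied offsets
 * into the NIC's SRAM window (sc->sram), one set per slice */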
3471 if (err != 0) { 3472 device_printf(sc->dev, 3473 "failed to get ring sizes or locations\n"); 3474 return EIO; 3475 } 3476 3477 /* stock receive rings */ 3478 for (i = 0; i <= ss->rx_small.mask; i++) { 3479 map = ss->rx_small.info[i].map; 3480 err = mxge_get_buf_small(ss, map, i); 3481 if (err) { 3482 device_printf(sc->dev, "alloced %d/%d smalls\n", 3483 i, ss->rx_small.mask + 1); 3484 return ENOMEM; 3485 } 3486 } 3487 for (i = 0; i <= ss->rx_big.mask; i++) { 3488 ss->rx_big.shadow[i].addr_low = 0xffffffff; 3489 ss->rx_big.shadow[i].addr_high = 0xffffffff; 3490 } 3491 ss->rx_big.nbufs = nbufs; 3492 ss->rx_big.cl_size = cl_size; 3493 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN + 3494 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3495 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { 3496 map = ss->rx_big.info[i].map; 3497 err = mxge_get_buf_big(ss, map, i); 3498 if (err) { 3499 device_printf(sc->dev, "alloced %d/%d bigs\n", 3500 i, ss->rx_big.mask + 1); 3501 return ENOMEM; 3502 } 3503 } 3504 return 0; 3505 } 3506 3507 static int 3508 mxge_open(mxge_softc_t *sc) 3509 { 3510 mxge_cmd_t cmd; 3511 int err, big_bytes, nbufs, slice, cl_size, i; 3512 bus_addr_t bus; 3513 volatile uint8_t *itable; 3514 struct mxge_slice_state *ss; 3515 3516 /* Copy the MAC address in case it was overridden */ 3517 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 3518 3519 err = mxge_reset(sc, 1); 3520 if (err != 0) { 3521 device_printf(sc->dev, "failed to reset\n"); 3522 return EIO; 3523 } 3524 3525 if (sc->num_slices > 1) { 3526 /* setup the indirection table */ 3527 cmd.data0 = sc->num_slices; 3528 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 3529 &cmd); 3530 3531 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 3532 &cmd); 3533 if (err != 0) { 3534 device_printf(sc->dev, 3535 "failed to setup rss tables\n"); 3536 return err; 3537 } 3538 3539 /* just enable an identity mapping */ 3540 itable = sc->sram + cmd.data0; 3541 for (i = 0; i < sc->num_slices; i++) 3542 itable[i] = (uint8_t)i; 3543 3544 cmd.data0 = 1; 3545 cmd.data1 = mxge_rss_hash_type; 3546 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3547 if (err != 0) { 3548 device_printf(sc->dev, "failed to enable slices\n"); 3549 return err; 3550 } 3551 } 3552 3553 3554 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); 3555 3556 cmd.data0 = nbufs; 3557 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 3558 &cmd); 3559 /* error is only meaningful if we're trying to set 3560 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 3561 if (err && nbufs > 1) { 3562 device_printf(sc->dev, 3563 "Failed to set always-use-n to %d\n", 3564 nbufs); 3565 return EIO; 3566 } 3567 /* Give the firmware the mtu and the big and small buffer 3568 sizes. The firmware wants the big buf size to be a power 3569 of two.
Luckily, FreeBSD's clusters are powers of two */ 3570 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3571 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3572 cmd.data0 = MHLEN - MXGEFW_PAD; 3573 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3574 &cmd); 3575 cmd.data0 = big_bytes; 3576 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3577 3578 if (err != 0) { 3579 device_printf(sc->dev, "failed to setup params\n"); 3580 goto abort; 3581 } 3582 3583 /* Now give him the pointer to the stats block */ 3584 for (slice = 0; 3585 #ifdef IFNET_BUF_RING 3586 slice < sc->num_slices; 3587 #else 3588 slice < 1; 3589 #endif 3590 slice++) { 3591 ss = &sc->ss[slice]; 3592 cmd.data0 = 3593 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3594 cmd.data1 = 3595 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3596 cmd.data2 = sizeof(struct mcp_irq_data); 3597 cmd.data2 |= (slice << 16); 3598 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3599 } 3600 3601 if (err != 0) { 3602 bus = sc->ss->fw_stats_dma.bus_addr; 3603 bus += offsetof(struct mcp_irq_data, send_done_count); 3604 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3605 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3606 err = mxge_send_cmd(sc, 3607 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3608 &cmd); 3609 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3610 sc->fw_multicast_support = 0; 3611 } else { 3612 sc->fw_multicast_support = 1; 3613 } 3614 3615 if (err != 0) { 3616 device_printf(sc->dev, "failed to setup params\n"); 3617 goto abort; 3618 } 3619 3620 for (slice = 0; slice < sc->num_slices; slice++) { 3621 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3622 if (err != 0) { 3623 device_printf(sc->dev, "couldn't open slice %d\n", 3624 slice); 3625 goto abort; 3626 } 3627 } 3628 3629 /* Finally, start the firmware running */ 3630 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3631 if (err) { 3632 device_printf(sc->dev, "Couldn't bring up link\n"); 3633 goto abort; 3634 } 3635 #ifdef IFNET_BUF_RING 3636 for (slice = 0; slice < sc->num_slices; slice++) { 3637 ss = &sc->ss[slice]; 3638 ss->if_drv_flags |= IFF_DRV_RUNNING; 3639 ss->if_drv_flags &= ~IFF_DRV_OACTIVE; 3640 } 3641 #endif 3642 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3643 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3644 3645 return 0; 3646 3647 3648 abort: 3649 mxge_free_mbufs(sc); 3650 3651 return err; 3652 } 3653 3654 static int 3655 mxge_close(mxge_softc_t *sc, int down) 3656 { 3657 mxge_cmd_t cmd; 3658 int err, old_down_cnt; 3659 #ifdef IFNET_BUF_RING 3660 struct mxge_slice_state *ss; 3661 int slice; 3662 #endif 3663 3664 #ifdef IFNET_BUF_RING 3665 for (slice = 0; slice < sc->num_slices; slice++) { 3666 ss = &sc->ss[slice]; 3667 ss->if_drv_flags &= ~IFF_DRV_RUNNING; 3668 } 3669 #endif 3670 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3671 if (!down) { 3672 old_down_cnt = sc->down_cnt; 3673 wmb(); 3674 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3675 if (err) { 3676 device_printf(sc->dev, 3677 "Couldn't bring down link\n"); 3678 } 3679 if (old_down_cnt == sc->down_cnt) { 3680 /* wait for down irq */ 3681 DELAY(10 * sc->intr_coal_delay); 3682 } 3683 wmb(); 3684 if (old_down_cnt == sc->down_cnt) { 3685 device_printf(sc->dev, "never got down irq\n"); 3686 } 3687 } 3688 mxge_free_mbufs(sc); 3689 3690 return 0; 3691 } 3692 3693 static void 3694 mxge_setup_cfg_space(mxge_softc_t *sc) 3695 { 3696 device_t dev = sc->dev; 3697 int reg; 3698 uint16_t cmd, lnk, pectl; 3699 3700 /* find the PCIe link width and set max 
read request to 4KB*/ 3701 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 3702 lnk = pci_read_config(dev, reg + 0x12, 2); 3703 sc->link_width = (lnk >> 4) & 0x3f; 3704 3705 if (sc->pectl == 0) { 3706 pectl = pci_read_config(dev, reg + 0x8, 2); 3707 pectl = (pectl & ~0x7000) | (5 << 12); 3708 pci_write_config(dev, reg + 0x8, pectl, 2); 3709 sc->pectl = pectl; 3710 } else { 3711 /* restore saved pectl after watchdog reset */ 3712 pci_write_config(dev, reg + 0x8, sc->pectl, 2); 3713 } 3714 } 3715 3716 /* Enable DMA and Memory space access */ 3717 pci_enable_busmaster(dev); 3718 cmd = pci_read_config(dev, PCIR_COMMAND, 2); 3719 cmd |= PCIM_CMD_MEMEN; 3720 pci_write_config(dev, PCIR_COMMAND, cmd, 2); 3721 } 3722 3723 static uint32_t 3724 mxge_read_reboot(mxge_softc_t *sc) 3725 { 3726 device_t dev = sc->dev; 3727 uint32_t vs; 3728 3729 /* find the vendor specific offset */ 3730 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) { 3731 device_printf(sc->dev, 3732 "could not find vendor specific offset\n"); 3733 return (uint32_t)-1; 3734 } 3735 /* enable read32 mode */ 3736 pci_write_config(dev, vs + 0x10, 0x3, 1); 3737 /* tell NIC which register to read */ 3738 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3739 return (pci_read_config(dev, vs + 0x14, 4)); 3740 } 3741 3742 static void 3743 mxge_watchdog_reset(mxge_softc_t *sc) 3744 { 3745 struct pci_devinfo *dinfo; 3746 struct mxge_slice_state *ss; 3747 int err, running, s, num_tx_slices = 1; 3748 uint32_t reboot; 3749 uint16_t cmd; 3750 3751 err = ENXIO; 3752 3753 device_printf(sc->dev, "Watchdog reset!\n"); 3754 3755 /* 3756 * check to see if the NIC rebooted. If it did, then all of 3757 * PCI config space has been reset, and things like the 3758 * busmaster bit will be zero. If this is the case, then we 3759 * must restore PCI config space before the NIC can be used 3760 * again 3761 */ 3762 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3763 if (cmd == 0xffff) { 3764 /* 3765 * maybe the watchdog caught the NIC rebooting; wait 3766 * up to 100ms for it to finish. 
If it does not come 3767 * back, then give up 3768 */ 3769 DELAY(1000*100); 3770 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3771 if (cmd == 0xffff) { 3772 device_printf(sc->dev, "NIC disappeared!\n"); 3773 } 3774 } 3775 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3776 /* print the reboot status */ 3777 reboot = mxge_read_reboot(sc); 3778 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3779 reboot); 3780 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 3781 if (running) { 3782 3783 /* 3784 * quiesce NIC so that TX routines will not try to 3785 * xmit after restoration of BAR 3786 */ 3787 3788 /* Mark the link as down */ 3789 if (sc->link_state) { 3790 sc->link_state = 0; 3791 if_link_state_change(sc->ifp, 3792 LINK_STATE_DOWN); 3793 } 3794 #ifdef IFNET_BUF_RING 3795 num_tx_slices = sc->num_slices; 3796 #endif 3797 /* grab all TX locks to ensure no tx */ 3798 for (s = 0; s < num_tx_slices; s++) { 3799 ss = &sc->ss[s]; 3800 mtx_lock(&ss->tx.mtx); 3801 } 3802 mxge_close(sc, 1); 3803 } 3804 /* restore PCI configuration space */ 3805 dinfo = device_get_ivars(sc->dev); 3806 pci_cfg_restore(sc->dev, dinfo); 3807 3808 /* and redo any changes we made to our config space */ 3809 mxge_setup_cfg_space(sc); 3810 3811 /* reload f/w */ 3812 err = mxge_load_firmware(sc, 0); 3813 if (err) { 3814 device_printf(sc->dev, 3815 "Unable to re-load f/w\n"); 3816 } 3817 if (running) { 3818 if (!err) 3819 err = mxge_open(sc); 3820 /* release all TX locks */ 3821 for (s = 0; s < num_tx_slices; s++) { 3822 ss = &sc->ss[s]; 3823 #ifdef IFNET_BUF_RING 3824 mxge_start_locked(ss); 3825 #endif 3826 mtx_unlock(&ss->tx.mtx); 3827 } 3828 } 3829 sc->watchdog_resets++; 3830 } else { 3831 device_printf(sc->dev, 3832 "NIC did not reboot, not resetting\n"); 3833 err = 0; 3834 } 3835 if (err) { 3836 device_printf(sc->dev, "watchdog reset failed\n"); 3837 } else { 3838 if (sc->dying == 2) 3839 sc->dying = 0; 3840 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3841 } 3842 } 3843 3844 static void 3845 mxge_watchdog_task(void *arg, int pending) 3846 { 3847 mxge_softc_t *sc = arg; 3848 3849 3850 mtx_lock(&sc->driver_mtx); 3851 mxge_watchdog_reset(sc); 3852 mtx_unlock(&sc->driver_mtx); 3853 } 3854 3855 static void 3856 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice) 3857 { 3858 tx = &sc->ss[slice].tx; 3859 device_printf(sc->dev, "slice %d stuck? 
ring state:\n", slice); 3860 device_printf(sc->dev, 3861 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3862 tx->req, tx->done, tx->queue_active); 3863 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3864 tx->activate, tx->deactivate); 3865 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3866 tx->pkt_done, 3867 be32toh(sc->ss->fw_stats->send_done_count)); 3868 } 3869 3870 static int 3871 mxge_watchdog(mxge_softc_t *sc) 3872 { 3873 mxge_tx_ring_t *tx; 3874 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3875 int i, err = 0; 3876 3877 /* see if we have outstanding transmits, which 3878 have been pending for more than mxge_ticks */ 3879 for (i = 0; 3880 #ifdef IFNET_BUF_RING 3881 (i < sc->num_slices) && (err == 0); 3882 #else 3883 (i < 1) && (err == 0); 3884 #endif 3885 i++) { 3886 tx = &sc->ss[i].tx; 3887 if (tx->req != tx->done && 3888 tx->watchdog_req != tx->watchdog_done && 3889 tx->done == tx->watchdog_done) { 3890 /* check for pause blocking before resetting */ 3891 if (tx->watchdog_rx_pause == rx_pause) { 3892 mxge_warn_stuck(sc, tx, i); 3893 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 3894 return (ENXIO); 3895 } 3896 else 3897 device_printf(sc->dev, "Flow control blocking " 3898 "xmits, check link partner\n"); 3899 } 3900 3901 tx->watchdog_req = tx->req; 3902 tx->watchdog_done = tx->done; 3903 tx->watchdog_rx_pause = rx_pause; 3904 } 3905 3906 if (sc->need_media_probe) 3907 mxge_media_probe(sc); 3908 return (err); 3909 } 3910 3911 static u_long 3912 mxge_update_stats(mxge_softc_t *sc) 3913 { 3914 struct mxge_slice_state *ss; 3915 u_long pkts = 0; 3916 u_long ipackets = 0; 3917 u_long opackets = 0; 3918 #ifdef IFNET_BUF_RING 3919 u_long obytes = 0; 3920 u_long omcasts = 0; 3921 u_long odrops = 0; 3922 #endif 3923 u_long oerrors = 0; 3924 int slice; 3925 3926 for (slice = 0; slice < sc->num_slices; slice++) { 3927 ss = &sc->ss[slice]; 3928 ipackets += ss->ipackets; 3929 opackets += ss->opackets; 3930 #ifdef IFNET_BUF_RING 3931 obytes += ss->obytes; 3932 omcasts += ss->omcasts; 3933 odrops += ss->tx.br->br_drops; 3934 #endif 3935 oerrors += ss->oerrors; 3936 } 3937 pkts = (ipackets - sc->ifp->if_ipackets); 3938 pkts += (opackets - sc->ifp->if_opackets); 3939 sc->ifp->if_ipackets = ipackets; 3940 sc->ifp->if_opackets = opackets; 3941 #ifdef IFNET_BUF_RING 3942 sc->ifp->if_obytes = obytes; 3943 sc->ifp->if_omcasts = omcasts; 3944 sc->ifp->if_snd.ifq_drops = odrops; 3945 #endif 3946 sc->ifp->if_oerrors = oerrors; 3947 return pkts; 3948 } 3949 3950 static void 3951 mxge_tick(void *arg) 3952 { 3953 mxge_softc_t *sc = arg; 3954 u_long pkts = 0; 3955 int err = 0; 3956 int running, ticks; 3957 uint16_t cmd; 3958 3959 ticks = mxge_ticks; 3960 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 3961 if (running) { 3962 /* aggregate stats from different slices */ 3963 pkts = mxge_update_stats(sc); 3964 if (!sc->watchdog_countdown) { 3965 err = mxge_watchdog(sc); 3966 sc->watchdog_countdown = 4; 3967 } 3968 sc->watchdog_countdown--; 3969 } 3970 if (pkts == 0) { 3971 /* ensure NIC did not suffer h/w fault while idle */ 3972 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3973 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3974 sc->dying = 2; 3975 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 3976 err = ENXIO; 3977 } 3978 /* look less often if NIC is idle */ 3979 ticks *= 4; 3980 } 3981 3982 if (err == 0) 3983 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 3984 3985 } 3986 3987 static int 3988 mxge_media_change(struct ifnet *ifp) 3989 { 3990 return EINVAL; 3991 } 3992 3993 
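/*
 * A worked example of the stuck-transmit test in mxge_watchdog()
 * above, with hypothetical ring indices: on one tick we record
 * watchdog_req = 10 and watchdog_done = 8. A tick later the ring
 * shows req = 12 and done = 8: requests advanced, completions did
 * not, and done still equals watchdog_done, so the slice is either
 * blocked by flow control (rx_pause advanced) or wedged; in the
 * latter case the watchdog task resets the NIC.
 */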
static int 3994 mxge_change_mtu(mxge_softc_t *sc, int mtu) 3995 { 3996 struct ifnet *ifp = sc->ifp; 3997 int real_mtu, old_mtu; 3998 int err = 0; 3999 4000 4001 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 4002 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 4003 return EINVAL; 4004 mtx_lock(&sc->driver_mtx); 4005 old_mtu = ifp->if_mtu; 4006 ifp->if_mtu = mtu; 4007 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4008 mxge_close(sc, 0); 4009 err = mxge_open(sc); 4010 if (err != 0) { 4011 ifp->if_mtu = old_mtu; 4012 mxge_close(sc, 0); 4013 (void) mxge_open(sc); 4014 } 4015 } 4016 mtx_unlock(&sc->driver_mtx); 4017 return err; 4018 } 4019 4020 static void 4021 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 4022 { 4023 mxge_softc_t *sc = ifp->if_softc; 4024 4025 4026 if (sc == NULL) 4027 return; 4028 ifmr->ifm_status = IFM_AVALID; 4029 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 4030 ifmr->ifm_active = IFM_AUTO | IFM_ETHER; 4031 ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0; 4032 } 4033 4034 static int 4035 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 4036 { 4037 mxge_softc_t *sc = ifp->if_softc; 4038 struct ifreq *ifr = (struct ifreq *)data; 4039 int err, mask; 4040 4041 err = 0; 4042 switch (command) { 4043 case SIOCSIFADDR: 4044 case SIOCGIFADDR: 4045 err = ether_ioctl(ifp, command, data); 4046 break; 4047 4048 case SIOCSIFMTU: 4049 err = mxge_change_mtu(sc, ifr->ifr_mtu); 4050 break; 4051 4052 case SIOCSIFFLAGS: 4053 mtx_lock(&sc->driver_mtx); 4054 if (sc->dying) { 4055 mtx_unlock(&sc->driver_mtx); 4056 return EINVAL; 4057 } 4058 if (ifp->if_flags & IFF_UP) { 4059 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 4060 err = mxge_open(sc); 4061 } else { 4062 /* take care of promisc and allmulti 4063 flag changes */ 4064 mxge_change_promisc(sc, 4065 ifp->if_flags & IFF_PROMISC); 4066 mxge_set_multicast_list(sc); 4067 } 4068 } else { 4069 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4070 mxge_close(sc, 0); 4071 } 4072 } 4073 mtx_unlock(&sc->driver_mtx); 4074 break; 4075 4076 case SIOCADDMULTI: 4077 case SIOCDELMULTI: 4078 mtx_lock(&sc->driver_mtx); 4079 mxge_set_multicast_list(sc); 4080 mtx_unlock(&sc->driver_mtx); 4081 break; 4082 4083 case SIOCSIFCAP: 4084 mtx_lock(&sc->driver_mtx); 4085 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 4086 if (mask & IFCAP_TXCSUM) { 4087 if (IFCAP_TXCSUM & ifp->if_capenable) { 4088 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 4089 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP 4090 | CSUM_TSO); 4091 } else { 4092 ifp->if_capenable |= IFCAP_TXCSUM; 4093 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 4094 } 4095 } else if (mask & IFCAP_RXCSUM) { 4096 if (IFCAP_RXCSUM & ifp->if_capenable) { 4097 ifp->if_capenable &= ~IFCAP_RXCSUM; 4098 sc->csum_flag = 0; 4099 } else { 4100 ifp->if_capenable |= IFCAP_RXCSUM; 4101 sc->csum_flag = 1; 4102 } 4103 } 4104 if (mask & IFCAP_TSO4) { 4105 if (IFCAP_TSO4 & ifp->if_capenable) { 4106 ifp->if_capenable &= ~IFCAP_TSO4; 4107 ifp->if_hwassist &= ~CSUM_TSO; 4108 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 4109 ifp->if_capenable |= IFCAP_TSO4; 4110 ifp->if_hwassist |= CSUM_TSO; 4111 } else { 4112 printf("mxge requires tx checksum offload" 4113 " be enabled to use TSO\n"); 4114 err = EINVAL; 4115 } 4116 } 4117 if (mask & IFCAP_LRO) { 4118 if (IFCAP_LRO & ifp->if_capenable) 4119 err = mxge_change_lro_locked(sc, 0); 4120 else 4121 err = mxge_change_lro_locked(sc, mxge_lro_cnt); 4122 } 4123 if (mask & IFCAP_VLAN_HWTAGGING) 4124 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 4125 
mtx_unlock(&sc->driver_mtx); 4126 VLAN_CAPABILITIES(ifp); 4127 4128 break; 4129 4130 case SIOCGIFMEDIA: 4131 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 4132 &sc->media, command); 4133 break; 4134 4135 default: 4136 err = ENOTTY; 4137 } 4138 return err; 4139 } 4140 4141 static void 4142 mxge_fetch_tunables(mxge_softc_t *sc) 4143 { 4144 4145 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); 4146 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 4147 &mxge_flow_control); 4148 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 4149 &mxge_intr_coal_delay); 4150 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 4151 &mxge_nvidia_ecrc_enable); 4152 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 4153 &mxge_force_firmware); 4154 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 4155 &mxge_deassert_wait); 4156 TUNABLE_INT_FETCH("hw.mxge.verbose", 4157 &mxge_verbose); 4158 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 4159 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt); 4160 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); 4161 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); 4162 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type); 4163 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu); 4164 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle); 4165 if (sc->lro_cnt != 0) 4166 mxge_lro_cnt = sc->lro_cnt; 4167 4168 if (bootverbose) 4169 mxge_verbose = 1; 4170 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 4171 mxge_intr_coal_delay = 30; 4172 if (mxge_ticks == 0) 4173 mxge_ticks = hz / 2; 4174 sc->pause = mxge_flow_control; 4175 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 4176 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) { 4177 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 4178 } 4179 if (mxge_initial_mtu > ETHERMTU_JUMBO || 4180 mxge_initial_mtu < ETHER_MIN_LEN) 4181 mxge_initial_mtu = ETHERMTU_JUMBO; 4182 4183 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE) 4184 mxge_throttle = MXGE_MAX_THROTTLE; 4185 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE) 4186 mxge_throttle = MXGE_MIN_THROTTLE; 4187 sc->throttle = mxge_throttle; 4188 } 4189 4190 4191 static void 4192 mxge_free_slices(mxge_softc_t *sc) 4193 { 4194 struct mxge_slice_state *ss; 4195 int i; 4196 4197 4198 if (sc->ss == NULL) 4199 return; 4200 4201 for (i = 0; i < sc->num_slices; i++) { 4202 ss = &sc->ss[i]; 4203 if (ss->fw_stats != NULL) { 4204 mxge_dma_free(&ss->fw_stats_dma); 4205 ss->fw_stats = NULL; 4206 #ifdef IFNET_BUF_RING 4207 if (ss->tx.br != NULL) { 4208 drbr_free(ss->tx.br, M_DEVBUF); 4209 ss->tx.br = NULL; 4210 } 4211 #endif 4212 mtx_destroy(&ss->tx.mtx); 4213 } 4214 if (ss->rx_done.entry != NULL) { 4215 mxge_dma_free(&ss->rx_done.dma); 4216 ss->rx_done.entry = NULL; 4217 } 4218 } 4219 free(sc->ss, M_DEVBUF); 4220 sc->ss = NULL; 4221 } 4222 4223 static int 4224 mxge_alloc_slices(mxge_softc_t *sc) 4225 { 4226 mxge_cmd_t cmd; 4227 struct mxge_slice_state *ss; 4228 size_t bytes; 4229 int err, i, max_intr_slots; 4230 4231 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4232 if (err != 0) { 4233 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4234 return err; 4235 } 4236 sc->rx_ring_size = cmd.data0; 4237 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); 4238 4239 bytes = sizeof (*sc->ss) * sc->num_slices; 4240 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO); 4241 if (sc->ss == NULL) 4242 return (ENOMEM); 4243 for (i = 0; i < sc->num_slices; i++) { 4244 ss = &sc->ss[i]; 4245 4246 
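/* every slice gets an rx interrupt queue below; firmware stats
 * and tx state are only set up for slices that can transmit */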
		ss->sc = sc;

		/* allocate per-slice rx interrupt queues */

		bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
		err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
		if (err != 0)
			goto abort;
		ss->rx_done.entry = ss->rx_done.dma.addr;
		bzero(ss->rx_done.entry, bytes);

		/*
		 * allocate the per-slice firmware stats; stats
		 * (including tx) are used only on the first
		 * slice for now
		 */
#ifndef IFNET_BUF_RING
		if (i > 0)
			continue;
#endif

		bytes = sizeof (*ss->fw_stats);
		err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
		    bytes, 64);
		if (err != 0)
			goto abort;
		ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
		snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
		    "%s:tx(%d)", device_get_nameunit(sc->dev), i);
		mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
#ifdef IFNET_BUF_RING
		ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
		    &ss->tx.mtx);
#endif
	}

	return (0);

abort:
	mxge_free_slices(sc);
	return (ENOMEM);
}

static void
mxge_slice_probe(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	char *old_fw;
	int msix_cnt, status, max_intr_slots;

	sc->num_slices = 1;
	/*
	 * don't enable multiple slices unless they are enabled
	 * via the tunable, and never on a uniprocessor system
	 */
	if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
		return;

	/* see how many MSI-X interrupts are available */
	msix_cnt = pci_msix_count(sc->dev);
	if (msix_cnt < 2)
		return;

	/* now load the slice-aware firmware and see what it supports */
	old_fw = sc->fw_name;
	if (old_fw == mxge_fw_aligned)
		sc->fw_name = mxge_fw_rss_aligned;
	else
		sc->fw_name = mxge_fw_rss_unaligned;
	status = mxge_load_firmware(sc, 0);
	if (status != 0) {
		device_printf(sc->dev, "Falling back to a single slice\n");
		return;
	}

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		goto abort_with_fw;
	}

	/* get rx ring size */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		goto abort_with_fw;
	}
	max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));

	/* tell it the size of the interrupt queues */
	cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
		goto abort_with_fw;
	}

	/* ask for the maximum number of slices it supports */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
	if (status != 0) {
		device_printf(sc->dev,
		    "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
		goto abort_with_fw;
	}
	sc->num_slices = cmd.data0;
	if (sc->num_slices > msix_cnt)
		sc->num_slices = msix_cnt;

	if (mxge_max_slices == -1) {
		/* cap to number of CPUs in system */
		if (sc->num_slices > mp_ncpus)
			sc->num_slices = mp_ncpus;
	} else {
		if (sc->num_slices > mxge_max_slices)
			sc->num_slices = mxge_max_slices;
	}
	/* make sure it is a power of two */
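	/*
	 * (n & (n - 1)) clears the lowest set bit, so it is zero only
	 * for powers of two; presumably the firmware selects a slice
	 * with a simple mask of the RSS hash, hence the requirement.
	 */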
	while (sc->num_slices & (sc->num_slices - 1))
		sc->num_slices--;

	if (mxge_verbose)
		device_printf(sc->dev, "using %d slices\n",
		    sc->num_slices);

	return;

abort_with_fw:
	sc->fw_name = old_fw;
	(void) mxge_load_firmware(sc, 0);
}

static int
mxge_add_msix_irqs(mxge_softc_t *sc)
{
	size_t bytes;
	int count, err, i, rid;

	rid = PCIR_BAR(2);
	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);

	if (sc->msix_table_res == NULL) {
		device_printf(sc->dev, "couldn't alloc MSIX table res\n");
		return ENXIO;
	}

	count = sc->num_slices;
	err = pci_alloc_msix(sc->dev, &count);
	if (err != 0) {
		device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d, "
		    "err = %d\n", sc->num_slices, err);
		goto abort_with_msix_table;
	}
	if (count < sc->num_slices) {
		device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
		    sc->num_slices, count);
		device_printf(sc->dev,
		    "Try setting hw.mxge.max_slices to %d\n",
		    count);
		err = ENOSPC;
		goto abort_with_msix;
	}
	bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
	sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_irq_res == NULL) {
		err = ENOMEM;
		goto abort_with_msix;
	}

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
		    SYS_RES_IRQ, &rid, RF_ACTIVE);
		if (sc->msix_irq_res[i] == NULL) {
			device_printf(sc->dev, "couldn't allocate IRQ res"
			    " for message %d\n", i);
			err = ENXIO;
			goto abort_with_res;
		}
	}

	bytes = sizeof (*sc->msix_ih) * sc->num_slices;
	sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_ih == NULL) {
		err = ENOMEM;
		goto abort_with_res;
	}

	for (i = 0; i < sc->num_slices; i++) {
		err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
		    INTR_TYPE_NET | INTR_MPSAFE,
#if __FreeBSD_version > 700030
		    NULL,
#endif
		    mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
		if (err != 0) {
			device_printf(sc->dev, "couldn't setup intr for "
			    "message %d\n", i);
			goto abort_with_intr;
		}
	}

	if (mxge_verbose) {
		device_printf(sc->dev, "using %d msix IRQs:",
		    sc->num_slices);
		for (i = 0; i < sc->num_slices; i++)
			printf(" %ld", rman_get_start(sc->msix_irq_res[i]));
		printf("\n");
	}
	return (0);

abort_with_intr:
	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
			    sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	free(sc->msix_ih, M_DEVBUF);

abort_with_res:
	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
			    sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	free(sc->msix_irq_res, M_DEVBUF);

abort_with_msix:
	pci_release_msi(sc->dev);

abort_with_msix_table:
	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
	    sc->msix_table_res);

	return err;
}

static int
mxge_add_single_irq(mxge_softc_t *sc)
{
	int count, err, rid;

	count = pci_msi_count(sc->dev);
	if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
		rid = 1;
	} else {
		rid = 0;
		sc->legacy_irq = 1;
	}
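	/*
	 * rid 0 is the legacy INTx line; MSI messages use rids
	 * starting at 1, which is why the resource id depends on
	 * whether pci_alloc_msi() succeeded above.
	 */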
	sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0,
	    1, RF_SHAREABLE | RF_ACTIVE);
	if (sc->irq_res == NULL) {
		device_printf(sc->dev, "could not alloc interrupt\n");
		return ENXIO;
	}
	if (mxge_verbose)
		device_printf(sc->dev, "using %s irq %ld\n",
		    sc->legacy_irq ? "INTx" : "MSI",
		    rman_get_start(sc->irq_res));
	err = bus_setup_intr(sc->dev, sc->irq_res,
	    INTR_TYPE_NET | INTR_MPSAFE,
#if __FreeBSD_version > 700030
	    NULL,
#endif
	    mxge_intr, &sc->ss[0], &sc->ih);
	if (err != 0) {
		bus_release_resource(sc->dev, SYS_RES_IRQ,
		    sc->legacy_irq ? 0 : 1, sc->irq_res);
		if (!sc->legacy_irq)
			pci_release_msi(sc->dev);
	}
	return err;
}

static void
mxge_rem_msix_irqs(mxge_softc_t *sc)
{
	int i, rid;

	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
			    sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	free(sc->msix_ih, M_DEVBUF);

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
			    sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	free(sc->msix_irq_res, M_DEVBUF);

	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
	    sc->msix_table_res);

	pci_release_msi(sc->dev);
}

static void
mxge_rem_single_irq(mxge_softc_t *sc)
{
	bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
	bus_release_resource(sc->dev, SYS_RES_IRQ,
	    sc->legacy_irq ? 0 : 1, sc->irq_res);
	if (!sc->legacy_irq)
		pci_release_msi(sc->dev);
}

static void
mxge_rem_irq(mxge_softc_t *sc)
{
	if (sc->num_slices > 1)
		mxge_rem_msix_irqs(sc);
	else
		mxge_rem_single_irq(sc);
}

static int
mxge_add_irq(mxge_softc_t *sc)
{
	int err;

	if (sc->num_slices > 1)
		err = mxge_add_msix_irqs(sc);
	else
		err = mxge_add_single_irq(sc);

	/* disabled debug path: exercise MSI-X teardown and re-setup */
	if (0 && err == 0 && sc->num_slices > 1) {
		mxge_rem_msix_irqs(sc);
		err = mxge_add_msix_irqs(sc);
	}
	return err;
}

static int
mxge_attach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp;
	int err, rid;

	sc->dev = dev;
	mxge_fetch_tunables(sc);

	TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
	sc->tq = taskqueue_create_fast("mxge_taskq", M_WAITOK,
	    taskqueue_thread_enqueue, &sc->tq);
	if (sc->tq == NULL) {
		err = ENOMEM;
		goto abort_with_nothing;
	}
	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
	    device_get_nameunit(sc->dev));

	err = bus_dma_tag_create(NULL,			/* parent */
				 1,			/* alignment */
				 0,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 MXGE_MAX_SEND_DESC,	/* num segs */
				 65536,			/* maxsegsize */
				 0,			/* flags */
				 NULL, NULL,		/* lock */
				 &sc->parent_dmat);	/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating parent dmat\n",
		    err);
		goto abort_with_tq;
	}

	ifp = sc->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "can not if_alloc()\n");
		err = ENOSPC;
		goto abort_with_parent_dmat;
	}
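	/* name the ifnet, then create the locks the rest of attach
	   relies on before touching the hardware */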
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
	    device_get_nameunit(dev));
	mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
	snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
	    "%s:drv", device_get_nameunit(dev));
	mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
	    MTX_NETWORK_LOCK, MTX_DEF);

	callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);

	mxge_setup_cfg_space(sc);

	/* Map the board into the kernel */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
	    ~0, 1, RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto abort_with_lock;
	}
	sc->sram = rman_get_virtual(sc->mem_res);
	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
	if (sc->sram_size > rman_get_size(sc->mem_res)) {
		device_printf(dev, "impossible memory region size %ld\n",
		    rman_get_size(sc->mem_res));
		err = ENXIO;
		goto abort_with_mem_res;
	}

	/* make a NUL-terminated copy of the EEPROM strings section of
	   LANai SRAM */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
	    rman_get_bushandle(sc->mem_res),
	    sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
	    sc->eeprom_strings,
	    MXGE_EEPROM_STRINGS_SIZE - 2);
	err = mxge_parse_strings(sc);
	if (err != 0)
		goto abort_with_mem_res;

	/* Enable write combining for efficient use of PCIe bus */
	mxge_enable_wc(sc);

	/* Allocate the out of band DMA memory */
	err = mxge_dma_alloc(sc, &sc->cmd_dma,
	    sizeof (mxge_cmd_t), 64);
	if (err != 0)
		goto abort_with_mem_res;
	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0)
		goto abort_with_cmd_dma;

	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0)
		goto abort_with_zeropad_dma;

	/* select & load the firmware */
	err = mxge_select_firmware(sc);
	if (err != 0)
		goto abort_with_dmabench;
	sc->intr_coal_delay = mxge_intr_coal_delay;

	mxge_slice_probe(sc);
	err = mxge_alloc_slices(sc);
	if (err != 0)
		goto abort_with_dmabench;

	err = mxge_reset(sc, 0);
	if (err != 0)
		goto abort_with_slices;

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to allocate rings\n");
		goto abort_with_slices;
	}

	err = mxge_add_irq(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to add irq\n");
		goto abort_with_rings;
	}

	ifp->if_baudrate = IF_Gbps(10UL);
	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
		IFCAP_VLAN_MTU;
#ifdef INET
	ifp->if_capabilities |= IFCAP_LRO;
#endif

#ifdef MXGE_NEW_VLAN_API
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
#endif

	sc->max_mtu = mxge_max_mtu(sc);
	if (sc->max_mtu >= 9000)
		ifp->if_capabilities |= IFCAP_JUMBO_MTU;
	else
		device_printf(dev, "MTU limited to %d.  Install "
		    "latest firmware for 9000 byte jumbo support\n",
		    sc->max_mtu - ETHER_HDR_LEN);
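	/* enable all advertised capabilities by default; LRO is backed
	   off just below if the tunable disabled it */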
	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
	ifp->if_capenable = ifp->if_capabilities;
	if (sc->lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	sc->csum_flag = 1;
	ifp->if_init = mxge_init;
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = mxge_ioctl;
	ifp->if_start = mxge_start;
	/* Initialise the ifmedia structure */
	ifmedia_init(&sc->media, 0, mxge_media_change,
	    mxge_media_status);
	mxge_set_media(sc, IFM_ETHER | IFM_AUTO);
	mxge_media_probe(sc);
	sc->dying = 0;
	ether_ifattach(ifp, sc->mac_addr);
	/* ether_ifattach sets mtu to ETHERMTU */
	if (mxge_initial_mtu != ETHERMTU)
		mxge_change_mtu(sc, mxge_initial_mtu);

	mxge_add_sysctls(sc);
#ifdef IFNET_BUF_RING
	ifp->if_transmit = mxge_transmit;
	ifp->if_qflush = mxge_qflush;
#endif
	callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	return 0;

abort_with_rings:
	mxge_free_rings(sc);
abort_with_slices:
	mxge_free_slices(sc);
abort_with_dmabench:
	mxge_dma_free(&sc->dmabench_dma);
abort_with_zeropad_dma:
	mxge_dma_free(&sc->zeropad_dma);
abort_with_cmd_dma:
	mxge_dma_free(&sc->cmd_dma);
abort_with_mem_res:
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
abort_with_lock:
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(ifp);
abort_with_parent_dmat:
	bus_dma_tag_destroy(sc->parent_dmat);
abort_with_tq:
	if (sc->tq != NULL) {
		taskqueue_drain(sc->tq, &sc->watchdog_task);
		taskqueue_free(sc->tq);
		sc->tq = NULL;
	}
abort_with_nothing:
	return err;
}

static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	if (mxge_vlans_active(sc)) {
		device_printf(sc->dev,
		    "Detach vlans before removing module\n");
		return EBUSY;
	}
	mtx_lock(&sc->driver_mtx);
	sc->dying = 1;
	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
		mxge_close(sc, 0);
	mtx_unlock(&sc->driver_mtx);
	ether_ifdetach(sc->ifp);
	if (sc->tq != NULL) {
		taskqueue_drain(sc->tq, &sc->watchdog_task);
		taskqueue_free(sc->tq);
		sc->tq = NULL;
	}
	callout_drain(&sc->co_hdl);
	ifmedia_removeall(&sc->media);
	mxge_dummy_rdma(sc, 0);
	mxge_rem_sysctls(sc);
	mxge_rem_irq(sc);
	mxge_free_rings(sc);
	mxge_free_slices(sc);
	mxge_dma_free(&sc->dmabench_dma);
	mxge_dma_free(&sc->zeropad_dma);
	mxge_dma_free(&sc->cmd_dma);
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(sc->ifp);
	bus_dma_tag_destroy(sc->parent_dmat);
	return 0;
}

static int
mxge_shutdown(device_t dev)
{
	return 0;
}

/*
  This file uses Myri10GE driver indentation.

  Local Variables:
  c-file-style:"linux"
  tab-width:8
  End:
*/
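/*
 * Example (hypothetical values): the hw.mxge.* knobs fetched in
 * mxge_fetch_tunables() above are boot-time tunables and can be set
 * from /boot/loader.conf, e.g.:
 *
 *	hw.mxge.max_slices=4
 *	hw.mxge.intr_coal_delay=30
 *	hw.mxge.flow_control_enabled=1
 *	hw.mxge.initial_mtu=9000
 */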