1 /****************************************************************************** 2 3 Copyright (c) 2006-2009, Myricom Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Myricom Inc, nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 27 28 ***************************************************************************/ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/linker.h> 36 #include <sys/firmware.h> 37 #include <sys/endian.h> 38 #include <sys/sockio.h> 39 #include <sys/mbuf.h> 40 #include <sys/malloc.h> 41 #include <sys/kdb.h> 42 #include <sys/kernel.h> 43 #include <sys/lock.h> 44 #include <sys/module.h> 45 #include <sys/socket.h> 46 #include <sys/sysctl.h> 47 #include <sys/sx.h> 48 #include <sys/taskqueue.h> 49 50 /* count xmits ourselves, rather than via drbr */ 51 #define NO_SLOW_STATS 52 #include <net/if.h> 53 #include <net/if_arp.h> 54 #include <net/ethernet.h> 55 #include <net/if_dl.h> 56 #include <net/if_media.h> 57 58 #include <net/bpf.h> 59 60 #include <net/if_types.h> 61 #include <net/if_vlan_var.h> 62 #include <net/zlib.h> 63 64 #include <netinet/in_systm.h> 65 #include <netinet/in.h> 66 #include <netinet/ip.h> 67 #include <netinet/tcp.h> 68 69 #include <machine/bus.h> 70 #include <machine/in_cksum.h> 71 #include <machine/resource.h> 72 #include <sys/bus.h> 73 #include <sys/rman.h> 74 #include <sys/smp.h> 75 76 #include <dev/pci/pcireg.h> 77 #include <dev/pci/pcivar.h> 78 #include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */ 79 80 #include <vm/vm.h> /* for pmap_mapdev() */ 81 #include <vm/pmap.h> 82 83 #if defined(__i386) || defined(__amd64) 84 #include <machine/specialreg.h> 85 #endif 86 87 #include <dev/mxge/mxge_mcp.h> 88 #include <dev/mxge/mcp_gen_header.h> 89 /*#define MXGE_FAKE_IFP*/ 90 #include <dev/mxge/if_mxge_var.h> 91 #ifdef IFNET_BUF_RING 92 #include <sys/buf_ring.h> 93 #endif 94 95 #include "opt_inet.h" 96 97 /* tunable params */ 98 static int mxge_nvidia_ecrc_enable = 1; 99 static int mxge_force_firmware = 0; 100 static int mxge_intr_coal_delay = 30; 101 static int mxge_deassert_wait = 1; 102 static int mxge_flow_control = 1; 103 static int mxge_verbose = 0; 104 static int mxge_lro_cnt = 8; 105 static int mxge_ticks; 106 static int mxge_max_slices = 1; 107 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 108 static int mxge_always_promisc = 0; 109 static int mxge_initial_mtu = ETHERMTU_JUMBO; 110 static int mxge_throttle = 0; 111 static char *mxge_fw_unaligned = "mxge_ethp_z8e"; 112 static char *mxge_fw_aligned = "mxge_eth_z8e"; 113 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; 114 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; 115 116 static int mxge_probe(device_t dev); 117 static int mxge_attach(device_t dev); 118 static int mxge_detach(device_t dev); 119 static int mxge_shutdown(device_t dev); 120 static void mxge_intr(void *arg); 121 122 static device_method_t mxge_methods[] = 123 { 124 /* Device interface */ 125 DEVMETHOD(device_probe, mxge_probe), 126 DEVMETHOD(device_attach, mxge_attach), 127 DEVMETHOD(device_detach, mxge_detach), 128 DEVMETHOD(device_shutdown, mxge_shutdown), 129 {0, 0} 130 }; 131 132 static driver_t mxge_driver = 133 { 134 "mxge", 135 mxge_methods, 136 sizeof(mxge_softc_t), 137 }; 138 139 static devclass_t mxge_devclass; 140 141 /* Declare ourselves to be a child of the PCI bus.*/ 142 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0); 143 MODULE_DEPEND(mxge, firmware, 1, 1, 1); 144 MODULE_DEPEND(mxge, zlib, 1, 1, 1); 145 146 static int mxge_load_firmware(mxge_softc_t *sc, int adopt); 147 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 148 static int mxge_close(mxge_softc_t *sc, int down); 149 static int mxge_open(mxge_softc_t *sc); 150 static void mxge_tick(void *arg); 151 152 static int 153 mxge_probe(device_t dev) 154 { 155 int rev; 156 157 158 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && 159 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) || 160 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) { 161 rev = pci_get_revid(dev); 162 switch (rev) { 163 case MXGE_PCI_REV_Z8E: 164 device_set_desc(dev, "Myri10G-PCIE-8A"); 165 break; 166 case MXGE_PCI_REV_Z8ES: 167 device_set_desc(dev, "Myri10G-PCIE-8B"); 168 break; 169 default: 170 device_set_desc(dev, "Myri10G-PCIE-8??"); 171 device_printf(dev, "Unrecognized rev %d NIC\n", 172 rev); 173 break; 174 } 175 return 0; 176 } 177 return ENXIO; 178 } 179 180 static void 181 mxge_enable_wc(mxge_softc_t *sc) 182 { 183 #if defined(__i386) || defined(__amd64) 184 vm_offset_t len; 185 int err; 186 187 sc->wc = 1; 188 len = rman_get_size(sc->mem_res); 189 err = pmap_change_attr((vm_offset_t) sc->sram, 190 len, PAT_WRITE_COMBINING); 191 if (err != 0) { 192 device_printf(sc->dev, "pmap_change_attr failed, %d\n", 193 err); 194 sc->wc = 0; 195 } 196 #endif 197 } 198 199 200 /* callback to get our DMA address */ 201 static void 202 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, 203 int error) 204 { 205 if (error == 0) { 206 *(bus_addr_t *) arg = segs->ds_addr; 207 } 208 } 209 210 static int 211 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, 212 bus_size_t alignment) 213 { 214 int err; 215 device_t dev = sc->dev; 216 bus_size_t boundary, maxsegsize; 217 218 if (bytes > 4096 && alignment == 4096) { 219 boundary = 0; 220 maxsegsize = bytes; 221 } else { 222 boundary = 4096; 223 maxsegsize = 4096; 224 } 225 226 /* allocate DMAable memory tags */ 227 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 228 alignment, /* alignment */ 229 boundary, /* boundary */ 230 BUS_SPACE_MAXADDR, /* low */ 231 BUS_SPACE_MAXADDR, /* high */ 232 NULL, NULL, /* filter */ 233 bytes, /* maxsize */ 234 1, /* num segs */ 235 maxsegsize, /* maxsegsize */ 236 BUS_DMA_COHERENT, /* flags */ 237 NULL, NULL, /* lock */ 238 &dma->dmat); /* tag */ 239 if (err != 0) { 240 device_printf(dev, "couldn't alloc tag (err = %d)\n", err); 241 return err; 242 } 243 244 /* allocate DMAable memory & map */ 245 err = bus_dmamem_alloc(dma->dmat, &dma->addr, 246 (BUS_DMA_WAITOK | BUS_DMA_COHERENT 247 | BUS_DMA_ZERO), &dma->map); 248 if (err != 0) { 249 device_printf(dev, "couldn't alloc mem (err = %d)\n", err); 250 goto abort_with_dmat; 251 } 252 253 /* load the memory */ 254 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, 255 mxge_dmamap_callback, 256 (void *)&dma->bus_addr, 0); 257 if (err != 0) { 258 device_printf(dev, "couldn't load map (err = %d)\n", err); 259 goto abort_with_mem; 260 } 261 return 0; 262 263 abort_with_mem: 264 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 265 abort_with_dmat: 266 (void)bus_dma_tag_destroy(dma->dmat); 267 return err; 268 } 269 270 271 static void 272 mxge_dma_free(mxge_dma_t *dma) 273 { 274 bus_dmamap_unload(dma->dmat, dma->map); 275 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 276 (void)bus_dma_tag_destroy(dma->dmat); 277 } 278 279 /* 280 * The eeprom strings on the lanaiX have the format 281 * SN=x\0 282 * MAC=x:x:x:x:x:x\0 283 * PC=text\0 284 */ 285 286 static int 287 mxge_parse_strings(mxge_softc_t *sc) 288 { 289 #define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++) 290 291 char *ptr, *limit; 292 int i, found_mac; 293 294 ptr = sc->eeprom_strings; 295 limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE; 296 found_mac = 0; 297 while (ptr < limit && *ptr != '\0') { 298 if (memcmp(ptr, "MAC=", 4) == 0) { 299 ptr += 1; 300 sc->mac_addr_string = ptr; 301 for (i = 0; i < 6; i++) { 302 ptr += 3; 303 if ((ptr + 2) > limit) 304 goto abort; 305 sc->mac_addr[i] = strtoul(ptr, NULL, 16); 306 found_mac = 1; 307 } 308 } else if (memcmp(ptr, "PC=", 3) == 0) { 309 ptr += 3; 310 strncpy(sc->product_code_string, ptr, 311 sizeof (sc->product_code_string) - 1); 312 } else if (memcmp(ptr, "SN=", 3) == 0) { 313 ptr += 3; 314 strncpy(sc->serial_number_string, ptr, 315 sizeof (sc->serial_number_string) - 1); 316 } 317 MXGE_NEXT_STRING(ptr); 318 } 319 320 if (found_mac) 321 return 0; 322 323 abort: 324 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 325 326 return ENXIO; 327 } 328 329 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 330 static void 331 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 332 { 333 uint32_t val; 334 unsigned long base, off; 335 char *va, *cfgptr; 336 device_t pdev, mcp55; 337 uint16_t vendor_id, device_id, word; 338 uintptr_t bus, slot, func, ivend, idev; 339 uint32_t *ptr32; 340 341 342 if (!mxge_nvidia_ecrc_enable) 343 return; 344 345 pdev = device_get_parent(device_get_parent(sc->dev)); 346 if (pdev == NULL) { 347 device_printf(sc->dev, "could not find parent?\n"); 348 return; 349 } 350 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 351 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 352 353 if (vendor_id != 0x10de) 354 return; 355 356 base = 0; 357 358 if (device_id == 0x005d) { 359 /* ck804, base address is magic */ 360 base = 0xe0000000UL; 361 } else if (device_id >= 0x0374 && device_id <= 0x378) { 362 /* mcp55, base address stored in chipset */ 363 mcp55 = pci_find_bsf(0, 0, 0); 364 if (mcp55 && 365 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 366 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 367 word = pci_read_config(mcp55, 0x90, 2); 368 base = ((unsigned long)word & 0x7ffeU) << 25; 369 } 370 } 371 if (!base) 372 return; 373 374 /* XXXX 375 Test below is commented because it is believed that doing 376 config read/write beyond 0xff will access the config space 377 for the next larger function. Uncomment this and remove 378 the hacky pmap_mapdev() way of accessing config space when 379 FreeBSD grows support for extended pcie config space access 380 */ 381 #if 0 382 /* See if we can, by some miracle, access the extended 383 config space */ 384 val = pci_read_config(pdev, 0x178, 4); 385 if (val != 0xffffffff) { 386 val |= 0x40; 387 pci_write_config(pdev, 0x178, val, 4); 388 return; 389 } 390 #endif 391 /* Rather than using normal pci config space writes, we must 392 * map the Nvidia config space ourselves. This is because on 393 * opteron/nvidia class machine the 0xe000000 mapping is 394 * handled by the nvidia chipset, that means the internal PCI 395 * device (the on-chip northbridge), or the amd-8131 bridge 396 * and things behind them are not visible by this method. 397 */ 398 399 BUS_READ_IVAR(device_get_parent(pdev), pdev, 400 PCI_IVAR_BUS, &bus); 401 BUS_READ_IVAR(device_get_parent(pdev), pdev, 402 PCI_IVAR_SLOT, &slot); 403 BUS_READ_IVAR(device_get_parent(pdev), pdev, 404 PCI_IVAR_FUNCTION, &func); 405 BUS_READ_IVAR(device_get_parent(pdev), pdev, 406 PCI_IVAR_VENDOR, &ivend); 407 BUS_READ_IVAR(device_get_parent(pdev), pdev, 408 PCI_IVAR_DEVICE, &idev); 409 410 off = base 411 + 0x00100000UL * (unsigned long)bus 412 + 0x00001000UL * (unsigned long)(func 413 + 8 * slot); 414 415 /* map it into the kernel */ 416 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 417 418 419 if (va == NULL) { 420 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 421 return; 422 } 423 /* get a pointer to the config space mapped into the kernel */ 424 cfgptr = va + (off & PAGE_MASK); 425 426 /* make sure that we can really access it */ 427 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 428 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 429 if (! (vendor_id == ivend && device_id == idev)) { 430 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 431 vendor_id, device_id); 432 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 433 return; 434 } 435 436 ptr32 = (uint32_t*)(cfgptr + 0x178); 437 val = *ptr32; 438 439 if (val == 0xffffffff) { 440 device_printf(sc->dev, "extended mapping failed\n"); 441 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 442 return; 443 } 444 *ptr32 = val | 0x40; 445 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 446 if (mxge_verbose) 447 device_printf(sc->dev, 448 "Enabled ECRC on upstream Nvidia bridge " 449 "at %d:%d:%d\n", 450 (int)bus, (int)slot, (int)func); 451 return; 452 } 453 #else 454 static void 455 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 456 { 457 device_printf(sc->dev, 458 "Nforce 4 chipset on non-x86/amd64!?!?!\n"); 459 return; 460 } 461 #endif 462 463 464 static int 465 mxge_dma_test(mxge_softc_t *sc, int test_type) 466 { 467 mxge_cmd_t cmd; 468 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr; 469 int status; 470 uint32_t len; 471 char *test = " "; 472 473 474 /* Run a small DMA test. 475 * The magic multipliers to the length tell the firmware 476 * to do DMA read, write, or read+write tests. The 477 * results are returned in cmd.data0. The upper 16 478 * bits of the return is the number of transfers completed. 479 * The lower 16 bits is the time in 0.5us ticks that the 480 * transfers took to complete. 481 */ 482 483 len = sc->tx_boundary; 484 485 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 486 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 487 cmd.data2 = len * 0x10000; 488 status = mxge_send_cmd(sc, test_type, &cmd); 489 if (status != 0) { 490 test = "read"; 491 goto abort; 492 } 493 sc->read_dma = ((cmd.data0>>16) * len * 2) / 494 (cmd.data0 & 0xffff); 495 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 496 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 497 cmd.data2 = len * 0x1; 498 status = mxge_send_cmd(sc, test_type, &cmd); 499 if (status != 0) { 500 test = "write"; 501 goto abort; 502 } 503 sc->write_dma = ((cmd.data0>>16) * len * 2) / 504 (cmd.data0 & 0xffff); 505 506 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 507 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 508 cmd.data2 = len * 0x10001; 509 status = mxge_send_cmd(sc, test_type, &cmd); 510 if (status != 0) { 511 test = "read/write"; 512 goto abort; 513 } 514 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 515 (cmd.data0 & 0xffff); 516 517 abort: 518 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 519 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 520 test, status); 521 522 return status; 523 } 524 525 /* 526 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 527 * when the PCI-E Completion packets are aligned on an 8-byte 528 * boundary. Some PCI-E chip sets always align Completion packets; on 529 * the ones that do not, the alignment can be enforced by enabling 530 * ECRC generation (if supported). 531 * 532 * When PCI-E Completion packets are not aligned, it is actually more 533 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 534 * 535 * If the driver can neither enable ECRC nor verify that it has 536 * already been enabled, then it must use a firmware image which works 537 * around unaligned completion packets (ethp_z8e.dat), and it should 538 * also ensure that it never gives the device a Read-DMA which is 539 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 540 * enabled, then the driver should use the aligned (eth_z8e.dat) 541 * firmware image, and set tx_boundary to 4KB. 542 */ 543 544 static int 545 mxge_firmware_probe(mxge_softc_t *sc) 546 { 547 device_t dev = sc->dev; 548 int reg, status; 549 uint16_t pectl; 550 551 sc->tx_boundary = 4096; 552 /* 553 * Verify the max read request size was set to 4KB 554 * before trying the test with 4KB. 555 */ 556 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 557 pectl = pci_read_config(dev, reg + 0x8, 2); 558 if ((pectl & (5 << 12)) != (5 << 12)) { 559 device_printf(dev, "Max Read Req. size != 4k (0x%x\n", 560 pectl); 561 sc->tx_boundary = 2048; 562 } 563 } 564 565 /* 566 * load the optimized firmware (which assumes aligned PCIe 567 * completions) in order to see if it works on this host. 568 */ 569 sc->fw_name = mxge_fw_aligned; 570 status = mxge_load_firmware(sc, 1); 571 if (status != 0) { 572 return status; 573 } 574 575 /* 576 * Enable ECRC if possible 577 */ 578 mxge_enable_nvidia_ecrc(sc); 579 580 /* 581 * Run a DMA test which watches for unaligned completions and 582 * aborts on the first one seen. 583 */ 584 585 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 586 if (status == 0) 587 return 0; /* keep the aligned firmware */ 588 589 if (status != E2BIG) 590 device_printf(dev, "DMA test failed: %d\n", status); 591 if (status == ENOSYS) 592 device_printf(dev, "Falling back to ethp! " 593 "Please install up to date fw\n"); 594 return status; 595 } 596 597 static int 598 mxge_select_firmware(mxge_softc_t *sc) 599 { 600 int aligned = 0; 601 int force_firmware = mxge_force_firmware; 602 603 if (sc->throttle) 604 force_firmware = sc->throttle; 605 606 if (force_firmware != 0) { 607 if (force_firmware == 1) 608 aligned = 1; 609 else 610 aligned = 0; 611 if (mxge_verbose) 612 device_printf(sc->dev, 613 "Assuming %s completions (forced)\n", 614 aligned ? "aligned" : "unaligned"); 615 goto abort; 616 } 617 618 /* if the PCIe link width is 4 or less, we can use the aligned 619 firmware and skip any checks */ 620 if (sc->link_width != 0 && sc->link_width <= 4) { 621 device_printf(sc->dev, 622 "PCIe x%d Link, expect reduced performance\n", 623 sc->link_width); 624 aligned = 1; 625 goto abort; 626 } 627 628 if (0 == mxge_firmware_probe(sc)) 629 return 0; 630 631 abort: 632 if (aligned) { 633 sc->fw_name = mxge_fw_aligned; 634 sc->tx_boundary = 4096; 635 } else { 636 sc->fw_name = mxge_fw_unaligned; 637 sc->tx_boundary = 2048; 638 } 639 return (mxge_load_firmware(sc, 0)); 640 } 641 642 union qualhack 643 { 644 const char *ro_char; 645 char *rw_char; 646 }; 647 648 static int 649 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 650 { 651 652 653 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 654 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 655 be32toh(hdr->mcp_type)); 656 return EIO; 657 } 658 659 /* save firmware version for sysctl */ 660 strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version)); 661 if (mxge_verbose) 662 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 663 664 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 665 &sc->fw_ver_minor, &sc->fw_ver_tiny); 666 667 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 668 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 669 device_printf(sc->dev, "Found firmware version %s\n", 670 sc->fw_version); 671 device_printf(sc->dev, "Driver needs %d.%d\n", 672 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 673 return EINVAL; 674 } 675 return 0; 676 677 } 678 679 static void * 680 z_alloc(void *nil, u_int items, u_int size) 681 { 682 void *ptr; 683 684 ptr = malloc(items * size, M_TEMP, M_NOWAIT); 685 return ptr; 686 } 687 688 static void 689 z_free(void *nil, void *ptr) 690 { 691 free(ptr, M_TEMP); 692 } 693 694 695 static int 696 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 697 { 698 z_stream zs; 699 char *inflate_buffer; 700 const struct firmware *fw; 701 const mcp_gen_header_t *hdr; 702 unsigned hdr_offset; 703 int status; 704 unsigned int i; 705 char dummy; 706 size_t fw_len; 707 708 fw = firmware_get(sc->fw_name); 709 if (fw == NULL) { 710 device_printf(sc->dev, "Could not find firmware image %s\n", 711 sc->fw_name); 712 return ENOENT; 713 } 714 715 716 717 /* setup zlib and decompress f/w */ 718 bzero(&zs, sizeof (zs)); 719 zs.zalloc = z_alloc; 720 zs.zfree = z_free; 721 status = inflateInit(&zs); 722 if (status != Z_OK) { 723 status = EIO; 724 goto abort_with_fw; 725 } 726 727 /* the uncompressed size is stored as the firmware version, 728 which would otherwise go unused */ 729 fw_len = (size_t) fw->version; 730 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); 731 if (inflate_buffer == NULL) 732 goto abort_with_zs; 733 zs.avail_in = fw->datasize; 734 zs.next_in = __DECONST(char *, fw->data); 735 zs.avail_out = fw_len; 736 zs.next_out = inflate_buffer; 737 status = inflate(&zs, Z_FINISH); 738 if (status != Z_STREAM_END) { 739 device_printf(sc->dev, "zlib %d\n", status); 740 status = EIO; 741 goto abort_with_buffer; 742 } 743 744 /* check id */ 745 hdr_offset = htobe32(*(const uint32_t *) 746 (inflate_buffer + MCP_HEADER_PTR_OFFSET)); 747 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 748 device_printf(sc->dev, "Bad firmware file"); 749 status = EIO; 750 goto abort_with_buffer; 751 } 752 hdr = (const void*)(inflate_buffer + hdr_offset); 753 754 status = mxge_validate_firmware(sc, hdr); 755 if (status != 0) 756 goto abort_with_buffer; 757 758 /* Copy the inflated firmware to NIC SRAM. */ 759 for (i = 0; i < fw_len; i += 256) { 760 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 761 inflate_buffer + i, 762 min(256U, (unsigned)(fw_len - i))); 763 wmb(); 764 dummy = *sc->sram; 765 wmb(); 766 } 767 768 *limit = fw_len; 769 status = 0; 770 abort_with_buffer: 771 free(inflate_buffer, M_TEMP); 772 abort_with_zs: 773 inflateEnd(&zs); 774 abort_with_fw: 775 firmware_put(fw, FIRMWARE_UNLOAD); 776 return status; 777 } 778 779 /* 780 * Enable or disable periodic RDMAs from the host to make certain 781 * chipsets resend dropped PCIe messages 782 */ 783 784 static void 785 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 786 { 787 char buf_bytes[72]; 788 volatile uint32_t *confirm; 789 volatile char *submit; 790 uint32_t *buf, dma_low, dma_high; 791 int i; 792 793 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 794 795 /* clear confirmation addr */ 796 confirm = (volatile uint32_t *)sc->cmd; 797 *confirm = 0; 798 wmb(); 799 800 /* send an rdma command to the PCIe engine, and wait for the 801 response in the confirmation address. The firmware should 802 write a -1 there to indicate it is alive and well 803 */ 804 805 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 806 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 807 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 808 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 809 buf[2] = htobe32(0xffffffff); /* confirm data */ 810 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 811 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 812 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 813 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 814 buf[5] = htobe32(enable); /* enable? */ 815 816 817 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 818 819 mxge_pio_copy(submit, buf, 64); 820 wmb(); 821 DELAY(1000); 822 wmb(); 823 i = 0; 824 while (*confirm != 0xffffffff && i < 20) { 825 DELAY(1000); 826 i++; 827 } 828 if (*confirm != 0xffffffff) { 829 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 830 (enable ? "enable" : "disable"), confirm, 831 *confirm); 832 } 833 return; 834 } 835 836 static int 837 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 838 { 839 mcp_cmd_t *buf; 840 char buf_bytes[sizeof(*buf) + 8]; 841 volatile mcp_cmd_response_t *response = sc->cmd; 842 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 843 uint32_t dma_low, dma_high; 844 int err, sleep_total = 0; 845 846 /* ensure buf is aligned to 8 bytes */ 847 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 848 849 buf->data0 = htobe32(data->data0); 850 buf->data1 = htobe32(data->data1); 851 buf->data2 = htobe32(data->data2); 852 buf->cmd = htobe32(cmd); 853 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 854 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 855 856 buf->response_addr.low = htobe32(dma_low); 857 buf->response_addr.high = htobe32(dma_high); 858 mtx_lock(&sc->cmd_mtx); 859 response->result = 0xffffffff; 860 wmb(); 861 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 862 863 /* wait up to 20ms */ 864 err = EAGAIN; 865 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 866 bus_dmamap_sync(sc->cmd_dma.dmat, 867 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 868 wmb(); 869 switch (be32toh(response->result)) { 870 case 0: 871 data->data0 = be32toh(response->data); 872 err = 0; 873 break; 874 case 0xffffffff: 875 DELAY(1000); 876 break; 877 case MXGEFW_CMD_UNKNOWN: 878 err = ENOSYS; 879 break; 880 case MXGEFW_CMD_ERROR_UNALIGNED: 881 err = E2BIG; 882 break; 883 case MXGEFW_CMD_ERROR_BUSY: 884 err = EBUSY; 885 break; 886 case MXGEFW_CMD_ERROR_I2C_ABSENT: 887 err = ENXIO; 888 break; 889 default: 890 device_printf(sc->dev, 891 "mxge: command %d " 892 "failed, result = %d\n", 893 cmd, be32toh(response->result)); 894 err = ENXIO; 895 break; 896 } 897 if (err != EAGAIN) 898 break; 899 } 900 if (err == EAGAIN) 901 device_printf(sc->dev, "mxge: command %d timed out" 902 "result = %d\n", 903 cmd, be32toh(response->result)); 904 mtx_unlock(&sc->cmd_mtx); 905 return err; 906 } 907 908 static int 909 mxge_adopt_running_firmware(mxge_softc_t *sc) 910 { 911 struct mcp_gen_header *hdr; 912 const size_t bytes = sizeof (struct mcp_gen_header); 913 size_t hdr_offset; 914 int status; 915 916 /* find running firmware header */ 917 hdr_offset = htobe32(*(volatile uint32_t *) 918 (sc->sram + MCP_HEADER_PTR_OFFSET)); 919 920 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 921 device_printf(sc->dev, 922 "Running firmware has bad header offset (%d)\n", 923 (int)hdr_offset); 924 return EIO; 925 } 926 927 /* copy header of running firmware from SRAM to host memory to 928 * validate firmware */ 929 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 930 if (hdr == NULL) { 931 device_printf(sc->dev, "could not malloc firmware hdr\n"); 932 return ENOMEM; 933 } 934 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 935 rman_get_bushandle(sc->mem_res), 936 hdr_offset, (char *)hdr, bytes); 937 status = mxge_validate_firmware(sc, hdr); 938 free(hdr, M_DEVBUF); 939 940 /* 941 * check to see if adopted firmware has bug where adopting 942 * it will cause broadcasts to be filtered unless the NIC 943 * is kept in ALLMULTI mode 944 */ 945 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 946 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 947 sc->adopted_rx_filter_bug = 1; 948 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 949 "working around rx filter bug\n", 950 sc->fw_ver_major, sc->fw_ver_minor, 951 sc->fw_ver_tiny); 952 } 953 954 return status; 955 } 956 957 958 static int 959 mxge_load_firmware(mxge_softc_t *sc, int adopt) 960 { 961 volatile uint32_t *confirm; 962 volatile char *submit; 963 char buf_bytes[72]; 964 uint32_t *buf, size, dma_low, dma_high; 965 int status, i; 966 967 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 968 969 size = sc->sram_size; 970 status = mxge_load_firmware_helper(sc, &size); 971 if (status) { 972 if (!adopt) 973 return status; 974 /* Try to use the currently running firmware, if 975 it is new enough */ 976 status = mxge_adopt_running_firmware(sc); 977 if (status) { 978 device_printf(sc->dev, 979 "failed to adopt running firmware\n"); 980 return status; 981 } 982 device_printf(sc->dev, 983 "Successfully adopted running firmware\n"); 984 if (sc->tx_boundary == 4096) { 985 device_printf(sc->dev, 986 "Using firmware currently running on NIC" 987 ". For optimal\n"); 988 device_printf(sc->dev, 989 "performance consider loading optimized " 990 "firmware\n"); 991 } 992 sc->fw_name = mxge_fw_unaligned; 993 sc->tx_boundary = 2048; 994 return 0; 995 } 996 /* clear confirmation addr */ 997 confirm = (volatile uint32_t *)sc->cmd; 998 *confirm = 0; 999 wmb(); 1000 /* send a reload command to the bootstrap MCP, and wait for the 1001 response in the confirmation address. The firmware should 1002 write a -1 there to indicate it is alive and well 1003 */ 1004 1005 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 1006 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 1007 1008 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 1009 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 1010 buf[2] = htobe32(0xffffffff); /* confirm data */ 1011 1012 /* FIX: All newest firmware should un-protect the bottom of 1013 the sram before handoff. However, the very first interfaces 1014 do not. Therefore the handoff copy must skip the first 8 bytes 1015 */ 1016 /* where the code starts*/ 1017 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 1018 buf[4] = htobe32(size - 8); /* length of code */ 1019 buf[5] = htobe32(8); /* where to copy to */ 1020 buf[6] = htobe32(0); /* where to jump to */ 1021 1022 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 1023 mxge_pio_copy(submit, buf, 64); 1024 wmb(); 1025 DELAY(1000); 1026 wmb(); 1027 i = 0; 1028 while (*confirm != 0xffffffff && i < 20) { 1029 DELAY(1000*10); 1030 i++; 1031 bus_dmamap_sync(sc->cmd_dma.dmat, 1032 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 1033 } 1034 if (*confirm != 0xffffffff) { 1035 device_printf(sc->dev,"handoff failed (%p = 0x%x)", 1036 confirm, *confirm); 1037 1038 return ENXIO; 1039 } 1040 return 0; 1041 } 1042 1043 static int 1044 mxge_update_mac_address(mxge_softc_t *sc) 1045 { 1046 mxge_cmd_t cmd; 1047 uint8_t *addr = sc->mac_addr; 1048 int status; 1049 1050 1051 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 1052 | (addr[2] << 8) | addr[3]); 1053 1054 cmd.data1 = ((addr[4] << 8) | (addr[5])); 1055 1056 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 1057 return status; 1058 } 1059 1060 static int 1061 mxge_change_pause(mxge_softc_t *sc, int pause) 1062 { 1063 mxge_cmd_t cmd; 1064 int status; 1065 1066 if (pause) 1067 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, 1068 &cmd); 1069 else 1070 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, 1071 &cmd); 1072 1073 if (status) { 1074 device_printf(sc->dev, "Failed to set flow control mode\n"); 1075 return ENXIO; 1076 } 1077 sc->pause = pause; 1078 return 0; 1079 } 1080 1081 static void 1082 mxge_change_promisc(mxge_softc_t *sc, int promisc) 1083 { 1084 mxge_cmd_t cmd; 1085 int status; 1086 1087 if (mxge_always_promisc) 1088 promisc = 1; 1089 1090 if (promisc) 1091 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, 1092 &cmd); 1093 else 1094 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, 1095 &cmd); 1096 1097 if (status) { 1098 device_printf(sc->dev, "Failed to set promisc mode\n"); 1099 } 1100 } 1101 1102 static void 1103 mxge_set_multicast_list(mxge_softc_t *sc) 1104 { 1105 mxge_cmd_t cmd; 1106 struct ifmultiaddr *ifma; 1107 struct ifnet *ifp = sc->ifp; 1108 int err; 1109 1110 /* This firmware is known to not support multicast */ 1111 if (!sc->fw_multicast_support) 1112 return; 1113 1114 /* Disable multicast filtering while we play with the lists*/ 1115 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1116 if (err != 0) { 1117 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," 1118 " error status: %d\n", err); 1119 return; 1120 } 1121 1122 if (sc->adopted_rx_filter_bug) 1123 return; 1124 1125 if (ifp->if_flags & IFF_ALLMULTI) 1126 /* request to disable multicast filtering, so quit here */ 1127 return; 1128 1129 /* Flush all the filters */ 1130 1131 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1132 if (err != 0) { 1133 device_printf(sc->dev, 1134 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" 1135 ", error status: %d\n", err); 1136 return; 1137 } 1138 1139 /* Walk the multicast list, and add each address */ 1140 1141 if_maddr_rlock(ifp); 1142 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1143 if (ifma->ifma_addr->sa_family != AF_LINK) 1144 continue; 1145 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1146 &cmd.data0, 4); 1147 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, 1148 &cmd.data1, 2); 1149 cmd.data0 = htonl(cmd.data0); 1150 cmd.data1 = htonl(cmd.data1); 1151 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1152 if (err != 0) { 1153 device_printf(sc->dev, "Failed " 1154 "MXGEFW_JOIN_MULTICAST_GROUP, error status:" 1155 "%d\t", err); 1156 /* abort, leaving multicast filtering off */ 1157 if_maddr_runlock(ifp); 1158 return; 1159 } 1160 } 1161 if_maddr_runlock(ifp); 1162 /* Enable multicast filtering */ 1163 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1164 if (err != 0) { 1165 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI" 1166 ", error status: %d\n", err); 1167 } 1168 } 1169 1170 static int 1171 mxge_max_mtu(mxge_softc_t *sc) 1172 { 1173 mxge_cmd_t cmd; 1174 int status; 1175 1176 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) 1177 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1178 1179 /* try to set nbufs to see if it we can 1180 use virtually contiguous jumbos */ 1181 cmd.data0 = 0; 1182 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 1183 &cmd); 1184 if (status == 0) 1185 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1186 1187 /* otherwise, we're limited to MJUMPAGESIZE */ 1188 return MJUMPAGESIZE - MXGEFW_PAD; 1189 } 1190 1191 static int 1192 mxge_reset(mxge_softc_t *sc, int interrupts_setup) 1193 { 1194 struct mxge_slice_state *ss; 1195 mxge_rx_done_t *rx_done; 1196 volatile uint32_t *irq_claim; 1197 mxge_cmd_t cmd; 1198 int slice, status; 1199 1200 /* try to send a reset command to the card to see if it 1201 is alive */ 1202 memset(&cmd, 0, sizeof (cmd)); 1203 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1204 if (status != 0) { 1205 device_printf(sc->dev, "failed reset\n"); 1206 return ENXIO; 1207 } 1208 1209 mxge_dummy_rdma(sc, 1); 1210 1211 1212 /* set the intrq size */ 1213 cmd.data0 = sc->rx_ring_size; 1214 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1215 1216 /* 1217 * Even though we already know how many slices are supported 1218 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES 1219 * has magic side effects, and must be called after a reset. 1220 * It must be called prior to calling any RSS related cmds, 1221 * including assigning an interrupt queue for anything but 1222 * slice 0. It must also be called *after* 1223 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 1224 * the firmware to compute offsets. 1225 */ 1226 1227 if (sc->num_slices > 1) { 1228 /* ask the maximum number of slices it supports */ 1229 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 1230 &cmd); 1231 if (status != 0) { 1232 device_printf(sc->dev, 1233 "failed to get number of slices\n"); 1234 return status; 1235 } 1236 /* 1237 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 1238 * to setting up the interrupt queue DMA 1239 */ 1240 cmd.data0 = sc->num_slices; 1241 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 1242 #ifdef IFNET_BUF_RING 1243 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 1244 #endif 1245 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, 1246 &cmd); 1247 if (status != 0) { 1248 device_printf(sc->dev, 1249 "failed to set number of slices\n"); 1250 return status; 1251 } 1252 } 1253 1254 1255 if (interrupts_setup) { 1256 /* Now exchange information about interrupts */ 1257 for (slice = 0; slice < sc->num_slices; slice++) { 1258 rx_done = &sc->ss[slice].rx_done; 1259 memset(rx_done->entry, 0, sc->rx_ring_size); 1260 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr); 1261 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr); 1262 cmd.data2 = slice; 1263 status |= mxge_send_cmd(sc, 1264 MXGEFW_CMD_SET_INTRQ_DMA, 1265 &cmd); 1266 } 1267 } 1268 1269 status |= mxge_send_cmd(sc, 1270 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); 1271 1272 1273 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1274 1275 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1276 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1277 1278 1279 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1280 &cmd); 1281 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1282 if (status != 0) { 1283 device_printf(sc->dev, "failed set interrupt parameters\n"); 1284 return status; 1285 } 1286 1287 1288 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1289 1290 1291 /* run a DMA benchmark */ 1292 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST); 1293 1294 for (slice = 0; slice < sc->num_slices; slice++) { 1295 ss = &sc->ss[slice]; 1296 1297 ss->irq_claim = irq_claim + (2 * slice); 1298 /* reset mcp/driver shared state back to 0 */ 1299 ss->rx_done.idx = 0; 1300 ss->rx_done.cnt = 0; 1301 ss->tx.req = 0; 1302 ss->tx.done = 0; 1303 ss->tx.pkt_done = 0; 1304 ss->tx.queue_active = 0; 1305 ss->tx.activate = 0; 1306 ss->tx.deactivate = 0; 1307 ss->tx.wake = 0; 1308 ss->tx.defrag = 0; 1309 ss->tx.stall = 0; 1310 ss->rx_big.cnt = 0; 1311 ss->rx_small.cnt = 0; 1312 ss->lro_bad_csum = 0; 1313 ss->lro_queued = 0; 1314 ss->lro_flushed = 0; 1315 if (ss->fw_stats != NULL) { 1316 bzero(ss->fw_stats, sizeof *ss->fw_stats); 1317 } 1318 } 1319 sc->rdma_tags_available = 15; 1320 status = mxge_update_mac_address(sc); 1321 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC); 1322 mxge_change_pause(sc, sc->pause); 1323 mxge_set_multicast_list(sc); 1324 if (sc->throttle) { 1325 cmd.data0 = sc->throttle; 1326 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, 1327 &cmd)) { 1328 device_printf(sc->dev, 1329 "can't enable throttle\n"); 1330 } 1331 } 1332 return status; 1333 } 1334 1335 static int 1336 mxge_change_throttle(SYSCTL_HANDLER_ARGS) 1337 { 1338 mxge_cmd_t cmd; 1339 mxge_softc_t *sc; 1340 int err; 1341 unsigned int throttle; 1342 1343 sc = arg1; 1344 throttle = sc->throttle; 1345 err = sysctl_handle_int(oidp, &throttle, arg2, req); 1346 if (err != 0) { 1347 return err; 1348 } 1349 1350 if (throttle == sc->throttle) 1351 return 0; 1352 1353 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE) 1354 return EINVAL; 1355 1356 mtx_lock(&sc->driver_mtx); 1357 cmd.data0 = throttle; 1358 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd); 1359 if (err == 0) 1360 sc->throttle = throttle; 1361 mtx_unlock(&sc->driver_mtx); 1362 return err; 1363 } 1364 1365 static int 1366 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1367 { 1368 mxge_softc_t *sc; 1369 unsigned int intr_coal_delay; 1370 int err; 1371 1372 sc = arg1; 1373 intr_coal_delay = sc->intr_coal_delay; 1374 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1375 if (err != 0) { 1376 return err; 1377 } 1378 if (intr_coal_delay == sc->intr_coal_delay) 1379 return 0; 1380 1381 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1382 return EINVAL; 1383 1384 mtx_lock(&sc->driver_mtx); 1385 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1386 sc->intr_coal_delay = intr_coal_delay; 1387 1388 mtx_unlock(&sc->driver_mtx); 1389 return err; 1390 } 1391 1392 static int 1393 mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1394 { 1395 mxge_softc_t *sc; 1396 unsigned int enabled; 1397 int err; 1398 1399 sc = arg1; 1400 enabled = sc->pause; 1401 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1402 if (err != 0) { 1403 return err; 1404 } 1405 if (enabled == sc->pause) 1406 return 0; 1407 1408 mtx_lock(&sc->driver_mtx); 1409 err = mxge_change_pause(sc, enabled); 1410 mtx_unlock(&sc->driver_mtx); 1411 return err; 1412 } 1413 1414 static int 1415 mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt) 1416 { 1417 struct ifnet *ifp; 1418 int err = 0; 1419 1420 ifp = sc->ifp; 1421 if (lro_cnt == 0) 1422 ifp->if_capenable &= ~IFCAP_LRO; 1423 else 1424 ifp->if_capenable |= IFCAP_LRO; 1425 sc->lro_cnt = lro_cnt; 1426 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1427 mxge_close(sc, 0); 1428 err = mxge_open(sc); 1429 } 1430 return err; 1431 } 1432 1433 static int 1434 mxge_change_lro(SYSCTL_HANDLER_ARGS) 1435 { 1436 mxge_softc_t *sc; 1437 unsigned int lro_cnt; 1438 int err; 1439 1440 sc = arg1; 1441 lro_cnt = sc->lro_cnt; 1442 err = sysctl_handle_int(oidp, &lro_cnt, arg2, req); 1443 if (err != 0) 1444 return err; 1445 1446 if (lro_cnt == sc->lro_cnt) 1447 return 0; 1448 1449 if (lro_cnt > 128) 1450 return EINVAL; 1451 1452 mtx_lock(&sc->driver_mtx); 1453 err = mxge_change_lro_locked(sc, lro_cnt); 1454 mtx_unlock(&sc->driver_mtx); 1455 return err; 1456 } 1457 1458 static int 1459 mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1460 { 1461 int err; 1462 1463 if (arg1 == NULL) 1464 return EFAULT; 1465 arg2 = be32toh(*(int *)arg1); 1466 arg1 = NULL; 1467 err = sysctl_handle_int(oidp, arg1, arg2, req); 1468 1469 return err; 1470 } 1471 1472 static void 1473 mxge_rem_sysctls(mxge_softc_t *sc) 1474 { 1475 struct mxge_slice_state *ss; 1476 int slice; 1477 1478 if (sc->slice_sysctl_tree == NULL) 1479 return; 1480 1481 for (slice = 0; slice < sc->num_slices; slice++) { 1482 ss = &sc->ss[slice]; 1483 if (ss == NULL || ss->sysctl_tree == NULL) 1484 continue; 1485 sysctl_ctx_free(&ss->sysctl_ctx); 1486 ss->sysctl_tree = NULL; 1487 } 1488 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1489 sc->slice_sysctl_tree = NULL; 1490 } 1491 1492 static void 1493 mxge_add_sysctls(mxge_softc_t *sc) 1494 { 1495 struct sysctl_ctx_list *ctx; 1496 struct sysctl_oid_list *children; 1497 mcp_irq_data_t *fw; 1498 struct mxge_slice_state *ss; 1499 int slice; 1500 char slice_num[8]; 1501 1502 ctx = device_get_sysctl_ctx(sc->dev); 1503 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1504 fw = sc->ss[0].fw_stats; 1505 1506 /* random information */ 1507 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1508 "firmware_version", 1509 CTLFLAG_RD, &sc->fw_version, 1510 0, "firmware version"); 1511 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1512 "serial_number", 1513 CTLFLAG_RD, &sc->serial_number_string, 1514 0, "serial number"); 1515 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1516 "product_code", 1517 CTLFLAG_RD, &sc->product_code_string, 1518 0, "product_code"); 1519 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1520 "pcie_link_width", 1521 CTLFLAG_RD, &sc->link_width, 1522 0, "tx_boundary"); 1523 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1524 "tx_boundary", 1525 CTLFLAG_RD, &sc->tx_boundary, 1526 0, "tx_boundary"); 1527 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1528 "write_combine", 1529 CTLFLAG_RD, &sc->wc, 1530 0, "write combining PIO?"); 1531 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1532 "read_dma_MBs", 1533 CTLFLAG_RD, &sc->read_dma, 1534 0, "DMA Read speed in MB/s"); 1535 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1536 "write_dma_MBs", 1537 CTLFLAG_RD, &sc->write_dma, 1538 0, "DMA Write speed in MB/s"); 1539 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1540 "read_write_dma_MBs", 1541 CTLFLAG_RD, &sc->read_write_dma, 1542 0, "DMA concurrent Read/Write speed in MB/s"); 1543 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1544 "watchdog_resets", 1545 CTLFLAG_RD, &sc->watchdog_resets, 1546 0, "Number of times NIC was reset"); 1547 1548 1549 /* performance related tunables */ 1550 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1551 "intr_coal_delay", 1552 CTLTYPE_INT|CTLFLAG_RW, sc, 1553 0, mxge_change_intr_coal, 1554 "I", "interrupt coalescing delay in usecs"); 1555 1556 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1557 "throttle", 1558 CTLTYPE_INT|CTLFLAG_RW, sc, 1559 0, mxge_change_throttle, 1560 "I", "transmit throttling"); 1561 1562 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1563 "flow_control_enabled", 1564 CTLTYPE_INT|CTLFLAG_RW, sc, 1565 0, mxge_change_flow_control, 1566 "I", "interrupt coalescing delay in usecs"); 1567 1568 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1569 "deassert_wait", 1570 CTLFLAG_RW, &mxge_deassert_wait, 1571 0, "Wait for IRQ line to go low in ihandler"); 1572 1573 /* stats block from firmware is in network byte order. 1574 Need to swap it */ 1575 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1576 "link_up", 1577 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 1578 0, mxge_handle_be32, 1579 "I", "link up"); 1580 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1581 "rdma_tags_available", 1582 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 1583 0, mxge_handle_be32, 1584 "I", "rdma_tags_available"); 1585 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1586 "dropped_bad_crc32", 1587 CTLTYPE_INT|CTLFLAG_RD, 1588 &fw->dropped_bad_crc32, 1589 0, mxge_handle_be32, 1590 "I", "dropped_bad_crc32"); 1591 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1592 "dropped_bad_phy", 1593 CTLTYPE_INT|CTLFLAG_RD, 1594 &fw->dropped_bad_phy, 1595 0, mxge_handle_be32, 1596 "I", "dropped_bad_phy"); 1597 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1598 "dropped_link_error_or_filtered", 1599 CTLTYPE_INT|CTLFLAG_RD, 1600 &fw->dropped_link_error_or_filtered, 1601 0, mxge_handle_be32, 1602 "I", "dropped_link_error_or_filtered"); 1603 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1604 "dropped_link_overflow", 1605 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 1606 0, mxge_handle_be32, 1607 "I", "dropped_link_overflow"); 1608 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1609 "dropped_multicast_filtered", 1610 CTLTYPE_INT|CTLFLAG_RD, 1611 &fw->dropped_multicast_filtered, 1612 0, mxge_handle_be32, 1613 "I", "dropped_multicast_filtered"); 1614 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1615 "dropped_no_big_buffer", 1616 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 1617 0, mxge_handle_be32, 1618 "I", "dropped_no_big_buffer"); 1619 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1620 "dropped_no_small_buffer", 1621 CTLTYPE_INT|CTLFLAG_RD, 1622 &fw->dropped_no_small_buffer, 1623 0, mxge_handle_be32, 1624 "I", "dropped_no_small_buffer"); 1625 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1626 "dropped_overrun", 1627 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 1628 0, mxge_handle_be32, 1629 "I", "dropped_overrun"); 1630 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1631 "dropped_pause", 1632 CTLTYPE_INT|CTLFLAG_RD, 1633 &fw->dropped_pause, 1634 0, mxge_handle_be32, 1635 "I", "dropped_pause"); 1636 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1637 "dropped_runt", 1638 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 1639 0, mxge_handle_be32, 1640 "I", "dropped_runt"); 1641 1642 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1643 "dropped_unicast_filtered", 1644 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 1645 0, mxge_handle_be32, 1646 "I", "dropped_unicast_filtered"); 1647 1648 /* verbose printing? */ 1649 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1650 "verbose", 1651 CTLFLAG_RW, &mxge_verbose, 1652 0, "verbose printing"); 1653 1654 /* lro */ 1655 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1656 "lro_cnt", 1657 CTLTYPE_INT|CTLFLAG_RW, sc, 1658 0, mxge_change_lro, 1659 "I", "number of lro merge queues"); 1660 1661 1662 /* add counters exported for debugging from all slices */ 1663 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1664 sc->slice_sysctl_tree = 1665 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO, 1666 "slice", CTLFLAG_RD, 0, ""); 1667 1668 for (slice = 0; slice < sc->num_slices; slice++) { 1669 ss = &sc->ss[slice]; 1670 sysctl_ctx_init(&ss->sysctl_ctx); 1671 ctx = &ss->sysctl_ctx; 1672 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1673 sprintf(slice_num, "%d", slice); 1674 ss->sysctl_tree = 1675 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num, 1676 CTLFLAG_RD, 0, ""); 1677 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1678 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1679 "rx_small_cnt", 1680 CTLFLAG_RD, &ss->rx_small.cnt, 1681 0, "rx_small_cnt"); 1682 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1683 "rx_big_cnt", 1684 CTLFLAG_RD, &ss->rx_big.cnt, 1685 0, "rx_small_cnt"); 1686 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1687 "lro_flushed", CTLFLAG_RD, &ss->lro_flushed, 1688 0, "number of lro merge queues flushed"); 1689 1690 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1691 "lro_queued", CTLFLAG_RD, &ss->lro_queued, 1692 0, "number of frames appended to lro merge" 1693 "queues"); 1694 1695 #ifndef IFNET_BUF_RING 1696 /* only transmit from slice 0 for now */ 1697 if (slice > 0) 1698 continue; 1699 #endif 1700 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1701 "tx_req", 1702 CTLFLAG_RD, &ss->tx.req, 1703 0, "tx_req"); 1704 1705 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1706 "tx_done", 1707 CTLFLAG_RD, &ss->tx.done, 1708 0, "tx_done"); 1709 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1710 "tx_pkt_done", 1711 CTLFLAG_RD, &ss->tx.pkt_done, 1712 0, "tx_done"); 1713 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1714 "tx_stall", 1715 CTLFLAG_RD, &ss->tx.stall, 1716 0, "tx_stall"); 1717 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1718 "tx_wake", 1719 CTLFLAG_RD, &ss->tx.wake, 1720 0, "tx_wake"); 1721 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1722 "tx_defrag", 1723 CTLFLAG_RD, &ss->tx.defrag, 1724 0, "tx_defrag"); 1725 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1726 "tx_queue_active", 1727 CTLFLAG_RD, &ss->tx.queue_active, 1728 0, "tx_queue_active"); 1729 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1730 "tx_activate", 1731 CTLFLAG_RD, &ss->tx.activate, 1732 0, "tx_activate"); 1733 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1734 "tx_deactivate", 1735 CTLFLAG_RD, &ss->tx.deactivate, 1736 0, "tx_deactivate"); 1737 } 1738 } 1739 1740 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1741 backwards one at a time and handle ring wraps */ 1742 1743 static inline void 1744 mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1745 mcp_kreq_ether_send_t *src, int cnt) 1746 { 1747 int idx, starting_slot; 1748 starting_slot = tx->req; 1749 while (cnt > 1) { 1750 cnt--; 1751 idx = (starting_slot + cnt) & tx->mask; 1752 mxge_pio_copy(&tx->lanai[idx], 1753 &src[cnt], sizeof(*src)); 1754 wmb(); 1755 } 1756 } 1757 1758 /* 1759 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1760 * at most 32 bytes at a time, so as to avoid involving the software 1761 * pio handler in the nic. We re-write the first segment's flags 1762 * to mark them valid only after writing the entire chain 1763 */ 1764 1765 static inline void 1766 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 1767 int cnt) 1768 { 1769 int idx, i; 1770 uint32_t *src_ints; 1771 volatile uint32_t *dst_ints; 1772 mcp_kreq_ether_send_t *srcp; 1773 volatile mcp_kreq_ether_send_t *dstp, *dst; 1774 uint8_t last_flags; 1775 1776 idx = tx->req & tx->mask; 1777 1778 last_flags = src->flags; 1779 src->flags = 0; 1780 wmb(); 1781 dst = dstp = &tx->lanai[idx]; 1782 srcp = src; 1783 1784 if ((idx + cnt) < tx->mask) { 1785 for (i = 0; i < (cnt - 1); i += 2) { 1786 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1787 wmb(); /* force write every 32 bytes */ 1788 srcp += 2; 1789 dstp += 2; 1790 } 1791 } else { 1792 /* submit all but the first request, and ensure 1793 that it is submitted below */ 1794 mxge_submit_req_backwards(tx, src, cnt); 1795 i = 0; 1796 } 1797 if (i < cnt) { 1798 /* submit the first request */ 1799 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1800 wmb(); /* barrier before setting valid flag */ 1801 } 1802 1803 /* re-write the last 32-bits with the valid flags */ 1804 src->flags = last_flags; 1805 src_ints = (uint32_t *)src; 1806 src_ints+=3; 1807 dst_ints = (volatile uint32_t *)dst; 1808 dst_ints+=3; 1809 *dst_ints = *src_ints; 1810 tx->req += cnt; 1811 wmb(); 1812 } 1813 1814 #if IFCAP_TSO4 1815 1816 static void 1817 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m, 1818 int busdma_seg_cnt, int ip_off) 1819 { 1820 mxge_tx_ring_t *tx; 1821 mcp_kreq_ether_send_t *req; 1822 bus_dma_segment_t *seg; 1823 struct ip *ip; 1824 struct tcphdr *tcp; 1825 uint32_t low, high_swapped; 1826 int len, seglen, cum_len, cum_len_next; 1827 int next_is_first, chop, cnt, rdma_count, small; 1828 uint16_t pseudo_hdr_offset, cksum_offset, mss; 1829 uint8_t flags, flags_next; 1830 static int once; 1831 1832 mss = m->m_pkthdr.tso_segsz; 1833 1834 /* negative cum_len signifies to the 1835 * send loop that we are still in the 1836 * header portion of the TSO packet. 1837 */ 1838 1839 /* ensure we have the ethernet, IP and TCP 1840 header together in the first mbuf, copy 1841 it to a scratch buffer if not */ 1842 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 1843 m_copydata(m, 0, ip_off + sizeof (*ip), 1844 ss->scratch); 1845 ip = (struct ip *)(ss->scratch + ip_off); 1846 } else { 1847 ip = (struct ip *)(mtod(m, char *) + ip_off); 1848 } 1849 if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2) 1850 + sizeof (*tcp))) { 1851 m_copydata(m, 0, ip_off + (ip->ip_hl << 2) 1852 + sizeof (*tcp), ss->scratch); 1853 ip = (struct ip *)(mtod(m, char *) + ip_off); 1854 } 1855 1856 tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2)); 1857 cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2)); 1858 1859 /* TSO implies checksum offload on this hardware */ 1860 cksum_offset = ip_off + (ip->ip_hl << 2); 1861 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1862 1863 1864 /* for TSO, pseudo_hdr_offset holds mss. 1865 * The firmware figures out where to put 1866 * the checksum by parsing the header. */ 1867 pseudo_hdr_offset = htobe16(mss); 1868 1869 tx = &ss->tx; 1870 req = tx->req_list; 1871 seg = tx->seg_list; 1872 cnt = 0; 1873 rdma_count = 0; 1874 /* "rdma_count" is the number of RDMAs belonging to the 1875 * current packet BEFORE the current send request. For 1876 * non-TSO packets, this is equal to "count". 1877 * For TSO packets, rdma_count needs to be reset 1878 * to 0 after a segment cut. 1879 * 1880 * The rdma_count field of the send request is 1881 * the number of RDMAs of the packet starting at 1882 * that request. For TSO send requests with one ore more cuts 1883 * in the middle, this is the number of RDMAs starting 1884 * after the last cut in the request. All previous 1885 * segments before the last cut implicitly have 1 RDMA. 1886 * 1887 * Since the number of RDMAs is not known beforehand, 1888 * it must be filled-in retroactively - after each 1889 * segmentation cut or at the end of the entire packet. 1890 */ 1891 1892 while (busdma_seg_cnt) { 1893 /* Break the busdma segment up into pieces*/ 1894 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1895 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1896 len = seg->ds_len; 1897 1898 while (len) { 1899 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1900 seglen = len; 1901 cum_len_next = cum_len + seglen; 1902 (req-rdma_count)->rdma_count = rdma_count + 1; 1903 if (__predict_true(cum_len >= 0)) { 1904 /* payload */ 1905 chop = (cum_len_next > mss); 1906 cum_len_next = cum_len_next % mss; 1907 next_is_first = (cum_len_next == 0); 1908 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1909 flags_next |= next_is_first * 1910 MXGEFW_FLAGS_FIRST; 1911 rdma_count |= -(chop | next_is_first); 1912 rdma_count += chop & !next_is_first; 1913 } else if (cum_len_next >= 0) { 1914 /* header ends */ 1915 rdma_count = -1; 1916 cum_len_next = 0; 1917 seglen = -cum_len; 1918 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1919 flags_next = MXGEFW_FLAGS_TSO_PLD | 1920 MXGEFW_FLAGS_FIRST | 1921 (small * MXGEFW_FLAGS_SMALL); 1922 } 1923 1924 req->addr_high = high_swapped; 1925 req->addr_low = htobe32(low); 1926 req->pseudo_hdr_offset = pseudo_hdr_offset; 1927 req->pad = 0; 1928 req->rdma_count = 1; 1929 req->length = htobe16(seglen); 1930 req->cksum_offset = cksum_offset; 1931 req->flags = flags | ((cum_len & 1) * 1932 MXGEFW_FLAGS_ALIGN_ODD); 1933 low += seglen; 1934 len -= seglen; 1935 cum_len = cum_len_next; 1936 flags = flags_next; 1937 req++; 1938 cnt++; 1939 rdma_count++; 1940 if (__predict_false(cksum_offset > seglen)) 1941 cksum_offset -= seglen; 1942 else 1943 cksum_offset = 0; 1944 if (__predict_false(cnt > tx->max_desc)) 1945 goto drop; 1946 } 1947 busdma_seg_cnt--; 1948 seg++; 1949 } 1950 (req-rdma_count)->rdma_count = rdma_count; 1951 1952 do { 1953 req--; 1954 req->flags |= MXGEFW_FLAGS_TSO_LAST; 1955 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 1956 1957 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1958 mxge_submit_req(tx, tx->req_list, cnt); 1959 #ifdef IFNET_BUF_RING 1960 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 1961 /* tell the NIC to start polling this slice */ 1962 *tx->send_go = 1; 1963 tx->queue_active = 1; 1964 tx->activate++; 1965 wmb(); 1966 } 1967 #endif 1968 return; 1969 1970 drop: 1971 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 1972 m_freem(m); 1973 ss->oerrors++; 1974 if (!once) { 1975 printf("tx->max_desc exceeded via TSO!\n"); 1976 printf("mss = %d, %ld, %d!\n", mss, 1977 (long)seg - (long)tx->seg_list, tx->max_desc); 1978 once = 1; 1979 } 1980 return; 1981 1982 } 1983 1984 #endif /* IFCAP_TSO4 */ 1985 1986 #ifdef MXGE_NEW_VLAN_API 1987 /* 1988 * We reproduce the software vlan tag insertion from 1989 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" 1990 * vlan tag insertion. We need to advertise this in order to have the 1991 * vlan interface respect our csum offload flags. 1992 */ 1993 static struct mbuf * 1994 mxge_vlan_tag_insert(struct mbuf *m) 1995 { 1996 struct ether_vlan_header *evl; 1997 1998 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT); 1999 if (__predict_false(m == NULL)) 2000 return NULL; 2001 if (m->m_len < sizeof(*evl)) { 2002 m = m_pullup(m, sizeof(*evl)); 2003 if (__predict_false(m == NULL)) 2004 return NULL; 2005 } 2006 /* 2007 * Transform the Ethernet header into an Ethernet header 2008 * with 802.1Q encapsulation. 2009 */ 2010 evl = mtod(m, struct ether_vlan_header *); 2011 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 2012 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 2013 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 2014 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 2015 m->m_flags &= ~M_VLANTAG; 2016 return m; 2017 } 2018 #endif /* MXGE_NEW_VLAN_API */ 2019 2020 static void 2021 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 2022 { 2023 mxge_softc_t *sc; 2024 mcp_kreq_ether_send_t *req; 2025 bus_dma_segment_t *seg; 2026 struct mbuf *m_tmp; 2027 struct ifnet *ifp; 2028 mxge_tx_ring_t *tx; 2029 struct ip *ip; 2030 int cnt, cum_len, err, i, idx, odd_flag, ip_off; 2031 uint16_t pseudo_hdr_offset; 2032 uint8_t flags, cksum_offset; 2033 2034 2035 sc = ss->sc; 2036 ifp = sc->ifp; 2037 tx = &ss->tx; 2038 2039 ip_off = sizeof (struct ether_header); 2040 #ifdef MXGE_NEW_VLAN_API 2041 if (m->m_flags & M_VLANTAG) { 2042 m = mxge_vlan_tag_insert(m); 2043 if (__predict_false(m == NULL)) 2044 goto drop; 2045 ip_off += ETHER_VLAN_ENCAP_LEN; 2046 } 2047 #endif 2048 /* (try to) map the frame for DMA */ 2049 idx = tx->req & tx->mask; 2050 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 2051 m, tx->seg_list, &cnt, 2052 BUS_DMA_NOWAIT); 2053 if (__predict_false(err == EFBIG)) { 2054 /* Too many segments in the chain. Try 2055 to defrag */ 2056 m_tmp = m_defrag(m, M_NOWAIT); 2057 if (m_tmp == NULL) { 2058 goto drop; 2059 } 2060 ss->tx.defrag++; 2061 m = m_tmp; 2062 err = bus_dmamap_load_mbuf_sg(tx->dmat, 2063 tx->info[idx].map, 2064 m, tx->seg_list, &cnt, 2065 BUS_DMA_NOWAIT); 2066 } 2067 if (__predict_false(err != 0)) { 2068 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 2069 " packet len = %d\n", err, m->m_pkthdr.len); 2070 goto drop; 2071 } 2072 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 2073 BUS_DMASYNC_PREWRITE); 2074 tx->info[idx].m = m; 2075 2076 #if IFCAP_TSO4 2077 /* TSO is different enough, we handle it in another routine */ 2078 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 2079 mxge_encap_tso(ss, m, cnt, ip_off); 2080 return; 2081 } 2082 #endif 2083 2084 req = tx->req_list; 2085 cksum_offset = 0; 2086 pseudo_hdr_offset = 0; 2087 flags = MXGEFW_FLAGS_NO_TSO; 2088 2089 /* checksum offloading? */ 2090 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) { 2091 /* ensure ip header is in first mbuf, copy 2092 it to a scratch buffer if not */ 2093 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 2094 m_copydata(m, 0, ip_off + sizeof (*ip), 2095 ss->scratch); 2096 ip = (struct ip *)(ss->scratch + ip_off); 2097 } else { 2098 ip = (struct ip *)(mtod(m, char *) + ip_off); 2099 } 2100 cksum_offset = ip_off + (ip->ip_hl << 2); 2101 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2102 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2103 req->cksum_offset = cksum_offset; 2104 flags |= MXGEFW_FLAGS_CKSUM; 2105 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2106 } else { 2107 odd_flag = 0; 2108 } 2109 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2110 flags |= MXGEFW_FLAGS_SMALL; 2111 2112 /* convert segments into a request list */ 2113 cum_len = 0; 2114 seg = tx->seg_list; 2115 req->flags = MXGEFW_FLAGS_FIRST; 2116 for (i = 0; i < cnt; i++) { 2117 req->addr_low = 2118 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2119 req->addr_high = 2120 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2121 req->length = htobe16(seg->ds_len); 2122 req->cksum_offset = cksum_offset; 2123 if (cksum_offset > seg->ds_len) 2124 cksum_offset -= seg->ds_len; 2125 else 2126 cksum_offset = 0; 2127 req->pseudo_hdr_offset = pseudo_hdr_offset; 2128 req->pad = 0; /* complete solid 16-byte block */ 2129 req->rdma_count = 1; 2130 req->flags |= flags | ((cum_len & 1) * odd_flag); 2131 cum_len += seg->ds_len; 2132 seg++; 2133 req++; 2134 req->flags = 0; 2135 } 2136 req--; 2137 /* pad runts to 60 bytes */ 2138 if (cum_len < 60) { 2139 req++; 2140 req->addr_low = 2141 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2142 req->addr_high = 2143 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2144 req->length = htobe16(60 - cum_len); 2145 req->cksum_offset = 0; 2146 req->pseudo_hdr_offset = pseudo_hdr_offset; 2147 req->pad = 0; /* complete solid 16-byte block */ 2148 req->rdma_count = 1; 2149 req->flags |= flags | ((cum_len & 1) * odd_flag); 2150 cnt++; 2151 } 2152 2153 tx->req_list[0].rdma_count = cnt; 2154 #if 0 2155 /* print what the firmware will see */ 2156 for (i = 0; i < cnt; i++) { 2157 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2158 "cso:%d, flags:0x%x, rdma:%d\n", 2159 i, (int)ntohl(tx->req_list[i].addr_high), 2160 (int)ntohl(tx->req_list[i].addr_low), 2161 (int)ntohs(tx->req_list[i].length), 2162 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2163 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2164 tx->req_list[i].rdma_count); 2165 } 2166 printf("--------------\n"); 2167 #endif 2168 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2169 mxge_submit_req(tx, tx->req_list, cnt); 2170 #ifdef IFNET_BUF_RING 2171 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2172 /* tell the NIC to start polling this slice */ 2173 *tx->send_go = 1; 2174 tx->queue_active = 1; 2175 tx->activate++; 2176 wmb(); 2177 } 2178 #endif 2179 return; 2180 2181 drop: 2182 m_freem(m); 2183 ss->oerrors++; 2184 return; 2185 } 2186 2187 #ifdef IFNET_BUF_RING 2188 static void 2189 mxge_qflush(struct ifnet *ifp) 2190 { 2191 mxge_softc_t *sc = ifp->if_softc; 2192 mxge_tx_ring_t *tx; 2193 struct mbuf *m; 2194 int slice; 2195 2196 for (slice = 0; slice < sc->num_slices; slice++) { 2197 tx = &sc->ss[slice].tx; 2198 mtx_lock(&tx->mtx); 2199 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2200 m_freem(m); 2201 mtx_unlock(&tx->mtx); 2202 } 2203 if_qflush(ifp); 2204 } 2205 2206 static inline void 2207 mxge_start_locked(struct mxge_slice_state *ss) 2208 { 2209 mxge_softc_t *sc; 2210 struct mbuf *m; 2211 struct ifnet *ifp; 2212 mxge_tx_ring_t *tx; 2213 2214 sc = ss->sc; 2215 ifp = sc->ifp; 2216 tx = &ss->tx; 2217 2218 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2219 m = drbr_dequeue(ifp, tx->br); 2220 if (m == NULL) { 2221 return; 2222 } 2223 /* let BPF see it */ 2224 BPF_MTAP(ifp, m); 2225 2226 /* give it to the nic */ 2227 mxge_encap(ss, m); 2228 } 2229 /* ran out of transmit slots */ 2230 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) 2231 && (!drbr_empty(ifp, tx->br))) { 2232 ss->if_drv_flags |= IFF_DRV_OACTIVE; 2233 tx->stall++; 2234 } 2235 } 2236 2237 static int 2238 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2239 { 2240 mxge_softc_t *sc; 2241 struct ifnet *ifp; 2242 mxge_tx_ring_t *tx; 2243 int err; 2244 2245 sc = ss->sc; 2246 ifp = sc->ifp; 2247 tx = &ss->tx; 2248 2249 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 2250 IFF_DRV_RUNNING) { 2251 err = drbr_enqueue(ifp, tx->br, m); 2252 return (err); 2253 } 2254 2255 if (!drbr_needs_enqueue(ifp, tx->br) && 2256 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2257 /* let BPF see it */ 2258 BPF_MTAP(ifp, m); 2259 /* give it to the nic */ 2260 mxge_encap(ss, m); 2261 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2262 return (err); 2263 } 2264 if (!drbr_empty(ifp, tx->br)) 2265 mxge_start_locked(ss); 2266 return (0); 2267 } 2268 2269 static int 2270 mxge_transmit(struct ifnet *ifp, struct mbuf *m) 2271 { 2272 mxge_softc_t *sc = ifp->if_softc; 2273 struct mxge_slice_state *ss; 2274 mxge_tx_ring_t *tx; 2275 int err = 0; 2276 int slice; 2277 2278 slice = m->m_pkthdr.flowid; 2279 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ 2280 2281 ss = &sc->ss[slice]; 2282 tx = &ss->tx; 2283 2284 if (mtx_trylock(&tx->mtx)) { 2285 err = mxge_transmit_locked(ss, m); 2286 mtx_unlock(&tx->mtx); 2287 } else { 2288 err = drbr_enqueue(ifp, tx->br, m); 2289 } 2290 2291 return (err); 2292 } 2293 2294 #else 2295 2296 static inline void 2297 mxge_start_locked(struct mxge_slice_state *ss) 2298 { 2299 mxge_softc_t *sc; 2300 struct mbuf *m; 2301 struct ifnet *ifp; 2302 mxge_tx_ring_t *tx; 2303 2304 sc = ss->sc; 2305 ifp = sc->ifp; 2306 tx = &ss->tx; 2307 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2308 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 2309 if (m == NULL) { 2310 return; 2311 } 2312 /* let BPF see it */ 2313 BPF_MTAP(ifp, m); 2314 2315 /* give it to the nic */ 2316 mxge_encap(ss, m); 2317 } 2318 /* ran out of transmit slots */ 2319 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 2320 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2321 tx->stall++; 2322 } 2323 } 2324 #endif 2325 static void 2326 mxge_start(struct ifnet *ifp) 2327 { 2328 mxge_softc_t *sc = ifp->if_softc; 2329 struct mxge_slice_state *ss; 2330 2331 /* only use the first slice for now */ 2332 ss = &sc->ss[0]; 2333 mtx_lock(&ss->tx.mtx); 2334 mxge_start_locked(ss); 2335 mtx_unlock(&ss->tx.mtx); 2336 } 2337 2338 /* 2339 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2340 * at most 32 bytes at a time, so as to avoid involving the software 2341 * pio handler in the nic. We re-write the first segment's low 2342 * DMA address to mark it valid only after we write the entire chunk 2343 * in a burst 2344 */ 2345 static inline void 2346 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2347 mcp_kreq_ether_recv_t *src) 2348 { 2349 uint32_t low; 2350 2351 low = src->addr_low; 2352 src->addr_low = 0xffffffff; 2353 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2354 wmb(); 2355 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2356 wmb(); 2357 src->addr_low = low; 2358 dst->addr_low = low; 2359 wmb(); 2360 } 2361 2362 static int 2363 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2364 { 2365 bus_dma_segment_t seg; 2366 struct mbuf *m; 2367 mxge_rx_ring_t *rx = &ss->rx_small; 2368 int cnt, err; 2369 2370 m = m_gethdr(M_DONTWAIT, MT_DATA); 2371 if (m == NULL) { 2372 rx->alloc_fail++; 2373 err = ENOBUFS; 2374 goto done; 2375 } 2376 m->m_len = MHLEN; 2377 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2378 &seg, &cnt, BUS_DMA_NOWAIT); 2379 if (err != 0) { 2380 m_free(m); 2381 goto done; 2382 } 2383 rx->info[idx].m = m; 2384 rx->shadow[idx].addr_low = 2385 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2386 rx->shadow[idx].addr_high = 2387 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2388 2389 done: 2390 if ((idx & 7) == 7) 2391 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2392 return err; 2393 } 2394 2395 static int 2396 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2397 { 2398 bus_dma_segment_t seg[3]; 2399 struct mbuf *m; 2400 mxge_rx_ring_t *rx = &ss->rx_big; 2401 int cnt, err, i; 2402 2403 if (rx->cl_size == MCLBYTES) 2404 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); 2405 else 2406 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2407 if (m == NULL) { 2408 rx->alloc_fail++; 2409 err = ENOBUFS; 2410 goto done; 2411 } 2412 m->m_len = rx->mlen; 2413 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2414 seg, &cnt, BUS_DMA_NOWAIT); 2415 if (err != 0) { 2416 m_free(m); 2417 goto done; 2418 } 2419 rx->info[idx].m = m; 2420 rx->shadow[idx].addr_low = 2421 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2422 rx->shadow[idx].addr_high = 2423 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2424 2425 #if MXGE_VIRT_JUMBOS 2426 for (i = 1; i < cnt; i++) { 2427 rx->shadow[idx + i].addr_low = 2428 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2429 rx->shadow[idx + i].addr_high = 2430 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2431 } 2432 #endif 2433 2434 done: 2435 for (i = 0; i < rx->nbufs; i++) { 2436 if ((idx & 7) == 7) { 2437 mxge_submit_8rx(&rx->lanai[idx - 7], 2438 &rx->shadow[idx - 7]); 2439 } 2440 idx++; 2441 } 2442 return err; 2443 } 2444 2445 /* 2446 * Myri10GE hardware checksums are not valid if the sender 2447 * padded the frame with non-zero padding. This is because 2448 * the firmware just does a simple 16-bit 1s complement 2449 * checksum across the entire frame, excluding the first 14 2450 * bytes. It is best to simply to check the checksum and 2451 * tell the stack about it only if the checksum is good 2452 */ 2453 2454 static inline uint16_t 2455 mxge_rx_csum(struct mbuf *m, int csum) 2456 { 2457 struct ether_header *eh; 2458 struct ip *ip; 2459 uint16_t c; 2460 2461 eh = mtod(m, struct ether_header *); 2462 2463 /* only deal with IPv4 TCP & UDP for now */ 2464 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP))) 2465 return 1; 2466 ip = (struct ip *)(eh + 1); 2467 if (__predict_false(ip->ip_p != IPPROTO_TCP && 2468 ip->ip_p != IPPROTO_UDP)) 2469 return 1; 2470 #ifdef INET 2471 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2472 htonl(ntohs(csum) + ntohs(ip->ip_len) + 2473 - (ip->ip_hl << 2) + ip->ip_p)); 2474 #else 2475 c = 1; 2476 #endif 2477 c ^= 0xffff; 2478 return (c); 2479 } 2480 2481 static void 2482 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2483 { 2484 struct ether_vlan_header *evl; 2485 struct ether_header *eh; 2486 uint32_t partial; 2487 2488 evl = mtod(m, struct ether_vlan_header *); 2489 eh = mtod(m, struct ether_header *); 2490 2491 /* 2492 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes 2493 * after what the firmware thought was the end of the ethernet 2494 * header. 2495 */ 2496 2497 /* put checksum into host byte order */ 2498 *csum = ntohs(*csum); 2499 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2500 (*csum) += ~partial; 2501 (*csum) += ((*csum) < ~partial); 2502 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2503 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2504 2505 /* restore checksum to network byte order; 2506 later consumers expect this */ 2507 *csum = htons(*csum); 2508 2509 /* save the tag */ 2510 #ifdef MXGE_NEW_VLAN_API 2511 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); 2512 #else 2513 { 2514 struct m_tag *mtag; 2515 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), 2516 M_NOWAIT); 2517 if (mtag == NULL) 2518 return; 2519 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); 2520 m_tag_prepend(m, mtag); 2521 } 2522 2523 #endif 2524 m->m_flags |= M_VLANTAG; 2525 2526 /* 2527 * Remove the 802.1q header by copying the Ethernet 2528 * addresses over it and adjusting the beginning of 2529 * the data in the mbuf. The encapsulated Ethernet 2530 * type field is already in place. 2531 */ 2532 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2533 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2534 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2535 } 2536 2537 2538 static inline void 2539 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2540 { 2541 mxge_softc_t *sc; 2542 struct ifnet *ifp; 2543 struct mbuf *m; 2544 struct ether_header *eh; 2545 mxge_rx_ring_t *rx; 2546 bus_dmamap_t old_map; 2547 int idx; 2548 uint16_t tcpudp_csum; 2549 2550 sc = ss->sc; 2551 ifp = sc->ifp; 2552 rx = &ss->rx_big; 2553 idx = rx->cnt & rx->mask; 2554 rx->cnt += rx->nbufs; 2555 /* save a pointer to the received mbuf */ 2556 m = rx->info[idx].m; 2557 /* try to replace the received mbuf */ 2558 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2559 /* drop the frame -- the old mbuf is re-cycled */ 2560 ifp->if_ierrors++; 2561 return; 2562 } 2563 2564 /* unmap the received buffer */ 2565 old_map = rx->info[idx].map; 2566 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2567 bus_dmamap_unload(rx->dmat, old_map); 2568 2569 /* swap the bus_dmamap_t's */ 2570 rx->info[idx].map = rx->extra_map; 2571 rx->extra_map = old_map; 2572 2573 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2574 * aligned */ 2575 m->m_data += MXGEFW_PAD; 2576 2577 m->m_pkthdr.rcvif = ifp; 2578 m->m_len = m->m_pkthdr.len = len; 2579 ss->ipackets++; 2580 eh = mtod(m, struct ether_header *); 2581 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2582 mxge_vlan_tag_remove(m, &csum); 2583 } 2584 /* if the checksum is valid, mark it in the mbuf header */ 2585 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2586 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2587 return; 2588 /* otherwise, it was a UDP frame, or a TCP frame which 2589 we could not do LRO on. Tell the stack that the 2590 checksum is good */ 2591 m->m_pkthdr.csum_data = 0xffff; 2592 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2593 } 2594 /* flowid only valid if RSS hashing is enabled */ 2595 if (sc->num_slices > 1) { 2596 m->m_pkthdr.flowid = (ss - sc->ss); 2597 m->m_flags |= M_FLOWID; 2598 } 2599 /* pass the frame up the stack */ 2600 (*ifp->if_input)(ifp, m); 2601 } 2602 2603 static inline void 2604 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2605 { 2606 mxge_softc_t *sc; 2607 struct ifnet *ifp; 2608 struct ether_header *eh; 2609 struct mbuf *m; 2610 mxge_rx_ring_t *rx; 2611 bus_dmamap_t old_map; 2612 int idx; 2613 uint16_t tcpudp_csum; 2614 2615 sc = ss->sc; 2616 ifp = sc->ifp; 2617 rx = &ss->rx_small; 2618 idx = rx->cnt & rx->mask; 2619 rx->cnt++; 2620 /* save a pointer to the received mbuf */ 2621 m = rx->info[idx].m; 2622 /* try to replace the received mbuf */ 2623 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2624 /* drop the frame -- the old mbuf is re-cycled */ 2625 ifp->if_ierrors++; 2626 return; 2627 } 2628 2629 /* unmap the received buffer */ 2630 old_map = rx->info[idx].map; 2631 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2632 bus_dmamap_unload(rx->dmat, old_map); 2633 2634 /* swap the bus_dmamap_t's */ 2635 rx->info[idx].map = rx->extra_map; 2636 rx->extra_map = old_map; 2637 2638 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2639 * aligned */ 2640 m->m_data += MXGEFW_PAD; 2641 2642 m->m_pkthdr.rcvif = ifp; 2643 m->m_len = m->m_pkthdr.len = len; 2644 ss->ipackets++; 2645 eh = mtod(m, struct ether_header *); 2646 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2647 mxge_vlan_tag_remove(m, &csum); 2648 } 2649 /* if the checksum is valid, mark it in the mbuf header */ 2650 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2651 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2652 return; 2653 /* otherwise, it was a UDP frame, or a TCP frame which 2654 we could not do LRO on. Tell the stack that the 2655 checksum is good */ 2656 m->m_pkthdr.csum_data = 0xffff; 2657 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2658 } 2659 /* flowid only valid if RSS hashing is enabled */ 2660 if (sc->num_slices > 1) { 2661 m->m_pkthdr.flowid = (ss - sc->ss); 2662 m->m_flags |= M_FLOWID; 2663 } 2664 /* pass the frame up the stack */ 2665 (*ifp->if_input)(ifp, m); 2666 } 2667 2668 static inline void 2669 mxge_clean_rx_done(struct mxge_slice_state *ss) 2670 { 2671 mxge_rx_done_t *rx_done = &ss->rx_done; 2672 int limit = 0; 2673 uint16_t length; 2674 uint16_t checksum; 2675 2676 2677 while (rx_done->entry[rx_done->idx].length != 0) { 2678 length = ntohs(rx_done->entry[rx_done->idx].length); 2679 rx_done->entry[rx_done->idx].length = 0; 2680 checksum = rx_done->entry[rx_done->idx].checksum; 2681 if (length <= (MHLEN - MXGEFW_PAD)) 2682 mxge_rx_done_small(ss, length, checksum); 2683 else 2684 mxge_rx_done_big(ss, length, checksum); 2685 rx_done->cnt++; 2686 rx_done->idx = rx_done->cnt & rx_done->mask; 2687 2688 /* limit potential for livelock */ 2689 if (__predict_false(++limit > rx_done->mask / 2)) 2690 break; 2691 } 2692 #ifdef INET 2693 while (!SLIST_EMPTY(&ss->lro_active)) { 2694 struct lro_entry *lro = SLIST_FIRST(&ss->lro_active); 2695 SLIST_REMOVE_HEAD(&ss->lro_active, next); 2696 mxge_lro_flush(ss, lro); 2697 } 2698 #endif 2699 } 2700 2701 2702 static inline void 2703 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2704 { 2705 struct ifnet *ifp; 2706 mxge_tx_ring_t *tx; 2707 struct mbuf *m; 2708 bus_dmamap_t map; 2709 int idx; 2710 int *flags; 2711 2712 tx = &ss->tx; 2713 ifp = ss->sc->ifp; 2714 while (tx->pkt_done != mcp_idx) { 2715 idx = tx->done & tx->mask; 2716 tx->done++; 2717 m = tx->info[idx].m; 2718 /* mbuf and DMA map only attached to the first 2719 segment per-mbuf */ 2720 if (m != NULL) { 2721 ss->obytes += m->m_pkthdr.len; 2722 if (m->m_flags & M_MCAST) 2723 ss->omcasts++; 2724 ss->opackets++; 2725 tx->info[idx].m = NULL; 2726 map = tx->info[idx].map; 2727 bus_dmamap_unload(tx->dmat, map); 2728 m_freem(m); 2729 } 2730 if (tx->info[idx].flag) { 2731 tx->info[idx].flag = 0; 2732 tx->pkt_done++; 2733 } 2734 } 2735 2736 /* If we have space, clear IFF_OACTIVE to tell the stack that 2737 its OK to send packets */ 2738 #ifdef IFNET_BUF_RING 2739 flags = &ss->if_drv_flags; 2740 #else 2741 flags = &ifp->if_drv_flags; 2742 #endif 2743 mtx_lock(&ss->tx.mtx); 2744 if ((*flags) & IFF_DRV_OACTIVE && 2745 tx->req - tx->done < (tx->mask + 1)/4) { 2746 *(flags) &= ~IFF_DRV_OACTIVE; 2747 ss->tx.wake++; 2748 mxge_start_locked(ss); 2749 } 2750 #ifdef IFNET_BUF_RING 2751 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { 2752 /* let the NIC stop polling this queue, since there 2753 * are no more transmits pending */ 2754 if (tx->req == tx->done) { 2755 *tx->send_stop = 1; 2756 tx->queue_active = 0; 2757 tx->deactivate++; 2758 wmb(); 2759 } 2760 } 2761 #endif 2762 mtx_unlock(&ss->tx.mtx); 2763 2764 } 2765 2766 static struct mxge_media_type mxge_xfp_media_types[] = 2767 { 2768 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2769 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2770 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2771 {0, (1 << 5), "10GBASE-ER"}, 2772 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2773 {0, (1 << 3), "10GBASE-SW"}, 2774 {0, (1 << 2), "10GBASE-LW"}, 2775 {0, (1 << 1), "10GBASE-EW"}, 2776 {0, (1 << 0), "Reserved"} 2777 }; 2778 static struct mxge_media_type mxge_sfp_media_types[] = 2779 { 2780 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"}, 2781 {0, (1 << 7), "Reserved"}, 2782 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2783 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2784 {IFM_10G_SR, (1 << 4), "10GBASE-SR"} 2785 }; 2786 2787 static void 2788 mxge_media_set(mxge_softc_t *sc, int media_type) 2789 { 2790 2791 2792 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type, 2793 0, NULL); 2794 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type); 2795 sc->current_media = media_type; 2796 sc->media.ifm_media = sc->media.ifm_cur->ifm_media; 2797 } 2798 2799 static void 2800 mxge_media_init(mxge_softc_t *sc) 2801 { 2802 char *ptr; 2803 int i; 2804 2805 ifmedia_removeall(&sc->media); 2806 mxge_media_set(sc, IFM_AUTO); 2807 2808 /* 2809 * parse the product code to deterimine the interface type 2810 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2811 * after the 3rd dash in the driver's cached copy of the 2812 * EEPROM's product code string. 2813 */ 2814 ptr = sc->product_code_string; 2815 if (ptr == NULL) { 2816 device_printf(sc->dev, "Missing product code\n"); 2817 return; 2818 } 2819 2820 for (i = 0; i < 3; i++, ptr++) { 2821 ptr = index(ptr, '-'); 2822 if (ptr == NULL) { 2823 device_printf(sc->dev, 2824 "only %d dashes in PC?!?\n", i); 2825 return; 2826 } 2827 } 2828 if (*ptr == 'C') { 2829 /* -C is CX4 */ 2830 sc->connector = MXGE_CX4; 2831 mxge_media_set(sc, IFM_10G_CX4); 2832 } else if (*ptr == 'Q') { 2833 /* -Q is Quad Ribbon Fiber */ 2834 sc->connector = MXGE_QRF; 2835 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2836 /* FreeBSD has no media type for Quad ribbon fiber */ 2837 } else if (*ptr == 'R') { 2838 /* -R is XFP */ 2839 sc->connector = MXGE_XFP; 2840 } else if (*ptr == 'S' || *(ptr +1) == 'S') { 2841 /* -S or -2S is SFP+ */ 2842 sc->connector = MXGE_SFP; 2843 } else { 2844 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2845 } 2846 } 2847 2848 /* 2849 * Determine the media type for a NIC. Some XFPs will identify 2850 * themselves only when their link is up, so this is initiated via a 2851 * link up interrupt. However, this can potentially take up to 2852 * several milliseconds, so it is run via the watchdog routine, rather 2853 * than in the interrupt handler itself. 2854 */ 2855 static void 2856 mxge_media_probe(mxge_softc_t *sc) 2857 { 2858 mxge_cmd_t cmd; 2859 char *cage_type; 2860 2861 struct mxge_media_type *mxge_media_types = NULL; 2862 int i, err, ms, mxge_media_type_entries; 2863 uint32_t byte; 2864 2865 sc->need_media_probe = 0; 2866 2867 if (sc->connector == MXGE_XFP) { 2868 /* -R is XFP */ 2869 mxge_media_types = mxge_xfp_media_types; 2870 mxge_media_type_entries = 2871 sizeof (mxge_xfp_media_types) / 2872 sizeof (mxge_xfp_media_types[0]); 2873 byte = MXGE_XFP_COMPLIANCE_BYTE; 2874 cage_type = "XFP"; 2875 } else if (sc->connector == MXGE_SFP) { 2876 /* -S or -2S is SFP+ */ 2877 mxge_media_types = mxge_sfp_media_types; 2878 mxge_media_type_entries = 2879 sizeof (mxge_sfp_media_types) / 2880 sizeof (mxge_sfp_media_types[0]); 2881 cage_type = "SFP+"; 2882 byte = 3; 2883 } else { 2884 /* nothing to do; media type cannot change */ 2885 return; 2886 } 2887 2888 /* 2889 * At this point we know the NIC has an XFP cage, so now we 2890 * try to determine what is in the cage by using the 2891 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2892 * register. We read just one byte, which may take over 2893 * a millisecond 2894 */ 2895 2896 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2897 cmd.data1 = byte; 2898 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2899 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 2900 device_printf(sc->dev, "failed to read XFP\n"); 2901 } 2902 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 2903 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 2904 } 2905 if (err != MXGEFW_CMD_OK) { 2906 return; 2907 } 2908 2909 /* now we wait for the data to be cached */ 2910 cmd.data0 = byte; 2911 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2912 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 2913 DELAY(1000); 2914 cmd.data0 = byte; 2915 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2916 } 2917 if (err != MXGEFW_CMD_OK) { 2918 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 2919 cage_type, err, ms); 2920 return; 2921 } 2922 2923 if (cmd.data0 == mxge_media_types[0].bitmask) { 2924 if (mxge_verbose) 2925 device_printf(sc->dev, "%s:%s\n", cage_type, 2926 mxge_media_types[0].name); 2927 if (sc->current_media != mxge_media_types[0].flag) { 2928 mxge_media_init(sc); 2929 mxge_media_set(sc, mxge_media_types[0].flag); 2930 } 2931 return; 2932 } 2933 for (i = 1; i < mxge_media_type_entries; i++) { 2934 if (cmd.data0 & mxge_media_types[i].bitmask) { 2935 if (mxge_verbose) 2936 device_printf(sc->dev, "%s:%s\n", 2937 cage_type, 2938 mxge_media_types[i].name); 2939 2940 if (sc->current_media != mxge_media_types[i].flag) { 2941 mxge_media_init(sc); 2942 mxge_media_set(sc, mxge_media_types[i].flag); 2943 } 2944 return; 2945 } 2946 } 2947 if (mxge_verbose) 2948 device_printf(sc->dev, "%s media 0x%x unknown\n", 2949 cage_type, cmd.data0); 2950 2951 return; 2952 } 2953 2954 static void 2955 mxge_intr(void *arg) 2956 { 2957 struct mxge_slice_state *ss = arg; 2958 mxge_softc_t *sc = ss->sc; 2959 mcp_irq_data_t *stats = ss->fw_stats; 2960 mxge_tx_ring_t *tx = &ss->tx; 2961 mxge_rx_done_t *rx_done = &ss->rx_done; 2962 uint32_t send_done_count; 2963 uint8_t valid; 2964 2965 2966 #ifndef IFNET_BUF_RING 2967 /* an interrupt on a non-zero slice is implicitly valid 2968 since MSI-X irqs are not shared */ 2969 if (ss != sc->ss) { 2970 mxge_clean_rx_done(ss); 2971 *ss->irq_claim = be32toh(3); 2972 return; 2973 } 2974 #endif 2975 2976 /* make sure the DMA has finished */ 2977 if (!stats->valid) { 2978 return; 2979 } 2980 valid = stats->valid; 2981 2982 if (sc->legacy_irq) { 2983 /* lower legacy IRQ */ 2984 *sc->irq_deassert = 0; 2985 if (!mxge_deassert_wait) 2986 /* don't wait for conf. that irq is low */ 2987 stats->valid = 0; 2988 } else { 2989 stats->valid = 0; 2990 } 2991 2992 /* loop while waiting for legacy irq deassertion */ 2993 do { 2994 /* check for transmit completes and receives */ 2995 send_done_count = be32toh(stats->send_done_count); 2996 while ((send_done_count != tx->pkt_done) || 2997 (rx_done->entry[rx_done->idx].length != 0)) { 2998 if (send_done_count != tx->pkt_done) 2999 mxge_tx_done(ss, (int)send_done_count); 3000 mxge_clean_rx_done(ss); 3001 send_done_count = be32toh(stats->send_done_count); 3002 } 3003 if (sc->legacy_irq && mxge_deassert_wait) 3004 wmb(); 3005 } while (*((volatile uint8_t *) &stats->valid)); 3006 3007 /* fw link & error stats meaningful only on the first slice */ 3008 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 3009 if (sc->link_state != stats->link_up) { 3010 sc->link_state = stats->link_up; 3011 if (sc->link_state) { 3012 if_link_state_change(sc->ifp, LINK_STATE_UP); 3013 sc->ifp->if_baudrate = IF_Gbps(10UL); 3014 if (mxge_verbose) 3015 device_printf(sc->dev, "link up\n"); 3016 } else { 3017 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3018 sc->ifp->if_baudrate = 0; 3019 if (mxge_verbose) 3020 device_printf(sc->dev, "link down\n"); 3021 } 3022 sc->need_media_probe = 1; 3023 } 3024 if (sc->rdma_tags_available != 3025 be32toh(stats->rdma_tags_available)) { 3026 sc->rdma_tags_available = 3027 be32toh(stats->rdma_tags_available); 3028 device_printf(sc->dev, "RDMA timed out! %d tags " 3029 "left\n", sc->rdma_tags_available); 3030 } 3031 3032 if (stats->link_down) { 3033 sc->down_cnt += stats->link_down; 3034 sc->link_state = 0; 3035 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3036 } 3037 } 3038 3039 /* check to see if we have rx token to pass back */ 3040 if (valid & 0x1) 3041 *ss->irq_claim = be32toh(3); 3042 *(ss->irq_claim + 1) = be32toh(3); 3043 } 3044 3045 static void 3046 mxge_init(void *arg) 3047 { 3048 } 3049 3050 3051 3052 static void 3053 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 3054 { 3055 struct lro_entry *lro_entry; 3056 int i; 3057 3058 while (!SLIST_EMPTY(&ss->lro_free)) { 3059 lro_entry = SLIST_FIRST(&ss->lro_free); 3060 SLIST_REMOVE_HEAD(&ss->lro_free, next); 3061 free(lro_entry, M_DEVBUF); 3062 } 3063 3064 for (i = 0; i <= ss->rx_big.mask; i++) { 3065 if (ss->rx_big.info[i].m == NULL) 3066 continue; 3067 bus_dmamap_unload(ss->rx_big.dmat, 3068 ss->rx_big.info[i].map); 3069 m_freem(ss->rx_big.info[i].m); 3070 ss->rx_big.info[i].m = NULL; 3071 } 3072 3073 for (i = 0; i <= ss->rx_small.mask; i++) { 3074 if (ss->rx_small.info[i].m == NULL) 3075 continue; 3076 bus_dmamap_unload(ss->rx_small.dmat, 3077 ss->rx_small.info[i].map); 3078 m_freem(ss->rx_small.info[i].m); 3079 ss->rx_small.info[i].m = NULL; 3080 } 3081 3082 /* transmit ring used only on the first slice */ 3083 if (ss->tx.info == NULL) 3084 return; 3085 3086 for (i = 0; i <= ss->tx.mask; i++) { 3087 ss->tx.info[i].flag = 0; 3088 if (ss->tx.info[i].m == NULL) 3089 continue; 3090 bus_dmamap_unload(ss->tx.dmat, 3091 ss->tx.info[i].map); 3092 m_freem(ss->tx.info[i].m); 3093 ss->tx.info[i].m = NULL; 3094 } 3095 } 3096 3097 static void 3098 mxge_free_mbufs(mxge_softc_t *sc) 3099 { 3100 int slice; 3101 3102 for (slice = 0; slice < sc->num_slices; slice++) 3103 mxge_free_slice_mbufs(&sc->ss[slice]); 3104 } 3105 3106 static void 3107 mxge_free_slice_rings(struct mxge_slice_state *ss) 3108 { 3109 int i; 3110 3111 3112 if (ss->rx_done.entry != NULL) 3113 mxge_dma_free(&ss->rx_done.dma); 3114 ss->rx_done.entry = NULL; 3115 3116 if (ss->tx.req_bytes != NULL) 3117 free(ss->tx.req_bytes, M_DEVBUF); 3118 ss->tx.req_bytes = NULL; 3119 3120 if (ss->tx.seg_list != NULL) 3121 free(ss->tx.seg_list, M_DEVBUF); 3122 ss->tx.seg_list = NULL; 3123 3124 if (ss->rx_small.shadow != NULL) 3125 free(ss->rx_small.shadow, M_DEVBUF); 3126 ss->rx_small.shadow = NULL; 3127 3128 if (ss->rx_big.shadow != NULL) 3129 free(ss->rx_big.shadow, M_DEVBUF); 3130 ss->rx_big.shadow = NULL; 3131 3132 if (ss->tx.info != NULL) { 3133 if (ss->tx.dmat != NULL) { 3134 for (i = 0; i <= ss->tx.mask; i++) { 3135 bus_dmamap_destroy(ss->tx.dmat, 3136 ss->tx.info[i].map); 3137 } 3138 bus_dma_tag_destroy(ss->tx.dmat); 3139 } 3140 free(ss->tx.info, M_DEVBUF); 3141 } 3142 ss->tx.info = NULL; 3143 3144 if (ss->rx_small.info != NULL) { 3145 if (ss->rx_small.dmat != NULL) { 3146 for (i = 0; i <= ss->rx_small.mask; i++) { 3147 bus_dmamap_destroy(ss->rx_small.dmat, 3148 ss->rx_small.info[i].map); 3149 } 3150 bus_dmamap_destroy(ss->rx_small.dmat, 3151 ss->rx_small.extra_map); 3152 bus_dma_tag_destroy(ss->rx_small.dmat); 3153 } 3154 free(ss->rx_small.info, M_DEVBUF); 3155 } 3156 ss->rx_small.info = NULL; 3157 3158 if (ss->rx_big.info != NULL) { 3159 if (ss->rx_big.dmat != NULL) { 3160 for (i = 0; i <= ss->rx_big.mask; i++) { 3161 bus_dmamap_destroy(ss->rx_big.dmat, 3162 ss->rx_big.info[i].map); 3163 } 3164 bus_dmamap_destroy(ss->rx_big.dmat, 3165 ss->rx_big.extra_map); 3166 bus_dma_tag_destroy(ss->rx_big.dmat); 3167 } 3168 free(ss->rx_big.info, M_DEVBUF); 3169 } 3170 ss->rx_big.info = NULL; 3171 } 3172 3173 static void 3174 mxge_free_rings(mxge_softc_t *sc) 3175 { 3176 int slice; 3177 3178 for (slice = 0; slice < sc->num_slices; slice++) 3179 mxge_free_slice_rings(&sc->ss[slice]); 3180 } 3181 3182 static int 3183 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3184 int tx_ring_entries) 3185 { 3186 mxge_softc_t *sc = ss->sc; 3187 size_t bytes; 3188 int err, i; 3189 3190 err = ENOMEM; 3191 3192 /* allocate per-slice receive resources */ 3193 3194 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 3195 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 3196 3197 /* allocate the rx shadow rings */ 3198 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 3199 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3200 if (ss->rx_small.shadow == NULL) 3201 return err; 3202 3203 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 3204 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3205 if (ss->rx_big.shadow == NULL) 3206 return err; 3207 3208 /* allocate the rx host info rings */ 3209 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 3210 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3211 if (ss->rx_small.info == NULL) 3212 return err; 3213 3214 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 3215 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3216 if (ss->rx_big.info == NULL) 3217 return err; 3218 3219 /* allocate the rx busdma resources */ 3220 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3221 1, /* alignment */ 3222 4096, /* boundary */ 3223 BUS_SPACE_MAXADDR, /* low */ 3224 BUS_SPACE_MAXADDR, /* high */ 3225 NULL, NULL, /* filter */ 3226 MHLEN, /* maxsize */ 3227 1, /* num segs */ 3228 MHLEN, /* maxsegsize */ 3229 BUS_DMA_ALLOCNOW, /* flags */ 3230 NULL, NULL, /* lock */ 3231 &ss->rx_small.dmat); /* tag */ 3232 if (err != 0) { 3233 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3234 err); 3235 return err; 3236 } 3237 3238 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3239 1, /* alignment */ 3240 #if MXGE_VIRT_JUMBOS 3241 4096, /* boundary */ 3242 #else 3243 0, /* boundary */ 3244 #endif 3245 BUS_SPACE_MAXADDR, /* low */ 3246 BUS_SPACE_MAXADDR, /* high */ 3247 NULL, NULL, /* filter */ 3248 3*4096, /* maxsize */ 3249 #if MXGE_VIRT_JUMBOS 3250 3, /* num segs */ 3251 4096, /* maxsegsize*/ 3252 #else 3253 1, /* num segs */ 3254 MJUM9BYTES, /* maxsegsize*/ 3255 #endif 3256 BUS_DMA_ALLOCNOW, /* flags */ 3257 NULL, NULL, /* lock */ 3258 &ss->rx_big.dmat); /* tag */ 3259 if (err != 0) { 3260 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3261 err); 3262 return err; 3263 } 3264 for (i = 0; i <= ss->rx_small.mask; i++) { 3265 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3266 &ss->rx_small.info[i].map); 3267 if (err != 0) { 3268 device_printf(sc->dev, "Err %d rx_small dmamap\n", 3269 err); 3270 return err; 3271 } 3272 } 3273 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3274 &ss->rx_small.extra_map); 3275 if (err != 0) { 3276 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 3277 err); 3278 return err; 3279 } 3280 3281 for (i = 0; i <= ss->rx_big.mask; i++) { 3282 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3283 &ss->rx_big.info[i].map); 3284 if (err != 0) { 3285 device_printf(sc->dev, "Err %d rx_big dmamap\n", 3286 err); 3287 return err; 3288 } 3289 } 3290 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3291 &ss->rx_big.extra_map); 3292 if (err != 0) { 3293 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 3294 err); 3295 return err; 3296 } 3297 3298 /* now allocate TX resouces */ 3299 3300 #ifndef IFNET_BUF_RING 3301 /* only use a single TX ring for now */ 3302 if (ss != ss->sc->ss) 3303 return 0; 3304 #endif 3305 3306 ss->tx.mask = tx_ring_entries - 1; 3307 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3308 3309 3310 /* allocate the tx request copy block */ 3311 bytes = 8 + 3312 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3313 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 3314 if (ss->tx.req_bytes == NULL) 3315 return err; 3316 /* ensure req_list entries are aligned to 8 bytes */ 3317 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3318 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL); 3319 3320 /* allocate the tx busdma segment list */ 3321 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3322 ss->tx.seg_list = (bus_dma_segment_t *) 3323 malloc(bytes, M_DEVBUF, M_WAITOK); 3324 if (ss->tx.seg_list == NULL) 3325 return err; 3326 3327 /* allocate the tx host info ring */ 3328 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3329 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3330 if (ss->tx.info == NULL) 3331 return err; 3332 3333 /* allocate the tx busdma resources */ 3334 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3335 1, /* alignment */ 3336 sc->tx_boundary, /* boundary */ 3337 BUS_SPACE_MAXADDR, /* low */ 3338 BUS_SPACE_MAXADDR, /* high */ 3339 NULL, NULL, /* filter */ 3340 65536 + 256, /* maxsize */ 3341 ss->tx.max_desc - 2, /* num segs */ 3342 sc->tx_boundary, /* maxsegsz */ 3343 BUS_DMA_ALLOCNOW, /* flags */ 3344 NULL, NULL, /* lock */ 3345 &ss->tx.dmat); /* tag */ 3346 3347 if (err != 0) { 3348 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3349 err); 3350 return err; 3351 } 3352 3353 /* now use these tags to setup dmamaps for each slot 3354 in the ring */ 3355 for (i = 0; i <= ss->tx.mask; i++) { 3356 err = bus_dmamap_create(ss->tx.dmat, 0, 3357 &ss->tx.info[i].map); 3358 if (err != 0) { 3359 device_printf(sc->dev, "Err %d tx dmamap\n", 3360 err); 3361 return err; 3362 } 3363 } 3364 return 0; 3365 3366 } 3367 3368 static int 3369 mxge_alloc_rings(mxge_softc_t *sc) 3370 { 3371 mxge_cmd_t cmd; 3372 int tx_ring_size; 3373 int tx_ring_entries, rx_ring_entries; 3374 int err, slice; 3375 3376 /* get ring sizes */ 3377 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3378 tx_ring_size = cmd.data0; 3379 if (err != 0) { 3380 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3381 goto abort; 3382 } 3383 3384 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3385 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3386 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 3387 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 3388 IFQ_SET_READY(&sc->ifp->if_snd); 3389 3390 for (slice = 0; slice < sc->num_slices; slice++) { 3391 err = mxge_alloc_slice_rings(&sc->ss[slice], 3392 rx_ring_entries, 3393 tx_ring_entries); 3394 if (err != 0) 3395 goto abort; 3396 } 3397 return 0; 3398 3399 abort: 3400 mxge_free_rings(sc); 3401 return err; 3402 3403 } 3404 3405 3406 static void 3407 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3408 { 3409 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3410 3411 if (bufsize < MCLBYTES) { 3412 /* easy, everything fits in a single buffer */ 3413 *big_buf_size = MCLBYTES; 3414 *cl_size = MCLBYTES; 3415 *nbufs = 1; 3416 return; 3417 } 3418 3419 if (bufsize < MJUMPAGESIZE) { 3420 /* still easy, everything still fits in a single buffer */ 3421 *big_buf_size = MJUMPAGESIZE; 3422 *cl_size = MJUMPAGESIZE; 3423 *nbufs = 1; 3424 return; 3425 } 3426 #if MXGE_VIRT_JUMBOS 3427 /* now we need to use virtually contiguous buffers */ 3428 *cl_size = MJUM9BYTES; 3429 *big_buf_size = 4096; 3430 *nbufs = mtu / 4096 + 1; 3431 /* needs to be a power of two, so round up */ 3432 if (*nbufs == 3) 3433 *nbufs = 4; 3434 #else 3435 *cl_size = MJUM9BYTES; 3436 *big_buf_size = MJUM9BYTES; 3437 *nbufs = 1; 3438 #endif 3439 } 3440 3441 static int 3442 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3443 { 3444 mxge_softc_t *sc; 3445 mxge_cmd_t cmd; 3446 bus_dmamap_t map; 3447 struct lro_entry *lro_entry; 3448 int err, i, slice; 3449 3450 3451 sc = ss->sc; 3452 slice = ss - sc->ss; 3453 3454 SLIST_INIT(&ss->lro_free); 3455 SLIST_INIT(&ss->lro_active); 3456 3457 for (i = 0; i < sc->lro_cnt; i++) { 3458 lro_entry = (struct lro_entry *) 3459 malloc(sizeof (*lro_entry), M_DEVBUF, 3460 M_NOWAIT | M_ZERO); 3461 if (lro_entry == NULL) { 3462 sc->lro_cnt = i; 3463 break; 3464 } 3465 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next); 3466 } 3467 /* get the lanai pointers to the send and receive rings */ 3468 3469 err = 0; 3470 #ifndef IFNET_BUF_RING 3471 /* We currently only send from the first slice */ 3472 if (slice == 0) { 3473 #endif 3474 cmd.data0 = slice; 3475 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3476 ss->tx.lanai = 3477 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3478 ss->tx.send_go = (volatile uint32_t *) 3479 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3480 ss->tx.send_stop = (volatile uint32_t *) 3481 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3482 #ifndef IFNET_BUF_RING 3483 } 3484 #endif 3485 cmd.data0 = slice; 3486 err |= mxge_send_cmd(sc, 3487 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3488 ss->rx_small.lanai = 3489 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3490 cmd.data0 = slice; 3491 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3492 ss->rx_big.lanai = 3493 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3494 3495 if (err != 0) { 3496 device_printf(sc->dev, 3497 "failed to get ring sizes or locations\n"); 3498 return EIO; 3499 } 3500 3501 /* stock receive rings */ 3502 for (i = 0; i <= ss->rx_small.mask; i++) { 3503 map = ss->rx_small.info[i].map; 3504 err = mxge_get_buf_small(ss, map, i); 3505 if (err) { 3506 device_printf(sc->dev, "alloced %d/%d smalls\n", 3507 i, ss->rx_small.mask + 1); 3508 return ENOMEM; 3509 } 3510 } 3511 for (i = 0; i <= ss->rx_big.mask; i++) { 3512 ss->rx_big.shadow[i].addr_low = 0xffffffff; 3513 ss->rx_big.shadow[i].addr_high = 0xffffffff; 3514 } 3515 ss->rx_big.nbufs = nbufs; 3516 ss->rx_big.cl_size = cl_size; 3517 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN + 3518 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3519 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { 3520 map = ss->rx_big.info[i].map; 3521 err = mxge_get_buf_big(ss, map, i); 3522 if (err) { 3523 device_printf(sc->dev, "alloced %d/%d bigs\n", 3524 i, ss->rx_big.mask + 1); 3525 return ENOMEM; 3526 } 3527 } 3528 return 0; 3529 } 3530 3531 static int 3532 mxge_open(mxge_softc_t *sc) 3533 { 3534 mxge_cmd_t cmd; 3535 int err, big_bytes, nbufs, slice, cl_size, i; 3536 bus_addr_t bus; 3537 volatile uint8_t *itable; 3538 struct mxge_slice_state *ss; 3539 3540 /* Copy the MAC address in case it was overridden */ 3541 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 3542 3543 err = mxge_reset(sc, 1); 3544 if (err != 0) { 3545 device_printf(sc->dev, "failed to reset\n"); 3546 return EIO; 3547 } 3548 3549 if (sc->num_slices > 1) { 3550 /* setup the indirection table */ 3551 cmd.data0 = sc->num_slices; 3552 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 3553 &cmd); 3554 3555 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 3556 &cmd); 3557 if (err != 0) { 3558 device_printf(sc->dev, 3559 "failed to setup rss tables\n"); 3560 return err; 3561 } 3562 3563 /* just enable an identity mapping */ 3564 itable = sc->sram + cmd.data0; 3565 for (i = 0; i < sc->num_slices; i++) 3566 itable[i] = (uint8_t)i; 3567 3568 cmd.data0 = 1; 3569 cmd.data1 = mxge_rss_hash_type; 3570 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3571 if (err != 0) { 3572 device_printf(sc->dev, "failed to enable slices\n"); 3573 return err; 3574 } 3575 } 3576 3577 3578 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); 3579 3580 cmd.data0 = nbufs; 3581 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 3582 &cmd); 3583 /* error is only meaningful if we're trying to set 3584 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 3585 if (err && nbufs > 1) { 3586 device_printf(sc->dev, 3587 "Failed to set alway-use-n to %d\n", 3588 nbufs); 3589 return EIO; 3590 } 3591 /* Give the firmware the mtu and the big and small buffer 3592 sizes. The firmware wants the big buf size to be a power 3593 of two. Luckily, FreeBSD's clusters are powers of two */ 3594 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3595 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3596 cmd.data0 = MHLEN - MXGEFW_PAD; 3597 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3598 &cmd); 3599 cmd.data0 = big_bytes; 3600 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3601 3602 if (err != 0) { 3603 device_printf(sc->dev, "failed to setup params\n"); 3604 goto abort; 3605 } 3606 3607 /* Now give him the pointer to the stats block */ 3608 for (slice = 0; 3609 #ifdef IFNET_BUF_RING 3610 slice < sc->num_slices; 3611 #else 3612 slice < 1; 3613 #endif 3614 slice++) { 3615 ss = &sc->ss[slice]; 3616 cmd.data0 = 3617 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3618 cmd.data1 = 3619 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3620 cmd.data2 = sizeof(struct mcp_irq_data); 3621 cmd.data2 |= (slice << 16); 3622 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3623 } 3624 3625 if (err != 0) { 3626 bus = sc->ss->fw_stats_dma.bus_addr; 3627 bus += offsetof(struct mcp_irq_data, send_done_count); 3628 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3629 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3630 err = mxge_send_cmd(sc, 3631 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3632 &cmd); 3633 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3634 sc->fw_multicast_support = 0; 3635 } else { 3636 sc->fw_multicast_support = 1; 3637 } 3638 3639 if (err != 0) { 3640 device_printf(sc->dev, "failed to setup params\n"); 3641 goto abort; 3642 } 3643 3644 for (slice = 0; slice < sc->num_slices; slice++) { 3645 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3646 if (err != 0) { 3647 device_printf(sc->dev, "couldn't open slice %d\n", 3648 slice); 3649 goto abort; 3650 } 3651 } 3652 3653 /* Finally, start the firmware running */ 3654 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3655 if (err) { 3656 device_printf(sc->dev, "Couldn't bring up link\n"); 3657 goto abort; 3658 } 3659 #ifdef IFNET_BUF_RING 3660 for (slice = 0; slice < sc->num_slices; slice++) { 3661 ss = &sc->ss[slice]; 3662 ss->if_drv_flags |= IFF_DRV_RUNNING; 3663 ss->if_drv_flags &= ~IFF_DRV_OACTIVE; 3664 } 3665 #endif 3666 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3667 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3668 3669 return 0; 3670 3671 3672 abort: 3673 mxge_free_mbufs(sc); 3674 3675 return err; 3676 } 3677 3678 static int 3679 mxge_close(mxge_softc_t *sc, int down) 3680 { 3681 mxge_cmd_t cmd; 3682 int err, old_down_cnt; 3683 #ifdef IFNET_BUF_RING 3684 struct mxge_slice_state *ss; 3685 int slice; 3686 #endif 3687 3688 #ifdef IFNET_BUF_RING 3689 for (slice = 0; slice < sc->num_slices; slice++) { 3690 ss = &sc->ss[slice]; 3691 ss->if_drv_flags &= ~IFF_DRV_RUNNING; 3692 } 3693 #endif 3694 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3695 if (!down) { 3696 old_down_cnt = sc->down_cnt; 3697 wmb(); 3698 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3699 if (err) { 3700 device_printf(sc->dev, 3701 "Couldn't bring down link\n"); 3702 } 3703 if (old_down_cnt == sc->down_cnt) { 3704 /* wait for down irq */ 3705 DELAY(10 * sc->intr_coal_delay); 3706 } 3707 wmb(); 3708 if (old_down_cnt == sc->down_cnt) { 3709 device_printf(sc->dev, "never got down irq\n"); 3710 } 3711 } 3712 mxge_free_mbufs(sc); 3713 3714 return 0; 3715 } 3716 3717 static void 3718 mxge_setup_cfg_space(mxge_softc_t *sc) 3719 { 3720 device_t dev = sc->dev; 3721 int reg; 3722 uint16_t cmd, lnk, pectl; 3723 3724 /* find the PCIe link width and set max read request to 4KB*/ 3725 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 3726 lnk = pci_read_config(dev, reg + 0x12, 2); 3727 sc->link_width = (lnk >> 4) & 0x3f; 3728 3729 if (sc->pectl == 0) { 3730 pectl = pci_read_config(dev, reg + 0x8, 2); 3731 pectl = (pectl & ~0x7000) | (5 << 12); 3732 pci_write_config(dev, reg + 0x8, pectl, 2); 3733 sc->pectl = pectl; 3734 } else { 3735 /* restore saved pectl after watchdog reset */ 3736 pci_write_config(dev, reg + 0x8, sc->pectl, 2); 3737 } 3738 } 3739 3740 /* Enable DMA and Memory space access */ 3741 pci_enable_busmaster(dev); 3742 cmd = pci_read_config(dev, PCIR_COMMAND, 2); 3743 cmd |= PCIM_CMD_MEMEN; 3744 pci_write_config(dev, PCIR_COMMAND, cmd, 2); 3745 } 3746 3747 static uint32_t 3748 mxge_read_reboot(mxge_softc_t *sc) 3749 { 3750 device_t dev = sc->dev; 3751 uint32_t vs; 3752 3753 /* find the vendor specific offset */ 3754 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) { 3755 device_printf(sc->dev, 3756 "could not find vendor specific offset\n"); 3757 return (uint32_t)-1; 3758 } 3759 /* enable read32 mode */ 3760 pci_write_config(dev, vs + 0x10, 0x3, 1); 3761 /* tell NIC which register to read */ 3762 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3763 return (pci_read_config(dev, vs + 0x14, 4)); 3764 } 3765 3766 static void 3767 mxge_watchdog_reset(mxge_softc_t *sc) 3768 { 3769 struct pci_devinfo *dinfo; 3770 struct mxge_slice_state *ss; 3771 int err, running, s, num_tx_slices = 1; 3772 uint32_t reboot; 3773 uint16_t cmd; 3774 3775 err = ENXIO; 3776 3777 device_printf(sc->dev, "Watchdog reset!\n"); 3778 3779 /* 3780 * check to see if the NIC rebooted. If it did, then all of 3781 * PCI config space has been reset, and things like the 3782 * busmaster bit will be zero. If this is the case, then we 3783 * must restore PCI config space before the NIC can be used 3784 * again 3785 */ 3786 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3787 if (cmd == 0xffff) { 3788 /* 3789 * maybe the watchdog caught the NIC rebooting; wait 3790 * up to 100ms for it to finish. If it does not come 3791 * back, then give up 3792 */ 3793 DELAY(1000*100); 3794 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3795 if (cmd == 0xffff) { 3796 device_printf(sc->dev, "NIC disappeared!\n"); 3797 } 3798 } 3799 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3800 /* print the reboot status */ 3801 reboot = mxge_read_reboot(sc); 3802 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3803 reboot); 3804 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 3805 if (running) { 3806 3807 /* 3808 * quiesce NIC so that TX routines will not try to 3809 * xmit after restoration of BAR 3810 */ 3811 3812 /* Mark the link as down */ 3813 if (sc->link_state) { 3814 sc->link_state = 0; 3815 if_link_state_change(sc->ifp, 3816 LINK_STATE_DOWN); 3817 } 3818 #ifdef IFNET_BUF_RING 3819 num_tx_slices = sc->num_slices; 3820 #endif 3821 /* grab all TX locks to ensure no tx */ 3822 for (s = 0; s < num_tx_slices; s++) { 3823 ss = &sc->ss[s]; 3824 mtx_lock(&ss->tx.mtx); 3825 } 3826 mxge_close(sc, 1); 3827 } 3828 /* restore PCI configuration space */ 3829 dinfo = device_get_ivars(sc->dev); 3830 pci_cfg_restore(sc->dev, dinfo); 3831 3832 /* and redo any changes we made to our config space */ 3833 mxge_setup_cfg_space(sc); 3834 3835 /* reload f/w */ 3836 err = mxge_load_firmware(sc, 0); 3837 if (err) { 3838 device_printf(sc->dev, 3839 "Unable to re-load f/w\n"); 3840 } 3841 if (running) { 3842 if (!err) 3843 err = mxge_open(sc); 3844 /* release all TX locks */ 3845 for (s = 0; s < num_tx_slices; s++) { 3846 ss = &sc->ss[s]; 3847 #ifdef IFNET_BUF_RING 3848 mxge_start_locked(ss); 3849 #endif 3850 mtx_unlock(&ss->tx.mtx); 3851 } 3852 } 3853 sc->watchdog_resets++; 3854 } else { 3855 device_printf(sc->dev, 3856 "NIC did not reboot, not resetting\n"); 3857 err = 0; 3858 } 3859 if (err) { 3860 device_printf(sc->dev, "watchdog reset failed\n"); 3861 } else { 3862 if (sc->dying == 2) 3863 sc->dying = 0; 3864 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3865 } 3866 } 3867 3868 static void 3869 mxge_watchdog_task(void *arg, int pending) 3870 { 3871 mxge_softc_t *sc = arg; 3872 3873 3874 mtx_lock(&sc->driver_mtx); 3875 mxge_watchdog_reset(sc); 3876 mtx_unlock(&sc->driver_mtx); 3877 } 3878 3879 static void 3880 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice) 3881 { 3882 tx = &sc->ss[slice].tx; 3883 device_printf(sc->dev, "slice %d struck? ring state:\n", slice); 3884 device_printf(sc->dev, 3885 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3886 tx->req, tx->done, tx->queue_active); 3887 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3888 tx->activate, tx->deactivate); 3889 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3890 tx->pkt_done, 3891 be32toh(sc->ss->fw_stats->send_done_count)); 3892 } 3893 3894 static int 3895 mxge_watchdog(mxge_softc_t *sc) 3896 { 3897 mxge_tx_ring_t *tx; 3898 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3899 int i, err = 0; 3900 3901 /* see if we have outstanding transmits, which 3902 have been pending for more than mxge_ticks */ 3903 for (i = 0; 3904 #ifdef IFNET_BUF_RING 3905 (i < sc->num_slices) && (err == 0); 3906 #else 3907 (i < 1) && (err == 0); 3908 #endif 3909 i++) { 3910 tx = &sc->ss[i].tx; 3911 if (tx->req != tx->done && 3912 tx->watchdog_req != tx->watchdog_done && 3913 tx->done == tx->watchdog_done) { 3914 /* check for pause blocking before resetting */ 3915 if (tx->watchdog_rx_pause == rx_pause) { 3916 mxge_warn_stuck(sc, tx, i); 3917 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 3918 return (ENXIO); 3919 } 3920 else 3921 device_printf(sc->dev, "Flow control blocking " 3922 "xmits, check link partner\n"); 3923 } 3924 3925 tx->watchdog_req = tx->req; 3926 tx->watchdog_done = tx->done; 3927 tx->watchdog_rx_pause = rx_pause; 3928 } 3929 3930 if (sc->need_media_probe) 3931 mxge_media_probe(sc); 3932 return (err); 3933 } 3934 3935 static u_long 3936 mxge_update_stats(mxge_softc_t *sc) 3937 { 3938 struct mxge_slice_state *ss; 3939 u_long pkts = 0; 3940 u_long ipackets = 0; 3941 u_long opackets = 0; 3942 #ifdef IFNET_BUF_RING 3943 u_long obytes = 0; 3944 u_long omcasts = 0; 3945 u_long odrops = 0; 3946 #endif 3947 u_long oerrors = 0; 3948 int slice; 3949 3950 for (slice = 0; slice < sc->num_slices; slice++) { 3951 ss = &sc->ss[slice]; 3952 ipackets += ss->ipackets; 3953 opackets += ss->opackets; 3954 #ifdef IFNET_BUF_RING 3955 obytes += ss->obytes; 3956 omcasts += ss->omcasts; 3957 odrops += ss->tx.br->br_drops; 3958 #endif 3959 oerrors += ss->oerrors; 3960 } 3961 pkts = (ipackets - sc->ifp->if_ipackets); 3962 pkts += (opackets - sc->ifp->if_opackets); 3963 sc->ifp->if_ipackets = ipackets; 3964 sc->ifp->if_opackets = opackets; 3965 #ifdef IFNET_BUF_RING 3966 sc->ifp->if_obytes = obytes; 3967 sc->ifp->if_omcasts = omcasts; 3968 sc->ifp->if_snd.ifq_drops = odrops; 3969 #endif 3970 sc->ifp->if_oerrors = oerrors; 3971 return pkts; 3972 } 3973 3974 static void 3975 mxge_tick(void *arg) 3976 { 3977 mxge_softc_t *sc = arg; 3978 u_long pkts = 0; 3979 int err = 0; 3980 int running, ticks; 3981 uint16_t cmd; 3982 3983 ticks = mxge_ticks; 3984 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 3985 if (running) { 3986 /* aggregate stats from different slices */ 3987 pkts = mxge_update_stats(sc); 3988 if (!sc->watchdog_countdown) { 3989 err = mxge_watchdog(sc); 3990 sc->watchdog_countdown = 4; 3991 } 3992 sc->watchdog_countdown--; 3993 } 3994 if (pkts == 0) { 3995 /* ensure NIC did not suffer h/w fault while idle */ 3996 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3997 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3998 sc->dying = 2; 3999 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 4000 err = ENXIO; 4001 } 4002 /* look less often if NIC is idle */ 4003 ticks *= 4; 4004 } 4005 4006 if (err == 0) 4007 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 4008 4009 } 4010 4011 static int 4012 mxge_media_change(struct ifnet *ifp) 4013 { 4014 return EINVAL; 4015 } 4016 4017 static int 4018 mxge_change_mtu(mxge_softc_t *sc, int mtu) 4019 { 4020 struct ifnet *ifp = sc->ifp; 4021 int real_mtu, old_mtu; 4022 int err = 0; 4023 4024 4025 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 4026 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 4027 return EINVAL; 4028 mtx_lock(&sc->driver_mtx); 4029 old_mtu = ifp->if_mtu; 4030 ifp->if_mtu = mtu; 4031 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4032 mxge_close(sc, 0); 4033 err = mxge_open(sc); 4034 if (err != 0) { 4035 ifp->if_mtu = old_mtu; 4036 mxge_close(sc, 0); 4037 (void) mxge_open(sc); 4038 } 4039 } 4040 mtx_unlock(&sc->driver_mtx); 4041 return err; 4042 } 4043 4044 static void 4045 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 4046 { 4047 mxge_softc_t *sc = ifp->if_softc; 4048 4049 4050 if (sc == NULL) 4051 return; 4052 ifmr->ifm_status = IFM_AVALID; 4053 ifmr->ifm_active = IFM_ETHER | IFM_FDX; 4054 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 4055 ifmr->ifm_active |= sc->current_media; 4056 } 4057 4058 static int 4059 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 4060 { 4061 mxge_softc_t *sc = ifp->if_softc; 4062 struct ifreq *ifr = (struct ifreq *)data; 4063 int err, mask; 4064 4065 err = 0; 4066 switch (command) { 4067 case SIOCSIFADDR: 4068 case SIOCGIFADDR: 4069 err = ether_ioctl(ifp, command, data); 4070 break; 4071 4072 case SIOCSIFMTU: 4073 err = mxge_change_mtu(sc, ifr->ifr_mtu); 4074 break; 4075 4076 case SIOCSIFFLAGS: 4077 mtx_lock(&sc->driver_mtx); 4078 if (sc->dying) { 4079 mtx_unlock(&sc->driver_mtx); 4080 return EINVAL; 4081 } 4082 if (ifp->if_flags & IFF_UP) { 4083 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 4084 err = mxge_open(sc); 4085 } else { 4086 /* take care of promis can allmulti 4087 flag chages */ 4088 mxge_change_promisc(sc, 4089 ifp->if_flags & IFF_PROMISC); 4090 mxge_set_multicast_list(sc); 4091 } 4092 } else { 4093 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4094 mxge_close(sc, 0); 4095 } 4096 } 4097 mtx_unlock(&sc->driver_mtx); 4098 break; 4099 4100 case SIOCADDMULTI: 4101 case SIOCDELMULTI: 4102 mtx_lock(&sc->driver_mtx); 4103 mxge_set_multicast_list(sc); 4104 mtx_unlock(&sc->driver_mtx); 4105 break; 4106 4107 case SIOCSIFCAP: 4108 mtx_lock(&sc->driver_mtx); 4109 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 4110 if (mask & IFCAP_TXCSUM) { 4111 if (IFCAP_TXCSUM & ifp->if_capenable) { 4112 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 4113 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP 4114 | CSUM_TSO); 4115 } else { 4116 ifp->if_capenable |= IFCAP_TXCSUM; 4117 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 4118 } 4119 } else if (mask & IFCAP_RXCSUM) { 4120 if (IFCAP_RXCSUM & ifp->if_capenable) { 4121 ifp->if_capenable &= ~IFCAP_RXCSUM; 4122 sc->csum_flag = 0; 4123 } else { 4124 ifp->if_capenable |= IFCAP_RXCSUM; 4125 sc->csum_flag = 1; 4126 } 4127 } 4128 if (mask & IFCAP_TSO4) { 4129 if (IFCAP_TSO4 & ifp->if_capenable) { 4130 ifp->if_capenable &= ~IFCAP_TSO4; 4131 ifp->if_hwassist &= ~CSUM_TSO; 4132 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 4133 ifp->if_capenable |= IFCAP_TSO4; 4134 ifp->if_hwassist |= CSUM_TSO; 4135 } else { 4136 printf("mxge requires tx checksum offload" 4137 " be enabled to use TSO\n"); 4138 err = EINVAL; 4139 } 4140 } 4141 if (mask & IFCAP_LRO) { 4142 if (IFCAP_LRO & ifp->if_capenable) 4143 err = mxge_change_lro_locked(sc, 0); 4144 else 4145 err = mxge_change_lro_locked(sc, mxge_lro_cnt); 4146 } 4147 if (mask & IFCAP_VLAN_HWTAGGING) 4148 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 4149 if (mask & IFCAP_VLAN_HWTSO) 4150 ifp->if_capenable ^= IFCAP_VLAN_HWTSO; 4151 4152 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) || 4153 !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING)) 4154 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO; 4155 4156 mtx_unlock(&sc->driver_mtx); 4157 VLAN_CAPABILITIES(ifp); 4158 4159 break; 4160 4161 case SIOCGIFMEDIA: 4162 mtx_lock(&sc->driver_mtx); 4163 mxge_media_probe(sc); 4164 mtx_unlock(&sc->driver_mtx); 4165 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 4166 &sc->media, command); 4167 break; 4168 4169 default: 4170 err = ENOTTY; 4171 } 4172 return err; 4173 } 4174 4175 static void 4176 mxge_fetch_tunables(mxge_softc_t *sc) 4177 { 4178 4179 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); 4180 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 4181 &mxge_flow_control); 4182 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 4183 &mxge_intr_coal_delay); 4184 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 4185 &mxge_nvidia_ecrc_enable); 4186 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 4187 &mxge_force_firmware); 4188 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 4189 &mxge_deassert_wait); 4190 TUNABLE_INT_FETCH("hw.mxge.verbose", 4191 &mxge_verbose); 4192 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 4193 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt); 4194 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); 4195 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); 4196 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type); 4197 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu); 4198 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle); 4199 if (sc->lro_cnt != 0) 4200 mxge_lro_cnt = sc->lro_cnt; 4201 4202 if (bootverbose) 4203 mxge_verbose = 1; 4204 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 4205 mxge_intr_coal_delay = 30; 4206 if (mxge_ticks == 0) 4207 mxge_ticks = hz / 2; 4208 sc->pause = mxge_flow_control; 4209 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 4210 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) { 4211 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 4212 } 4213 if (mxge_initial_mtu > ETHERMTU_JUMBO || 4214 mxge_initial_mtu < ETHER_MIN_LEN) 4215 mxge_initial_mtu = ETHERMTU_JUMBO; 4216 4217 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE) 4218 mxge_throttle = MXGE_MAX_THROTTLE; 4219 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE) 4220 mxge_throttle = MXGE_MIN_THROTTLE; 4221 sc->throttle = mxge_throttle; 4222 } 4223 4224 4225 static void 4226 mxge_free_slices(mxge_softc_t *sc) 4227 { 4228 struct mxge_slice_state *ss; 4229 int i; 4230 4231 4232 if (sc->ss == NULL) 4233 return; 4234 4235 for (i = 0; i < sc->num_slices; i++) { 4236 ss = &sc->ss[i]; 4237 if (ss->fw_stats != NULL) { 4238 mxge_dma_free(&ss->fw_stats_dma); 4239 ss->fw_stats = NULL; 4240 #ifdef IFNET_BUF_RING 4241 if (ss->tx.br != NULL) { 4242 drbr_free(ss->tx.br, M_DEVBUF); 4243 ss->tx.br = NULL; 4244 } 4245 #endif 4246 mtx_destroy(&ss->tx.mtx); 4247 } 4248 if (ss->rx_done.entry != NULL) { 4249 mxge_dma_free(&ss->rx_done.dma); 4250 ss->rx_done.entry = NULL; 4251 } 4252 } 4253 free(sc->ss, M_DEVBUF); 4254 sc->ss = NULL; 4255 } 4256 4257 static int 4258 mxge_alloc_slices(mxge_softc_t *sc) 4259 { 4260 mxge_cmd_t cmd; 4261 struct mxge_slice_state *ss; 4262 size_t bytes; 4263 int err, i, max_intr_slots; 4264 4265 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4266 if (err != 0) { 4267 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4268 return err; 4269 } 4270 sc->rx_ring_size = cmd.data0; 4271 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); 4272 4273 bytes = sizeof (*sc->ss) * sc->num_slices; 4274 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO); 4275 if (sc->ss == NULL) 4276 return (ENOMEM); 4277 for (i = 0; i < sc->num_slices; i++) { 4278 ss = &sc->ss[i]; 4279 4280 ss->sc = sc; 4281 4282 /* allocate per-slice rx interrupt queues */ 4283 4284 bytes = max_intr_slots * sizeof (*ss->rx_done.entry); 4285 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096); 4286 if (err != 0) 4287 goto abort; 4288 ss->rx_done.entry = ss->rx_done.dma.addr; 4289 bzero(ss->rx_done.entry, bytes); 4290 4291 /* 4292 * allocate the per-slice firmware stats; stats 4293 * (including tx) are used used only on the first 4294 * slice for now 4295 */ 4296 #ifndef IFNET_BUF_RING 4297 if (i > 0) 4298 continue; 4299 #endif 4300 4301 bytes = sizeof (*ss->fw_stats); 4302 err = mxge_dma_alloc(sc, &ss->fw_stats_dma, 4303 sizeof (*ss->fw_stats), 64); 4304 if (err != 0) 4305 goto abort; 4306 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr; 4307 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name), 4308 "%s:tx(%d)", device_get_nameunit(sc->dev), i); 4309 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF); 4310 #ifdef IFNET_BUF_RING 4311 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK, 4312 &ss->tx.mtx); 4313 #endif 4314 } 4315 4316 return (0); 4317 4318 abort: 4319 mxge_free_slices(sc); 4320 return (ENOMEM); 4321 } 4322 4323 static void 4324 mxge_slice_probe(mxge_softc_t *sc) 4325 { 4326 mxge_cmd_t cmd; 4327 char *old_fw; 4328 int msix_cnt, status, max_intr_slots; 4329 4330 sc->num_slices = 1; 4331 /* 4332 * don't enable multiple slices if they are not enabled, 4333 * or if this is not an SMP system 4334 */ 4335 4336 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2) 4337 return; 4338 4339 /* see how many MSI-X interrupts are available */ 4340 msix_cnt = pci_msix_count(sc->dev); 4341 if (msix_cnt < 2) 4342 return; 4343 4344 /* now load the slice aware firmware see what it supports */ 4345 old_fw = sc->fw_name; 4346 if (old_fw == mxge_fw_aligned) 4347 sc->fw_name = mxge_fw_rss_aligned; 4348 else 4349 sc->fw_name = mxge_fw_rss_unaligned; 4350 status = mxge_load_firmware(sc, 0); 4351 if (status != 0) { 4352 device_printf(sc->dev, "Falling back to a single slice\n"); 4353 return; 4354 } 4355 4356 /* try to send a reset command to the card to see if it 4357 is alive */ 4358 memset(&cmd, 0, sizeof (cmd)); 4359 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 4360 if (status != 0) { 4361 device_printf(sc->dev, "failed reset\n"); 4362 goto abort_with_fw; 4363 } 4364 4365 /* get rx ring size */ 4366 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4367 if (status != 0) { 4368 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4369 goto abort_with_fw; 4370 } 4371 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t)); 4372 4373 /* tell it the size of the interrupt queues */ 4374 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot); 4375 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 4376 if (status != 0) { 4377 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 4378 goto abort_with_fw; 4379 } 4380 4381 /* ask the maximum number of slices it supports */ 4382 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 4383 if (status != 0) { 4384 device_printf(sc->dev, 4385 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); 4386 goto abort_with_fw; 4387 } 4388 sc->num_slices = cmd.data0; 4389 if (sc->num_slices > msix_cnt) 4390 sc->num_slices = msix_cnt; 4391 4392 if (mxge_max_slices == -1) { 4393 /* cap to number of CPUs in system */ 4394 if (sc->num_slices > mp_ncpus) 4395 sc->num_slices = mp_ncpus; 4396 } else { 4397 if (sc->num_slices > mxge_max_slices) 4398 sc->num_slices = mxge_max_slices; 4399 } 4400 /* make sure it is a power of two */ 4401 while (sc->num_slices & (sc->num_slices - 1)) 4402 sc->num_slices--; 4403 4404 if (mxge_verbose) 4405 device_printf(sc->dev, "using %d slices\n", 4406 sc->num_slices); 4407 4408 return; 4409 4410 abort_with_fw: 4411 sc->fw_name = old_fw; 4412 (void) mxge_load_firmware(sc, 0); 4413 } 4414 4415 static int 4416 mxge_add_msix_irqs(mxge_softc_t *sc) 4417 { 4418 size_t bytes; 4419 int count, err, i, rid; 4420 4421 rid = PCIR_BAR(2); 4422 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 4423 &rid, RF_ACTIVE); 4424 4425 if (sc->msix_table_res == NULL) { 4426 device_printf(sc->dev, "couldn't alloc MSIX table res\n"); 4427 return ENXIO; 4428 } 4429 4430 count = sc->num_slices; 4431 err = pci_alloc_msix(sc->dev, &count); 4432 if (err != 0) { 4433 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d" 4434 "err = %d \n", sc->num_slices, err); 4435 goto abort_with_msix_table; 4436 } 4437 if (count < sc->num_slices) { 4438 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n", 4439 count, sc->num_slices); 4440 device_printf(sc->dev, 4441 "Try setting hw.mxge.max_slices to %d\n", 4442 count); 4443 err = ENOSPC; 4444 goto abort_with_msix; 4445 } 4446 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices; 4447 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4448 if (sc->msix_irq_res == NULL) { 4449 err = ENOMEM; 4450 goto abort_with_msix; 4451 } 4452 4453 for (i = 0; i < sc->num_slices; i++) { 4454 rid = i + 1; 4455 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev, 4456 SYS_RES_IRQ, 4457 &rid, RF_ACTIVE); 4458 if (sc->msix_irq_res[i] == NULL) { 4459 device_printf(sc->dev, "couldn't allocate IRQ res" 4460 " for message %d\n", i); 4461 err = ENXIO; 4462 goto abort_with_res; 4463 } 4464 } 4465 4466 bytes = sizeof (*sc->msix_ih) * sc->num_slices; 4467 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4468 4469 for (i = 0; i < sc->num_slices; i++) { 4470 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i], 4471 INTR_TYPE_NET | INTR_MPSAFE, 4472 #if __FreeBSD_version > 700030 4473 NULL, 4474 #endif 4475 mxge_intr, &sc->ss[i], &sc->msix_ih[i]); 4476 if (err != 0) { 4477 device_printf(sc->dev, "couldn't setup intr for " 4478 "message %d\n", i); 4479 goto abort_with_intr; 4480 } 4481 } 4482 4483 if (mxge_verbose) { 4484 device_printf(sc->dev, "using %d msix IRQs:", 4485 sc->num_slices); 4486 for (i = 0; i < sc->num_slices; i++) 4487 printf(" %ld", rman_get_start(sc->msix_irq_res[i])); 4488 printf("\n"); 4489 } 4490 return (0); 4491 4492 abort_with_intr: 4493 for (i = 0; i < sc->num_slices; i++) { 4494 if (sc->msix_ih[i] != NULL) { 4495 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4496 sc->msix_ih[i]); 4497 sc->msix_ih[i] = NULL; 4498 } 4499 } 4500 free(sc->msix_ih, M_DEVBUF); 4501 4502 4503 abort_with_res: 4504 for (i = 0; i < sc->num_slices; i++) { 4505 rid = i + 1; 4506 if (sc->msix_irq_res[i] != NULL) 4507 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4508 sc->msix_irq_res[i]); 4509 sc->msix_irq_res[i] = NULL; 4510 } 4511 free(sc->msix_irq_res, M_DEVBUF); 4512 4513 4514 abort_with_msix: 4515 pci_release_msi(sc->dev); 4516 4517 abort_with_msix_table: 4518 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4519 sc->msix_table_res); 4520 4521 return err; 4522 } 4523 4524 static int 4525 mxge_add_single_irq(mxge_softc_t *sc) 4526 { 4527 int count, err, rid; 4528 4529 count = pci_msi_count(sc->dev); 4530 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) { 4531 rid = 1; 4532 } else { 4533 rid = 0; 4534 sc->legacy_irq = 1; 4535 } 4536 sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0, 4537 1, RF_SHAREABLE | RF_ACTIVE); 4538 if (sc->irq_res == NULL) { 4539 device_printf(sc->dev, "could not alloc interrupt\n"); 4540 return ENXIO; 4541 } 4542 if (mxge_verbose) 4543 device_printf(sc->dev, "using %s irq %ld\n", 4544 sc->legacy_irq ? "INTx" : "MSI", 4545 rman_get_start(sc->irq_res)); 4546 err = bus_setup_intr(sc->dev, sc->irq_res, 4547 INTR_TYPE_NET | INTR_MPSAFE, 4548 #if __FreeBSD_version > 700030 4549 NULL, 4550 #endif 4551 mxge_intr, &sc->ss[0], &sc->ih); 4552 if (err != 0) { 4553 bus_release_resource(sc->dev, SYS_RES_IRQ, 4554 sc->legacy_irq ? 0 : 1, sc->irq_res); 4555 if (!sc->legacy_irq) 4556 pci_release_msi(sc->dev); 4557 } 4558 return err; 4559 } 4560 4561 static void 4562 mxge_rem_msix_irqs(mxge_softc_t *sc) 4563 { 4564 int i, rid; 4565 4566 for (i = 0; i < sc->num_slices; i++) { 4567 if (sc->msix_ih[i] != NULL) { 4568 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4569 sc->msix_ih[i]); 4570 sc->msix_ih[i] = NULL; 4571 } 4572 } 4573 free(sc->msix_ih, M_DEVBUF); 4574 4575 for (i = 0; i < sc->num_slices; i++) { 4576 rid = i + 1; 4577 if (sc->msix_irq_res[i] != NULL) 4578 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4579 sc->msix_irq_res[i]); 4580 sc->msix_irq_res[i] = NULL; 4581 } 4582 free(sc->msix_irq_res, M_DEVBUF); 4583 4584 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4585 sc->msix_table_res); 4586 4587 pci_release_msi(sc->dev); 4588 return; 4589 } 4590 4591 static void 4592 mxge_rem_single_irq(mxge_softc_t *sc) 4593 { 4594 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 4595 bus_release_resource(sc->dev, SYS_RES_IRQ, 4596 sc->legacy_irq ? 0 : 1, sc->irq_res); 4597 if (!sc->legacy_irq) 4598 pci_release_msi(sc->dev); 4599 } 4600 4601 static void 4602 mxge_rem_irq(mxge_softc_t *sc) 4603 { 4604 if (sc->num_slices > 1) 4605 mxge_rem_msix_irqs(sc); 4606 else 4607 mxge_rem_single_irq(sc); 4608 } 4609 4610 static int 4611 mxge_add_irq(mxge_softc_t *sc) 4612 { 4613 int err; 4614 4615 if (sc->num_slices > 1) 4616 err = mxge_add_msix_irqs(sc); 4617 else 4618 err = mxge_add_single_irq(sc); 4619 4620 if (0 && err == 0 && sc->num_slices > 1) { 4621 mxge_rem_msix_irqs(sc); 4622 err = mxge_add_msix_irqs(sc); 4623 } 4624 return err; 4625 } 4626 4627 4628 static int 4629 mxge_attach(device_t dev) 4630 { 4631 mxge_softc_t *sc = device_get_softc(dev); 4632 struct ifnet *ifp; 4633 int err, rid; 4634 4635 sc->dev = dev; 4636 mxge_fetch_tunables(sc); 4637 4638 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc); 4639 sc->tq = taskqueue_create_fast("mxge_taskq", M_WAITOK, 4640 taskqueue_thread_enqueue, 4641 &sc->tq); 4642 if (sc->tq == NULL) { 4643 err = ENOMEM; 4644 goto abort_with_nothing; 4645 } 4646 4647 err = bus_dma_tag_create(NULL, /* parent */ 4648 1, /* alignment */ 4649 0, /* boundary */ 4650 BUS_SPACE_MAXADDR, /* low */ 4651 BUS_SPACE_MAXADDR, /* high */ 4652 NULL, NULL, /* filter */ 4653 65536 + 256, /* maxsize */ 4654 MXGE_MAX_SEND_DESC, /* num segs */ 4655 65536, /* maxsegsize */ 4656 0, /* flags */ 4657 NULL, NULL, /* lock */ 4658 &sc->parent_dmat); /* tag */ 4659 4660 if (err != 0) { 4661 device_printf(sc->dev, "Err %d allocating parent dmat\n", 4662 err); 4663 goto abort_with_tq; 4664 } 4665 4666 ifp = sc->ifp = if_alloc(IFT_ETHER); 4667 if (ifp == NULL) { 4668 device_printf(dev, "can not if_alloc()\n"); 4669 err = ENOSPC; 4670 goto abort_with_parent_dmat; 4671 } 4672 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4673 4674 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", 4675 device_get_nameunit(dev)); 4676 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF); 4677 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), 4678 "%s:drv", device_get_nameunit(dev)); 4679 mtx_init(&sc->driver_mtx, sc->driver_mtx_name, 4680 MTX_NETWORK_LOCK, MTX_DEF); 4681 4682 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); 4683 4684 mxge_setup_cfg_space(sc); 4685 4686 /* Map the board into the kernel */ 4687 rid = PCIR_BARS; 4688 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, 4689 ~0, 1, RF_ACTIVE); 4690 if (sc->mem_res == NULL) { 4691 device_printf(dev, "could not map memory\n"); 4692 err = ENXIO; 4693 goto abort_with_lock; 4694 } 4695 sc->sram = rman_get_virtual(sc->mem_res); 4696 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4697 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4698 device_printf(dev, "impossible memory region size %ld\n", 4699 rman_get_size(sc->mem_res)); 4700 err = ENXIO; 4701 goto abort_with_mem_res; 4702 } 4703 4704 /* make NULL terminated copy of the EEPROM strings section of 4705 lanai SRAM */ 4706 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4707 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4708 rman_get_bushandle(sc->mem_res), 4709 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4710 sc->eeprom_strings, 4711 MXGE_EEPROM_STRINGS_SIZE - 2); 4712 err = mxge_parse_strings(sc); 4713 if (err != 0) 4714 goto abort_with_mem_res; 4715 4716 /* Enable write combining for efficient use of PCIe bus */ 4717 mxge_enable_wc(sc); 4718 4719 /* Allocate the out of band dma memory */ 4720 err = mxge_dma_alloc(sc, &sc->cmd_dma, 4721 sizeof (mxge_cmd_t), 64); 4722 if (err != 0) 4723 goto abort_with_mem_res; 4724 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 4725 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4726 if (err != 0) 4727 goto abort_with_cmd_dma; 4728 4729 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4730 if (err != 0) 4731 goto abort_with_zeropad_dma; 4732 4733 /* select & load the firmware */ 4734 err = mxge_select_firmware(sc); 4735 if (err != 0) 4736 goto abort_with_dmabench; 4737 sc->intr_coal_delay = mxge_intr_coal_delay; 4738 4739 mxge_slice_probe(sc); 4740 err = mxge_alloc_slices(sc); 4741 if (err != 0) 4742 goto abort_with_dmabench; 4743 4744 err = mxge_reset(sc, 0); 4745 if (err != 0) 4746 goto abort_with_slices; 4747 4748 err = mxge_alloc_rings(sc); 4749 if (err != 0) { 4750 device_printf(sc->dev, "failed to allocate rings\n"); 4751 goto abort_with_slices; 4752 } 4753 4754 err = mxge_add_irq(sc); 4755 if (err != 0) { 4756 device_printf(sc->dev, "failed to add irq\n"); 4757 goto abort_with_rings; 4758 } 4759 4760 ifp->if_baudrate = IF_Gbps(10UL); 4761 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 4762 IFCAP_VLAN_MTU | IFCAP_LINKSTATE; 4763 #ifdef INET 4764 ifp->if_capabilities |= IFCAP_LRO; 4765 #endif 4766 4767 #ifdef MXGE_NEW_VLAN_API 4768 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; 4769 4770 /* Only FW 1.4.32 and newer can do TSO over vlans */ 4771 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 4772 sc->fw_ver_tiny >= 32) 4773 ifp->if_capabilities |= IFCAP_VLAN_HWTSO; 4774 #endif 4775 4776 sc->max_mtu = mxge_max_mtu(sc); 4777 if (sc->max_mtu >= 9000) 4778 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 4779 else 4780 device_printf(dev, "MTU limited to %d. Install " 4781 "latest firmware for 9000 byte jumbo support\n", 4782 sc->max_mtu - ETHER_HDR_LEN); 4783 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4784 ifp->if_capenable = ifp->if_capabilities; 4785 if (sc->lro_cnt == 0) 4786 ifp->if_capenable &= ~IFCAP_LRO; 4787 sc->csum_flag = 1; 4788 ifp->if_init = mxge_init; 4789 ifp->if_softc = sc; 4790 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4791 ifp->if_ioctl = mxge_ioctl; 4792 ifp->if_start = mxge_start; 4793 /* Initialise the ifmedia structure */ 4794 ifmedia_init(&sc->media, 0, mxge_media_change, 4795 mxge_media_status); 4796 mxge_media_init(sc); 4797 mxge_media_probe(sc); 4798 sc->dying = 0; 4799 ether_ifattach(ifp, sc->mac_addr); 4800 /* ether_ifattach sets mtu to ETHERMTU */ 4801 if (mxge_initial_mtu != ETHERMTU) 4802 mxge_change_mtu(sc, mxge_initial_mtu); 4803 4804 mxge_add_sysctls(sc); 4805 #ifdef IFNET_BUF_RING 4806 ifp->if_transmit = mxge_transmit; 4807 ifp->if_qflush = mxge_qflush; 4808 #endif 4809 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq", 4810 device_get_nameunit(sc->dev)); 4811 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 4812 return 0; 4813 4814 abort_with_rings: 4815 mxge_free_rings(sc); 4816 abort_with_slices: 4817 mxge_free_slices(sc); 4818 abort_with_dmabench: 4819 mxge_dma_free(&sc->dmabench_dma); 4820 abort_with_zeropad_dma: 4821 mxge_dma_free(&sc->zeropad_dma); 4822 abort_with_cmd_dma: 4823 mxge_dma_free(&sc->cmd_dma); 4824 abort_with_mem_res: 4825 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4826 abort_with_lock: 4827 pci_disable_busmaster(dev); 4828 mtx_destroy(&sc->cmd_mtx); 4829 mtx_destroy(&sc->driver_mtx); 4830 if_free(ifp); 4831 abort_with_parent_dmat: 4832 bus_dma_tag_destroy(sc->parent_dmat); 4833 abort_with_tq: 4834 if (sc->tq != NULL) { 4835 taskqueue_drain(sc->tq, &sc->watchdog_task); 4836 taskqueue_free(sc->tq); 4837 sc->tq = NULL; 4838 } 4839 abort_with_nothing: 4840 return err; 4841 } 4842 4843 static int 4844 mxge_detach(device_t dev) 4845 { 4846 mxge_softc_t *sc = device_get_softc(dev); 4847 4848 if (mxge_vlans_active(sc)) { 4849 device_printf(sc->dev, 4850 "Detach vlans before removing module\n"); 4851 return EBUSY; 4852 } 4853 mtx_lock(&sc->driver_mtx); 4854 sc->dying = 1; 4855 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 4856 mxge_close(sc, 0); 4857 mtx_unlock(&sc->driver_mtx); 4858 ether_ifdetach(sc->ifp); 4859 if (sc->tq != NULL) { 4860 taskqueue_drain(sc->tq, &sc->watchdog_task); 4861 taskqueue_free(sc->tq); 4862 sc->tq = NULL; 4863 } 4864 callout_drain(&sc->co_hdl); 4865 ifmedia_removeall(&sc->media); 4866 mxge_dummy_rdma(sc, 0); 4867 mxge_rem_sysctls(sc); 4868 mxge_rem_irq(sc); 4869 mxge_free_rings(sc); 4870 mxge_free_slices(sc); 4871 mxge_dma_free(&sc->dmabench_dma); 4872 mxge_dma_free(&sc->zeropad_dma); 4873 mxge_dma_free(&sc->cmd_dma); 4874 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4875 pci_disable_busmaster(dev); 4876 mtx_destroy(&sc->cmd_mtx); 4877 mtx_destroy(&sc->driver_mtx); 4878 if_free(sc->ifp); 4879 bus_dma_tag_destroy(sc->parent_dmat); 4880 return 0; 4881 } 4882 4883 static int 4884 mxge_shutdown(device_t dev) 4885 { 4886 return 0; 4887 } 4888 4889 /* 4890 This file uses Myri10GE driver indentation. 4891 4892 Local Variables: 4893 c-file-style:"linux" 4894 tab-width:8 4895 End: 4896 */ 4897