1 /****************************************************************************** 2 3 Copyright (c) 2006-2009, Myricom Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Myricom Inc, nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 27 28 ***************************************************************************/ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/linker.h> 36 #include <sys/firmware.h> 37 #include <sys/endian.h> 38 #include <sys/sockio.h> 39 #include <sys/mbuf.h> 40 #include <sys/malloc.h> 41 #include <sys/kdb.h> 42 #include <sys/kernel.h> 43 #include <sys/lock.h> 44 #include <sys/module.h> 45 #include <sys/socket.h> 46 #include <sys/sysctl.h> 47 #include <sys/sx.h> 48 #include <sys/taskqueue.h> 49 50 #include <net/if.h> 51 #include <net/if_arp.h> 52 #include <net/ethernet.h> 53 #include <net/if_dl.h> 54 #include <net/if_media.h> 55 56 #include <net/bpf.h> 57 58 #include <net/if_types.h> 59 #include <net/if_vlan_var.h> 60 #include <net/zlib.h> 61 62 #include <netinet/in_systm.h> 63 #include <netinet/in.h> 64 #include <netinet/ip.h> 65 #include <netinet/tcp.h> 66 67 #include <machine/bus.h> 68 #include <machine/in_cksum.h> 69 #include <machine/resource.h> 70 #include <sys/bus.h> 71 #include <sys/rman.h> 72 #include <sys/smp.h> 73 74 #include <dev/pci/pcireg.h> 75 #include <dev/pci/pcivar.h> 76 #include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */ 77 78 #include <vm/vm.h> /* for pmap_mapdev() */ 79 #include <vm/pmap.h> 80 81 #if defined(__i386) || defined(__amd64) 82 #include <machine/specialreg.h> 83 #endif 84 85 #include <dev/mxge/mxge_mcp.h> 86 #include <dev/mxge/mcp_gen_header.h> 87 /*#define MXGE_FAKE_IFP*/ 88 #include <dev/mxge/if_mxge_var.h> 89 #ifdef IFNET_BUF_RING 90 #include <sys/buf_ring.h> 91 #endif 92 93 #include "opt_inet.h" 94 95 /* tunable params */ 96 static int mxge_nvidia_ecrc_enable = 1; 97 static int mxge_force_firmware = 0; 98 static int mxge_intr_coal_delay = 30; 99 static int mxge_deassert_wait = 1; 100 static int mxge_flow_control = 1; 101 static int mxge_verbose = 0; 102 static int mxge_lro_cnt = 8; 103 static int mxge_ticks; 104 static int mxge_max_slices = 1; 105 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 106 static int mxge_always_promisc = 0; 107 static int mxge_initial_mtu = ETHERMTU_JUMBO; 108 static int mxge_throttle = 0; 109 static char *mxge_fw_unaligned = "mxge_ethp_z8e"; 110 static char *mxge_fw_aligned = "mxge_eth_z8e"; 111 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; 112 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; 113 114 static int mxge_probe(device_t dev); 115 static int mxge_attach(device_t dev); 116 static int mxge_detach(device_t dev); 117 static int mxge_shutdown(device_t dev); 118 static void mxge_intr(void *arg); 119 120 static device_method_t mxge_methods[] = 121 { 122 /* Device interface */ 123 DEVMETHOD(device_probe, mxge_probe), 124 DEVMETHOD(device_attach, mxge_attach), 125 DEVMETHOD(device_detach, mxge_detach), 126 DEVMETHOD(device_shutdown, mxge_shutdown), 127 {0, 0} 128 }; 129 130 static driver_t mxge_driver = 131 { 132 "mxge", 133 mxge_methods, 134 sizeof(mxge_softc_t), 135 }; 136 137 static devclass_t mxge_devclass; 138 139 /* Declare ourselves to be a child of the PCI bus.*/ 140 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0); 141 MODULE_DEPEND(mxge, firmware, 1, 1, 1); 142 MODULE_DEPEND(mxge, zlib, 1, 1, 1); 143 144 static int mxge_load_firmware(mxge_softc_t *sc, int adopt); 145 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 146 static int mxge_close(mxge_softc_t *sc, int down); 147 static int mxge_open(mxge_softc_t *sc); 148 static void mxge_tick(void *arg); 149 150 static int 151 mxge_probe(device_t dev) 152 { 153 int rev; 154 155 156 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && 157 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) || 158 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) { 159 rev = pci_get_revid(dev); 160 switch (rev) { 161 case MXGE_PCI_REV_Z8E: 162 device_set_desc(dev, "Myri10G-PCIE-8A"); 163 break; 164 case MXGE_PCI_REV_Z8ES: 165 device_set_desc(dev, "Myri10G-PCIE-8B"); 166 break; 167 default: 168 device_set_desc(dev, "Myri10G-PCIE-8??"); 169 device_printf(dev, "Unrecognized rev %d NIC\n", 170 rev); 171 break; 172 } 173 return 0; 174 } 175 return ENXIO; 176 } 177 178 static void 179 mxge_enable_wc(mxge_softc_t *sc) 180 { 181 #if defined(__i386) || defined(__amd64) 182 vm_offset_t len; 183 int err; 184 185 sc->wc = 1; 186 len = rman_get_size(sc->mem_res); 187 err = pmap_change_attr((vm_offset_t) sc->sram, 188 len, PAT_WRITE_COMBINING); 189 if (err != 0) { 190 device_printf(sc->dev, "pmap_change_attr failed, %d\n", 191 err); 192 sc->wc = 0; 193 } 194 #endif 195 } 196 197 198 /* callback to get our DMA address */ 199 static void 200 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, 201 int error) 202 { 203 if (error == 0) { 204 *(bus_addr_t *) arg = segs->ds_addr; 205 } 206 } 207 208 static int 209 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, 210 bus_size_t alignment) 211 { 212 int err; 213 device_t dev = sc->dev; 214 bus_size_t boundary, maxsegsize; 215 216 if (bytes > 4096 && alignment == 4096) { 217 boundary = 0; 218 maxsegsize = bytes; 219 } else { 220 boundary = 4096; 221 maxsegsize = 4096; 222 } 223 224 /* allocate DMAable memory tags */ 225 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 226 alignment, /* alignment */ 227 boundary, /* boundary */ 228 BUS_SPACE_MAXADDR, /* low */ 229 BUS_SPACE_MAXADDR, /* high */ 230 NULL, NULL, /* filter */ 231 bytes, /* maxsize */ 232 1, /* num segs */ 233 maxsegsize, /* maxsegsize */ 234 BUS_DMA_COHERENT, /* flags */ 235 NULL, NULL, /* lock */ 236 &dma->dmat); /* tag */ 237 if (err != 0) { 238 device_printf(dev, "couldn't alloc tag (err = %d)\n", err); 239 return err; 240 } 241 242 /* allocate DMAable memory & map */ 243 err = bus_dmamem_alloc(dma->dmat, &dma->addr, 244 (BUS_DMA_WAITOK | BUS_DMA_COHERENT 245 | BUS_DMA_ZERO), &dma->map); 246 if (err != 0) { 247 device_printf(dev, "couldn't alloc mem (err = %d)\n", err); 248 goto abort_with_dmat; 249 } 250 251 /* load the memory */ 252 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, 253 mxge_dmamap_callback, 254 (void *)&dma->bus_addr, 0); 255 if (err != 0) { 256 device_printf(dev, "couldn't load map (err = %d)\n", err); 257 goto abort_with_mem; 258 } 259 return 0; 260 261 abort_with_mem: 262 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 263 abort_with_dmat: 264 (void)bus_dma_tag_destroy(dma->dmat); 265 return err; 266 } 267 268 269 static void 270 mxge_dma_free(mxge_dma_t *dma) 271 { 272 bus_dmamap_unload(dma->dmat, dma->map); 273 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 274 (void)bus_dma_tag_destroy(dma->dmat); 275 } 276 277 /* 278 * The eeprom strings on the lanaiX have the format 279 * SN=x\0 280 * MAC=x:x:x:x:x:x\0 281 * PC=text\0 282 */ 283 284 static int 285 mxge_parse_strings(mxge_softc_t *sc) 286 { 287 #define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++) 288 289 char *ptr, *limit; 290 int i, found_mac; 291 292 ptr = sc->eeprom_strings; 293 limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE; 294 found_mac = 0; 295 while (ptr < limit && *ptr != '\0') { 296 if (memcmp(ptr, "MAC=", 4) == 0) { 297 ptr += 1; 298 sc->mac_addr_string = ptr; 299 for (i = 0; i < 6; i++) { 300 ptr += 3; 301 if ((ptr + 2) > limit) 302 goto abort; 303 sc->mac_addr[i] = strtoul(ptr, NULL, 16); 304 found_mac = 1; 305 } 306 } else if (memcmp(ptr, "PC=", 3) == 0) { 307 ptr += 3; 308 strncpy(sc->product_code_string, ptr, 309 sizeof (sc->product_code_string) - 1); 310 } else if (memcmp(ptr, "SN=", 3) == 0) { 311 ptr += 3; 312 strncpy(sc->serial_number_string, ptr, 313 sizeof (sc->serial_number_string) - 1); 314 } 315 MXGE_NEXT_STRING(ptr); 316 } 317 318 if (found_mac) 319 return 0; 320 321 abort: 322 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 323 324 return ENXIO; 325 } 326 327 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 328 static void 329 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 330 { 331 uint32_t val; 332 unsigned long base, off; 333 char *va, *cfgptr; 334 device_t pdev, mcp55; 335 uint16_t vendor_id, device_id, word; 336 uintptr_t bus, slot, func, ivend, idev; 337 uint32_t *ptr32; 338 339 340 if (!mxge_nvidia_ecrc_enable) 341 return; 342 343 pdev = device_get_parent(device_get_parent(sc->dev)); 344 if (pdev == NULL) { 345 device_printf(sc->dev, "could not find parent?\n"); 346 return; 347 } 348 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 349 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 350 351 if (vendor_id != 0x10de) 352 return; 353 354 base = 0; 355 356 if (device_id == 0x005d) { 357 /* ck804, base address is magic */ 358 base = 0xe0000000UL; 359 } else if (device_id >= 0x0374 && device_id <= 0x378) { 360 /* mcp55, base address stored in chipset */ 361 mcp55 = pci_find_bsf(0, 0, 0); 362 if (mcp55 && 363 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 364 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 365 word = pci_read_config(mcp55, 0x90, 2); 366 base = ((unsigned long)word & 0x7ffeU) << 25; 367 } 368 } 369 if (!base) 370 return; 371 372 /* XXXX 373 Test below is commented because it is believed that doing 374 config read/write beyond 0xff will access the config space 375 for the next larger function. Uncomment this and remove 376 the hacky pmap_mapdev() way of accessing config space when 377 FreeBSD grows support for extended pcie config space access 378 */ 379 #if 0 380 /* See if we can, by some miracle, access the extended 381 config space */ 382 val = pci_read_config(pdev, 0x178, 4); 383 if (val != 0xffffffff) { 384 val |= 0x40; 385 pci_write_config(pdev, 0x178, val, 4); 386 return; 387 } 388 #endif 389 /* Rather than using normal pci config space writes, we must 390 * map the Nvidia config space ourselves. This is because on 391 * opteron/nvidia class machine the 0xe000000 mapping is 392 * handled by the nvidia chipset, that means the internal PCI 393 * device (the on-chip northbridge), or the amd-8131 bridge 394 * and things behind them are not visible by this method. 395 */ 396 397 BUS_READ_IVAR(device_get_parent(pdev), pdev, 398 PCI_IVAR_BUS, &bus); 399 BUS_READ_IVAR(device_get_parent(pdev), pdev, 400 PCI_IVAR_SLOT, &slot); 401 BUS_READ_IVAR(device_get_parent(pdev), pdev, 402 PCI_IVAR_FUNCTION, &func); 403 BUS_READ_IVAR(device_get_parent(pdev), pdev, 404 PCI_IVAR_VENDOR, &ivend); 405 BUS_READ_IVAR(device_get_parent(pdev), pdev, 406 PCI_IVAR_DEVICE, &idev); 407 408 off = base 409 + 0x00100000UL * (unsigned long)bus 410 + 0x00001000UL * (unsigned long)(func 411 + 8 * slot); 412 413 /* map it into the kernel */ 414 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 415 416 417 if (va == NULL) { 418 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 419 return; 420 } 421 /* get a pointer to the config space mapped into the kernel */ 422 cfgptr = va + (off & PAGE_MASK); 423 424 /* make sure that we can really access it */ 425 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 426 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 427 if (! (vendor_id == ivend && device_id == idev)) { 428 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 429 vendor_id, device_id); 430 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 431 return; 432 } 433 434 ptr32 = (uint32_t*)(cfgptr + 0x178); 435 val = *ptr32; 436 437 if (val == 0xffffffff) { 438 device_printf(sc->dev, "extended mapping failed\n"); 439 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 440 return; 441 } 442 *ptr32 = val | 0x40; 443 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 444 if (mxge_verbose) 445 device_printf(sc->dev, 446 "Enabled ECRC on upstream Nvidia bridge " 447 "at %d:%d:%d\n", 448 (int)bus, (int)slot, (int)func); 449 return; 450 } 451 #else 452 static void 453 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 454 { 455 device_printf(sc->dev, 456 "Nforce 4 chipset on non-x86/amd64!?!?!\n"); 457 return; 458 } 459 #endif 460 461 462 static int 463 mxge_dma_test(mxge_softc_t *sc, int test_type) 464 { 465 mxge_cmd_t cmd; 466 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr; 467 int status; 468 uint32_t len; 469 char *test = " "; 470 471 472 /* Run a small DMA test. 473 * The magic multipliers to the length tell the firmware 474 * to do DMA read, write, or read+write tests. The 475 * results are returned in cmd.data0. The upper 16 476 * bits of the return is the number of transfers completed. 477 * The lower 16 bits is the time in 0.5us ticks that the 478 * transfers took to complete. 479 */ 480 481 len = sc->tx_boundary; 482 483 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 484 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 485 cmd.data2 = len * 0x10000; 486 status = mxge_send_cmd(sc, test_type, &cmd); 487 if (status != 0) { 488 test = "read"; 489 goto abort; 490 } 491 sc->read_dma = ((cmd.data0>>16) * len * 2) / 492 (cmd.data0 & 0xffff); 493 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 494 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 495 cmd.data2 = len * 0x1; 496 status = mxge_send_cmd(sc, test_type, &cmd); 497 if (status != 0) { 498 test = "write"; 499 goto abort; 500 } 501 sc->write_dma = ((cmd.data0>>16) * len * 2) / 502 (cmd.data0 & 0xffff); 503 504 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 505 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 506 cmd.data2 = len * 0x10001; 507 status = mxge_send_cmd(sc, test_type, &cmd); 508 if (status != 0) { 509 test = "read/write"; 510 goto abort; 511 } 512 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 513 (cmd.data0 & 0xffff); 514 515 abort: 516 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 517 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 518 test, status); 519 520 return status; 521 } 522 523 /* 524 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 525 * when the PCI-E Completion packets are aligned on an 8-byte 526 * boundary. Some PCI-E chip sets always align Completion packets; on 527 * the ones that do not, the alignment can be enforced by enabling 528 * ECRC generation (if supported). 529 * 530 * When PCI-E Completion packets are not aligned, it is actually more 531 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 532 * 533 * If the driver can neither enable ECRC nor verify that it has 534 * already been enabled, then it must use a firmware image which works 535 * around unaligned completion packets (ethp_z8e.dat), and it should 536 * also ensure that it never gives the device a Read-DMA which is 537 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 538 * enabled, then the driver should use the aligned (eth_z8e.dat) 539 * firmware image, and set tx_boundary to 4KB. 540 */ 541 542 static int 543 mxge_firmware_probe(mxge_softc_t *sc) 544 { 545 device_t dev = sc->dev; 546 int reg, status; 547 uint16_t pectl; 548 549 sc->tx_boundary = 4096; 550 /* 551 * Verify the max read request size was set to 4KB 552 * before trying the test with 4KB. 553 */ 554 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { 555 pectl = pci_read_config(dev, reg + 0x8, 2); 556 if ((pectl & (5 << 12)) != (5 << 12)) { 557 device_printf(dev, "Max Read Req. size != 4k (0x%x\n", 558 pectl); 559 sc->tx_boundary = 2048; 560 } 561 } 562 563 /* 564 * load the optimized firmware (which assumes aligned PCIe 565 * completions) in order to see if it works on this host. 566 */ 567 sc->fw_name = mxge_fw_aligned; 568 status = mxge_load_firmware(sc, 1); 569 if (status != 0) { 570 return status; 571 } 572 573 /* 574 * Enable ECRC if possible 575 */ 576 mxge_enable_nvidia_ecrc(sc); 577 578 /* 579 * Run a DMA test which watches for unaligned completions and 580 * aborts on the first one seen. 581 */ 582 583 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 584 if (status == 0) 585 return 0; /* keep the aligned firmware */ 586 587 if (status != E2BIG) 588 device_printf(dev, "DMA test failed: %d\n", status); 589 if (status == ENOSYS) 590 device_printf(dev, "Falling back to ethp! " 591 "Please install up to date fw\n"); 592 return status; 593 } 594 595 static int 596 mxge_select_firmware(mxge_softc_t *sc) 597 { 598 int aligned = 0; 599 int force_firmware = mxge_force_firmware; 600 601 if (sc->throttle) 602 force_firmware = sc->throttle; 603 604 if (force_firmware != 0) { 605 if (force_firmware == 1) 606 aligned = 1; 607 else 608 aligned = 0; 609 if (mxge_verbose) 610 device_printf(sc->dev, 611 "Assuming %s completions (forced)\n", 612 aligned ? "aligned" : "unaligned"); 613 goto abort; 614 } 615 616 /* if the PCIe link width is 4 or less, we can use the aligned 617 firmware and skip any checks */ 618 if (sc->link_width != 0 && sc->link_width <= 4) { 619 device_printf(sc->dev, 620 "PCIe x%d Link, expect reduced performance\n", 621 sc->link_width); 622 aligned = 1; 623 goto abort; 624 } 625 626 if (0 == mxge_firmware_probe(sc)) 627 return 0; 628 629 abort: 630 if (aligned) { 631 sc->fw_name = mxge_fw_aligned; 632 sc->tx_boundary = 4096; 633 } else { 634 sc->fw_name = mxge_fw_unaligned; 635 sc->tx_boundary = 2048; 636 } 637 return (mxge_load_firmware(sc, 0)); 638 } 639 640 union qualhack 641 { 642 const char *ro_char; 643 char *rw_char; 644 }; 645 646 static int 647 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 648 { 649 650 651 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 652 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 653 be32toh(hdr->mcp_type)); 654 return EIO; 655 } 656 657 /* save firmware version for sysctl */ 658 strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version)); 659 if (mxge_verbose) 660 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 661 662 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 663 &sc->fw_ver_minor, &sc->fw_ver_tiny); 664 665 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 666 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 667 device_printf(sc->dev, "Found firmware version %s\n", 668 sc->fw_version); 669 device_printf(sc->dev, "Driver needs %d.%d\n", 670 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 671 return EINVAL; 672 } 673 return 0; 674 675 } 676 677 static void * 678 z_alloc(void *nil, u_int items, u_int size) 679 { 680 void *ptr; 681 682 ptr = malloc(items * size, M_TEMP, M_NOWAIT); 683 return ptr; 684 } 685 686 static void 687 z_free(void *nil, void *ptr) 688 { 689 free(ptr, M_TEMP); 690 } 691 692 693 static int 694 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 695 { 696 z_stream zs; 697 char *inflate_buffer; 698 const struct firmware *fw; 699 const mcp_gen_header_t *hdr; 700 unsigned hdr_offset; 701 int status; 702 unsigned int i; 703 char dummy; 704 size_t fw_len; 705 706 fw = firmware_get(sc->fw_name); 707 if (fw == NULL) { 708 device_printf(sc->dev, "Could not find firmware image %s\n", 709 sc->fw_name); 710 return ENOENT; 711 } 712 713 714 715 /* setup zlib and decompress f/w */ 716 bzero(&zs, sizeof (zs)); 717 zs.zalloc = z_alloc; 718 zs.zfree = z_free; 719 status = inflateInit(&zs); 720 if (status != Z_OK) { 721 status = EIO; 722 goto abort_with_fw; 723 } 724 725 /* the uncompressed size is stored as the firmware version, 726 which would otherwise go unused */ 727 fw_len = (size_t) fw->version; 728 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); 729 if (inflate_buffer == NULL) 730 goto abort_with_zs; 731 zs.avail_in = fw->datasize; 732 zs.next_in = __DECONST(char *, fw->data); 733 zs.avail_out = fw_len; 734 zs.next_out = inflate_buffer; 735 status = inflate(&zs, Z_FINISH); 736 if (status != Z_STREAM_END) { 737 device_printf(sc->dev, "zlib %d\n", status); 738 status = EIO; 739 goto abort_with_buffer; 740 } 741 742 /* check id */ 743 hdr_offset = htobe32(*(const uint32_t *) 744 (inflate_buffer + MCP_HEADER_PTR_OFFSET)); 745 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 746 device_printf(sc->dev, "Bad firmware file"); 747 status = EIO; 748 goto abort_with_buffer; 749 } 750 hdr = (const void*)(inflate_buffer + hdr_offset); 751 752 status = mxge_validate_firmware(sc, hdr); 753 if (status != 0) 754 goto abort_with_buffer; 755 756 /* Copy the inflated firmware to NIC SRAM. */ 757 for (i = 0; i < fw_len; i += 256) { 758 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 759 inflate_buffer + i, 760 min(256U, (unsigned)(fw_len - i))); 761 wmb(); 762 dummy = *sc->sram; 763 wmb(); 764 } 765 766 *limit = fw_len; 767 status = 0; 768 abort_with_buffer: 769 free(inflate_buffer, M_TEMP); 770 abort_with_zs: 771 inflateEnd(&zs); 772 abort_with_fw: 773 firmware_put(fw, FIRMWARE_UNLOAD); 774 return status; 775 } 776 777 /* 778 * Enable or disable periodic RDMAs from the host to make certain 779 * chipsets resend dropped PCIe messages 780 */ 781 782 static void 783 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 784 { 785 char buf_bytes[72]; 786 volatile uint32_t *confirm; 787 volatile char *submit; 788 uint32_t *buf, dma_low, dma_high; 789 int i; 790 791 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 792 793 /* clear confirmation addr */ 794 confirm = (volatile uint32_t *)sc->cmd; 795 *confirm = 0; 796 wmb(); 797 798 /* send an rdma command to the PCIe engine, and wait for the 799 response in the confirmation address. The firmware should 800 write a -1 there to indicate it is alive and well 801 */ 802 803 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 804 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 805 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 806 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 807 buf[2] = htobe32(0xffffffff); /* confirm data */ 808 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 809 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 810 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 811 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 812 buf[5] = htobe32(enable); /* enable? */ 813 814 815 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 816 817 mxge_pio_copy(submit, buf, 64); 818 wmb(); 819 DELAY(1000); 820 wmb(); 821 i = 0; 822 while (*confirm != 0xffffffff && i < 20) { 823 DELAY(1000); 824 i++; 825 } 826 if (*confirm != 0xffffffff) { 827 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 828 (enable ? "enable" : "disable"), confirm, 829 *confirm); 830 } 831 return; 832 } 833 834 static int 835 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 836 { 837 mcp_cmd_t *buf; 838 char buf_bytes[sizeof(*buf) + 8]; 839 volatile mcp_cmd_response_t *response = sc->cmd; 840 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 841 uint32_t dma_low, dma_high; 842 int err, sleep_total = 0; 843 844 /* ensure buf is aligned to 8 bytes */ 845 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 846 847 buf->data0 = htobe32(data->data0); 848 buf->data1 = htobe32(data->data1); 849 buf->data2 = htobe32(data->data2); 850 buf->cmd = htobe32(cmd); 851 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 852 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 853 854 buf->response_addr.low = htobe32(dma_low); 855 buf->response_addr.high = htobe32(dma_high); 856 mtx_lock(&sc->cmd_mtx); 857 response->result = 0xffffffff; 858 wmb(); 859 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 860 861 /* wait up to 20ms */ 862 err = EAGAIN; 863 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 864 bus_dmamap_sync(sc->cmd_dma.dmat, 865 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 866 wmb(); 867 switch (be32toh(response->result)) { 868 case 0: 869 data->data0 = be32toh(response->data); 870 err = 0; 871 break; 872 case 0xffffffff: 873 DELAY(1000); 874 break; 875 case MXGEFW_CMD_UNKNOWN: 876 err = ENOSYS; 877 break; 878 case MXGEFW_CMD_ERROR_UNALIGNED: 879 err = E2BIG; 880 break; 881 case MXGEFW_CMD_ERROR_BUSY: 882 err = EBUSY; 883 break; 884 case MXGEFW_CMD_ERROR_I2C_ABSENT: 885 err = ENXIO; 886 break; 887 default: 888 device_printf(sc->dev, 889 "mxge: command %d " 890 "failed, result = %d\n", 891 cmd, be32toh(response->result)); 892 err = ENXIO; 893 break; 894 } 895 if (err != EAGAIN) 896 break; 897 } 898 if (err == EAGAIN) 899 device_printf(sc->dev, "mxge: command %d timed out" 900 "result = %d\n", 901 cmd, be32toh(response->result)); 902 mtx_unlock(&sc->cmd_mtx); 903 return err; 904 } 905 906 static int 907 mxge_adopt_running_firmware(mxge_softc_t *sc) 908 { 909 struct mcp_gen_header *hdr; 910 const size_t bytes = sizeof (struct mcp_gen_header); 911 size_t hdr_offset; 912 int status; 913 914 /* find running firmware header */ 915 hdr_offset = htobe32(*(volatile uint32_t *) 916 (sc->sram + MCP_HEADER_PTR_OFFSET)); 917 918 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 919 device_printf(sc->dev, 920 "Running firmware has bad header offset (%d)\n", 921 (int)hdr_offset); 922 return EIO; 923 } 924 925 /* copy header of running firmware from SRAM to host memory to 926 * validate firmware */ 927 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 928 if (hdr == NULL) { 929 device_printf(sc->dev, "could not malloc firmware hdr\n"); 930 return ENOMEM; 931 } 932 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 933 rman_get_bushandle(sc->mem_res), 934 hdr_offset, (char *)hdr, bytes); 935 status = mxge_validate_firmware(sc, hdr); 936 free(hdr, M_DEVBUF); 937 938 /* 939 * check to see if adopted firmware has bug where adopting 940 * it will cause broadcasts to be filtered unless the NIC 941 * is kept in ALLMULTI mode 942 */ 943 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 944 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 945 sc->adopted_rx_filter_bug = 1; 946 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 947 "working around rx filter bug\n", 948 sc->fw_ver_major, sc->fw_ver_minor, 949 sc->fw_ver_tiny); 950 } 951 952 return status; 953 } 954 955 956 static int 957 mxge_load_firmware(mxge_softc_t *sc, int adopt) 958 { 959 volatile uint32_t *confirm; 960 volatile char *submit; 961 char buf_bytes[72]; 962 uint32_t *buf, size, dma_low, dma_high; 963 int status, i; 964 965 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 966 967 size = sc->sram_size; 968 status = mxge_load_firmware_helper(sc, &size); 969 if (status) { 970 if (!adopt) 971 return status; 972 /* Try to use the currently running firmware, if 973 it is new enough */ 974 status = mxge_adopt_running_firmware(sc); 975 if (status) { 976 device_printf(sc->dev, 977 "failed to adopt running firmware\n"); 978 return status; 979 } 980 device_printf(sc->dev, 981 "Successfully adopted running firmware\n"); 982 if (sc->tx_boundary == 4096) { 983 device_printf(sc->dev, 984 "Using firmware currently running on NIC" 985 ". For optimal\n"); 986 device_printf(sc->dev, 987 "performance consider loading optimized " 988 "firmware\n"); 989 } 990 sc->fw_name = mxge_fw_unaligned; 991 sc->tx_boundary = 2048; 992 return 0; 993 } 994 /* clear confirmation addr */ 995 confirm = (volatile uint32_t *)sc->cmd; 996 *confirm = 0; 997 wmb(); 998 /* send a reload command to the bootstrap MCP, and wait for the 999 response in the confirmation address. The firmware should 1000 write a -1 there to indicate it is alive and well 1001 */ 1002 1003 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 1004 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 1005 1006 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 1007 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 1008 buf[2] = htobe32(0xffffffff); /* confirm data */ 1009 1010 /* FIX: All newest firmware should un-protect the bottom of 1011 the sram before handoff. However, the very first interfaces 1012 do not. Therefore the handoff copy must skip the first 8 bytes 1013 */ 1014 /* where the code starts*/ 1015 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 1016 buf[4] = htobe32(size - 8); /* length of code */ 1017 buf[5] = htobe32(8); /* where to copy to */ 1018 buf[6] = htobe32(0); /* where to jump to */ 1019 1020 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 1021 mxge_pio_copy(submit, buf, 64); 1022 wmb(); 1023 DELAY(1000); 1024 wmb(); 1025 i = 0; 1026 while (*confirm != 0xffffffff && i < 20) { 1027 DELAY(1000*10); 1028 i++; 1029 bus_dmamap_sync(sc->cmd_dma.dmat, 1030 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 1031 } 1032 if (*confirm != 0xffffffff) { 1033 device_printf(sc->dev,"handoff failed (%p = 0x%x)", 1034 confirm, *confirm); 1035 1036 return ENXIO; 1037 } 1038 return 0; 1039 } 1040 1041 static int 1042 mxge_update_mac_address(mxge_softc_t *sc) 1043 { 1044 mxge_cmd_t cmd; 1045 uint8_t *addr = sc->mac_addr; 1046 int status; 1047 1048 1049 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 1050 | (addr[2] << 8) | addr[3]); 1051 1052 cmd.data1 = ((addr[4] << 8) | (addr[5])); 1053 1054 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 1055 return status; 1056 } 1057 1058 static int 1059 mxge_change_pause(mxge_softc_t *sc, int pause) 1060 { 1061 mxge_cmd_t cmd; 1062 int status; 1063 1064 if (pause) 1065 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, 1066 &cmd); 1067 else 1068 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, 1069 &cmd); 1070 1071 if (status) { 1072 device_printf(sc->dev, "Failed to set flow control mode\n"); 1073 return ENXIO; 1074 } 1075 sc->pause = pause; 1076 return 0; 1077 } 1078 1079 static void 1080 mxge_change_promisc(mxge_softc_t *sc, int promisc) 1081 { 1082 mxge_cmd_t cmd; 1083 int status; 1084 1085 if (mxge_always_promisc) 1086 promisc = 1; 1087 1088 if (promisc) 1089 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, 1090 &cmd); 1091 else 1092 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, 1093 &cmd); 1094 1095 if (status) { 1096 device_printf(sc->dev, "Failed to set promisc mode\n"); 1097 } 1098 } 1099 1100 static void 1101 mxge_set_multicast_list(mxge_softc_t *sc) 1102 { 1103 mxge_cmd_t cmd; 1104 struct ifmultiaddr *ifma; 1105 struct ifnet *ifp = sc->ifp; 1106 int err; 1107 1108 /* This firmware is known to not support multicast */ 1109 if (!sc->fw_multicast_support) 1110 return; 1111 1112 /* Disable multicast filtering while we play with the lists*/ 1113 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1114 if (err != 0) { 1115 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," 1116 " error status: %d\n", err); 1117 return; 1118 } 1119 1120 if (sc->adopted_rx_filter_bug) 1121 return; 1122 1123 if (ifp->if_flags & IFF_ALLMULTI) 1124 /* request to disable multicast filtering, so quit here */ 1125 return; 1126 1127 /* Flush all the filters */ 1128 1129 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1130 if (err != 0) { 1131 device_printf(sc->dev, 1132 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" 1133 ", error status: %d\n", err); 1134 return; 1135 } 1136 1137 /* Walk the multicast list, and add each address */ 1138 1139 if_maddr_rlock(ifp); 1140 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1141 if (ifma->ifma_addr->sa_family != AF_LINK) 1142 continue; 1143 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1144 &cmd.data0, 4); 1145 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, 1146 &cmd.data1, 2); 1147 cmd.data0 = htonl(cmd.data0); 1148 cmd.data1 = htonl(cmd.data1); 1149 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1150 if (err != 0) { 1151 device_printf(sc->dev, "Failed " 1152 "MXGEFW_JOIN_MULTICAST_GROUP, error status:" 1153 "%d\t", err); 1154 /* abort, leaving multicast filtering off */ 1155 if_maddr_runlock(ifp); 1156 return; 1157 } 1158 } 1159 if_maddr_runlock(ifp); 1160 /* Enable multicast filtering */ 1161 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1162 if (err != 0) { 1163 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI" 1164 ", error status: %d\n", err); 1165 } 1166 } 1167 1168 static int 1169 mxge_max_mtu(mxge_softc_t *sc) 1170 { 1171 mxge_cmd_t cmd; 1172 int status; 1173 1174 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) 1175 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1176 1177 /* try to set nbufs to see if it we can 1178 use virtually contiguous jumbos */ 1179 cmd.data0 = 0; 1180 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 1181 &cmd); 1182 if (status == 0) 1183 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1184 1185 /* otherwise, we're limited to MJUMPAGESIZE */ 1186 return MJUMPAGESIZE - MXGEFW_PAD; 1187 } 1188 1189 static int 1190 mxge_reset(mxge_softc_t *sc, int interrupts_setup) 1191 { 1192 struct mxge_slice_state *ss; 1193 mxge_rx_done_t *rx_done; 1194 volatile uint32_t *irq_claim; 1195 mxge_cmd_t cmd; 1196 int slice, status; 1197 1198 /* try to send a reset command to the card to see if it 1199 is alive */ 1200 memset(&cmd, 0, sizeof (cmd)); 1201 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1202 if (status != 0) { 1203 device_printf(sc->dev, "failed reset\n"); 1204 return ENXIO; 1205 } 1206 1207 mxge_dummy_rdma(sc, 1); 1208 1209 1210 /* set the intrq size */ 1211 cmd.data0 = sc->rx_ring_size; 1212 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1213 1214 /* 1215 * Even though we already know how many slices are supported 1216 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES 1217 * has magic side effects, and must be called after a reset. 1218 * It must be called prior to calling any RSS related cmds, 1219 * including assigning an interrupt queue for anything but 1220 * slice 0. It must also be called *after* 1221 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 1222 * the firmware to compute offsets. 1223 */ 1224 1225 if (sc->num_slices > 1) { 1226 /* ask the maximum number of slices it supports */ 1227 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 1228 &cmd); 1229 if (status != 0) { 1230 device_printf(sc->dev, 1231 "failed to get number of slices\n"); 1232 return status; 1233 } 1234 /* 1235 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 1236 * to setting up the interrupt queue DMA 1237 */ 1238 cmd.data0 = sc->num_slices; 1239 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 1240 #ifdef IFNET_BUF_RING 1241 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 1242 #endif 1243 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, 1244 &cmd); 1245 if (status != 0) { 1246 device_printf(sc->dev, 1247 "failed to set number of slices\n"); 1248 return status; 1249 } 1250 } 1251 1252 1253 if (interrupts_setup) { 1254 /* Now exchange information about interrupts */ 1255 for (slice = 0; slice < sc->num_slices; slice++) { 1256 rx_done = &sc->ss[slice].rx_done; 1257 memset(rx_done->entry, 0, sc->rx_ring_size); 1258 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr); 1259 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr); 1260 cmd.data2 = slice; 1261 status |= mxge_send_cmd(sc, 1262 MXGEFW_CMD_SET_INTRQ_DMA, 1263 &cmd); 1264 } 1265 } 1266 1267 status |= mxge_send_cmd(sc, 1268 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); 1269 1270 1271 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1272 1273 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1274 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1275 1276 1277 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1278 &cmd); 1279 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1280 if (status != 0) { 1281 device_printf(sc->dev, "failed set interrupt parameters\n"); 1282 return status; 1283 } 1284 1285 1286 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1287 1288 1289 /* run a DMA benchmark */ 1290 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST); 1291 1292 for (slice = 0; slice < sc->num_slices; slice++) { 1293 ss = &sc->ss[slice]; 1294 1295 ss->irq_claim = irq_claim + (2 * slice); 1296 /* reset mcp/driver shared state back to 0 */ 1297 ss->rx_done.idx = 0; 1298 ss->rx_done.cnt = 0; 1299 ss->tx.req = 0; 1300 ss->tx.done = 0; 1301 ss->tx.pkt_done = 0; 1302 ss->tx.queue_active = 0; 1303 ss->tx.activate = 0; 1304 ss->tx.deactivate = 0; 1305 ss->tx.wake = 0; 1306 ss->tx.defrag = 0; 1307 ss->tx.stall = 0; 1308 ss->rx_big.cnt = 0; 1309 ss->rx_small.cnt = 0; 1310 ss->lro_bad_csum = 0; 1311 ss->lro_queued = 0; 1312 ss->lro_flushed = 0; 1313 if (ss->fw_stats != NULL) { 1314 bzero(ss->fw_stats, sizeof *ss->fw_stats); 1315 } 1316 } 1317 sc->rdma_tags_available = 15; 1318 status = mxge_update_mac_address(sc); 1319 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC); 1320 mxge_change_pause(sc, sc->pause); 1321 mxge_set_multicast_list(sc); 1322 if (sc->throttle) { 1323 cmd.data0 = sc->throttle; 1324 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, 1325 &cmd)) { 1326 device_printf(sc->dev, 1327 "can't enable throttle\n"); 1328 } 1329 } 1330 return status; 1331 } 1332 1333 static int 1334 mxge_change_throttle(SYSCTL_HANDLER_ARGS) 1335 { 1336 mxge_cmd_t cmd; 1337 mxge_softc_t *sc; 1338 int err; 1339 unsigned int throttle; 1340 1341 sc = arg1; 1342 throttle = sc->throttle; 1343 err = sysctl_handle_int(oidp, &throttle, arg2, req); 1344 if (err != 0) { 1345 return err; 1346 } 1347 1348 if (throttle == sc->throttle) 1349 return 0; 1350 1351 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE) 1352 return EINVAL; 1353 1354 mtx_lock(&sc->driver_mtx); 1355 cmd.data0 = throttle; 1356 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd); 1357 if (err == 0) 1358 sc->throttle = throttle; 1359 mtx_unlock(&sc->driver_mtx); 1360 return err; 1361 } 1362 1363 static int 1364 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1365 { 1366 mxge_softc_t *sc; 1367 unsigned int intr_coal_delay; 1368 int err; 1369 1370 sc = arg1; 1371 intr_coal_delay = sc->intr_coal_delay; 1372 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1373 if (err != 0) { 1374 return err; 1375 } 1376 if (intr_coal_delay == sc->intr_coal_delay) 1377 return 0; 1378 1379 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1380 return EINVAL; 1381 1382 mtx_lock(&sc->driver_mtx); 1383 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1384 sc->intr_coal_delay = intr_coal_delay; 1385 1386 mtx_unlock(&sc->driver_mtx); 1387 return err; 1388 } 1389 1390 static int 1391 mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1392 { 1393 mxge_softc_t *sc; 1394 unsigned int enabled; 1395 int err; 1396 1397 sc = arg1; 1398 enabled = sc->pause; 1399 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1400 if (err != 0) { 1401 return err; 1402 } 1403 if (enabled == sc->pause) 1404 return 0; 1405 1406 mtx_lock(&sc->driver_mtx); 1407 err = mxge_change_pause(sc, enabled); 1408 mtx_unlock(&sc->driver_mtx); 1409 return err; 1410 } 1411 1412 static int 1413 mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt) 1414 { 1415 struct ifnet *ifp; 1416 int err = 0; 1417 1418 ifp = sc->ifp; 1419 if (lro_cnt == 0) 1420 ifp->if_capenable &= ~IFCAP_LRO; 1421 else 1422 ifp->if_capenable |= IFCAP_LRO; 1423 sc->lro_cnt = lro_cnt; 1424 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1425 mxge_close(sc, 0); 1426 err = mxge_open(sc); 1427 } 1428 return err; 1429 } 1430 1431 static int 1432 mxge_change_lro(SYSCTL_HANDLER_ARGS) 1433 { 1434 mxge_softc_t *sc; 1435 unsigned int lro_cnt; 1436 int err; 1437 1438 sc = arg1; 1439 lro_cnt = sc->lro_cnt; 1440 err = sysctl_handle_int(oidp, &lro_cnt, arg2, req); 1441 if (err != 0) 1442 return err; 1443 1444 if (lro_cnt == sc->lro_cnt) 1445 return 0; 1446 1447 if (lro_cnt > 128) 1448 return EINVAL; 1449 1450 mtx_lock(&sc->driver_mtx); 1451 err = mxge_change_lro_locked(sc, lro_cnt); 1452 mtx_unlock(&sc->driver_mtx); 1453 return err; 1454 } 1455 1456 static int 1457 mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1458 { 1459 int err; 1460 1461 if (arg1 == NULL) 1462 return EFAULT; 1463 arg2 = be32toh(*(int *)arg1); 1464 arg1 = NULL; 1465 err = sysctl_handle_int(oidp, arg1, arg2, req); 1466 1467 return err; 1468 } 1469 1470 static void 1471 mxge_rem_sysctls(mxge_softc_t *sc) 1472 { 1473 struct mxge_slice_state *ss; 1474 int slice; 1475 1476 if (sc->slice_sysctl_tree == NULL) 1477 return; 1478 1479 for (slice = 0; slice < sc->num_slices; slice++) { 1480 ss = &sc->ss[slice]; 1481 if (ss == NULL || ss->sysctl_tree == NULL) 1482 continue; 1483 sysctl_ctx_free(&ss->sysctl_ctx); 1484 ss->sysctl_tree = NULL; 1485 } 1486 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1487 sc->slice_sysctl_tree = NULL; 1488 } 1489 1490 static void 1491 mxge_add_sysctls(mxge_softc_t *sc) 1492 { 1493 struct sysctl_ctx_list *ctx; 1494 struct sysctl_oid_list *children; 1495 mcp_irq_data_t *fw; 1496 struct mxge_slice_state *ss; 1497 int slice; 1498 char slice_num[8]; 1499 1500 ctx = device_get_sysctl_ctx(sc->dev); 1501 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1502 fw = sc->ss[0].fw_stats; 1503 1504 /* random information */ 1505 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1506 "firmware_version", 1507 CTLFLAG_RD, &sc->fw_version, 1508 0, "firmware version"); 1509 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1510 "serial_number", 1511 CTLFLAG_RD, &sc->serial_number_string, 1512 0, "serial number"); 1513 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1514 "product_code", 1515 CTLFLAG_RD, &sc->product_code_string, 1516 0, "product_code"); 1517 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1518 "pcie_link_width", 1519 CTLFLAG_RD, &sc->link_width, 1520 0, "tx_boundary"); 1521 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1522 "tx_boundary", 1523 CTLFLAG_RD, &sc->tx_boundary, 1524 0, "tx_boundary"); 1525 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1526 "write_combine", 1527 CTLFLAG_RD, &sc->wc, 1528 0, "write combining PIO?"); 1529 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1530 "read_dma_MBs", 1531 CTLFLAG_RD, &sc->read_dma, 1532 0, "DMA Read speed in MB/s"); 1533 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1534 "write_dma_MBs", 1535 CTLFLAG_RD, &sc->write_dma, 1536 0, "DMA Write speed in MB/s"); 1537 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1538 "read_write_dma_MBs", 1539 CTLFLAG_RD, &sc->read_write_dma, 1540 0, "DMA concurrent Read/Write speed in MB/s"); 1541 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1542 "watchdog_resets", 1543 CTLFLAG_RD, &sc->watchdog_resets, 1544 0, "Number of times NIC was reset"); 1545 1546 1547 /* performance related tunables */ 1548 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1549 "intr_coal_delay", 1550 CTLTYPE_INT|CTLFLAG_RW, sc, 1551 0, mxge_change_intr_coal, 1552 "I", "interrupt coalescing delay in usecs"); 1553 1554 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1555 "throttle", 1556 CTLTYPE_INT|CTLFLAG_RW, sc, 1557 0, mxge_change_throttle, 1558 "I", "transmit throttling"); 1559 1560 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1561 "flow_control_enabled", 1562 CTLTYPE_INT|CTLFLAG_RW, sc, 1563 0, mxge_change_flow_control, 1564 "I", "interrupt coalescing delay in usecs"); 1565 1566 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1567 "deassert_wait", 1568 CTLFLAG_RW, &mxge_deassert_wait, 1569 0, "Wait for IRQ line to go low in ihandler"); 1570 1571 /* stats block from firmware is in network byte order. 1572 Need to swap it */ 1573 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1574 "link_up", 1575 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 1576 0, mxge_handle_be32, 1577 "I", "link up"); 1578 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1579 "rdma_tags_available", 1580 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 1581 0, mxge_handle_be32, 1582 "I", "rdma_tags_available"); 1583 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1584 "dropped_bad_crc32", 1585 CTLTYPE_INT|CTLFLAG_RD, 1586 &fw->dropped_bad_crc32, 1587 0, mxge_handle_be32, 1588 "I", "dropped_bad_crc32"); 1589 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1590 "dropped_bad_phy", 1591 CTLTYPE_INT|CTLFLAG_RD, 1592 &fw->dropped_bad_phy, 1593 0, mxge_handle_be32, 1594 "I", "dropped_bad_phy"); 1595 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1596 "dropped_link_error_or_filtered", 1597 CTLTYPE_INT|CTLFLAG_RD, 1598 &fw->dropped_link_error_or_filtered, 1599 0, mxge_handle_be32, 1600 "I", "dropped_link_error_or_filtered"); 1601 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1602 "dropped_link_overflow", 1603 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 1604 0, mxge_handle_be32, 1605 "I", "dropped_link_overflow"); 1606 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1607 "dropped_multicast_filtered", 1608 CTLTYPE_INT|CTLFLAG_RD, 1609 &fw->dropped_multicast_filtered, 1610 0, mxge_handle_be32, 1611 "I", "dropped_multicast_filtered"); 1612 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1613 "dropped_no_big_buffer", 1614 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 1615 0, mxge_handle_be32, 1616 "I", "dropped_no_big_buffer"); 1617 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1618 "dropped_no_small_buffer", 1619 CTLTYPE_INT|CTLFLAG_RD, 1620 &fw->dropped_no_small_buffer, 1621 0, mxge_handle_be32, 1622 "I", "dropped_no_small_buffer"); 1623 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1624 "dropped_overrun", 1625 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 1626 0, mxge_handle_be32, 1627 "I", "dropped_overrun"); 1628 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1629 "dropped_pause", 1630 CTLTYPE_INT|CTLFLAG_RD, 1631 &fw->dropped_pause, 1632 0, mxge_handle_be32, 1633 "I", "dropped_pause"); 1634 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1635 "dropped_runt", 1636 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 1637 0, mxge_handle_be32, 1638 "I", "dropped_runt"); 1639 1640 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1641 "dropped_unicast_filtered", 1642 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 1643 0, mxge_handle_be32, 1644 "I", "dropped_unicast_filtered"); 1645 1646 /* verbose printing? */ 1647 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1648 "verbose", 1649 CTLFLAG_RW, &mxge_verbose, 1650 0, "verbose printing"); 1651 1652 /* lro */ 1653 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1654 "lro_cnt", 1655 CTLTYPE_INT|CTLFLAG_RW, sc, 1656 0, mxge_change_lro, 1657 "I", "number of lro merge queues"); 1658 1659 1660 /* add counters exported for debugging from all slices */ 1661 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1662 sc->slice_sysctl_tree = 1663 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO, 1664 "slice", CTLFLAG_RD, 0, ""); 1665 1666 for (slice = 0; slice < sc->num_slices; slice++) { 1667 ss = &sc->ss[slice]; 1668 sysctl_ctx_init(&ss->sysctl_ctx); 1669 ctx = &ss->sysctl_ctx; 1670 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1671 sprintf(slice_num, "%d", slice); 1672 ss->sysctl_tree = 1673 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num, 1674 CTLFLAG_RD, 0, ""); 1675 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1676 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1677 "rx_small_cnt", 1678 CTLFLAG_RD, &ss->rx_small.cnt, 1679 0, "rx_small_cnt"); 1680 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1681 "rx_big_cnt", 1682 CTLFLAG_RD, &ss->rx_big.cnt, 1683 0, "rx_small_cnt"); 1684 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1685 "lro_flushed", CTLFLAG_RD, &ss->lro_flushed, 1686 0, "number of lro merge queues flushed"); 1687 1688 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1689 "lro_queued", CTLFLAG_RD, &ss->lro_queued, 1690 0, "number of frames appended to lro merge" 1691 "queues"); 1692 1693 #ifndef IFNET_BUF_RING 1694 /* only transmit from slice 0 for now */ 1695 if (slice > 0) 1696 continue; 1697 #endif 1698 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1699 "tx_req", 1700 CTLFLAG_RD, &ss->tx.req, 1701 0, "tx_req"); 1702 1703 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1704 "tx_done", 1705 CTLFLAG_RD, &ss->tx.done, 1706 0, "tx_done"); 1707 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1708 "tx_pkt_done", 1709 CTLFLAG_RD, &ss->tx.pkt_done, 1710 0, "tx_done"); 1711 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1712 "tx_stall", 1713 CTLFLAG_RD, &ss->tx.stall, 1714 0, "tx_stall"); 1715 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1716 "tx_wake", 1717 CTLFLAG_RD, &ss->tx.wake, 1718 0, "tx_wake"); 1719 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1720 "tx_defrag", 1721 CTLFLAG_RD, &ss->tx.defrag, 1722 0, "tx_defrag"); 1723 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1724 "tx_queue_active", 1725 CTLFLAG_RD, &ss->tx.queue_active, 1726 0, "tx_queue_active"); 1727 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1728 "tx_activate", 1729 CTLFLAG_RD, &ss->tx.activate, 1730 0, "tx_activate"); 1731 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1732 "tx_deactivate", 1733 CTLFLAG_RD, &ss->tx.deactivate, 1734 0, "tx_deactivate"); 1735 } 1736 } 1737 1738 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1739 backwards one at a time and handle ring wraps */ 1740 1741 static inline void 1742 mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1743 mcp_kreq_ether_send_t *src, int cnt) 1744 { 1745 int idx, starting_slot; 1746 starting_slot = tx->req; 1747 while (cnt > 1) { 1748 cnt--; 1749 idx = (starting_slot + cnt) & tx->mask; 1750 mxge_pio_copy(&tx->lanai[idx], 1751 &src[cnt], sizeof(*src)); 1752 wmb(); 1753 } 1754 } 1755 1756 /* 1757 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1758 * at most 32 bytes at a time, so as to avoid involving the software 1759 * pio handler in the nic. We re-write the first segment's flags 1760 * to mark them valid only after writing the entire chain 1761 */ 1762 1763 static inline void 1764 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 1765 int cnt) 1766 { 1767 int idx, i; 1768 uint32_t *src_ints; 1769 volatile uint32_t *dst_ints; 1770 mcp_kreq_ether_send_t *srcp; 1771 volatile mcp_kreq_ether_send_t *dstp, *dst; 1772 uint8_t last_flags; 1773 1774 idx = tx->req & tx->mask; 1775 1776 last_flags = src->flags; 1777 src->flags = 0; 1778 wmb(); 1779 dst = dstp = &tx->lanai[idx]; 1780 srcp = src; 1781 1782 if ((idx + cnt) < tx->mask) { 1783 for (i = 0; i < (cnt - 1); i += 2) { 1784 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1785 wmb(); /* force write every 32 bytes */ 1786 srcp += 2; 1787 dstp += 2; 1788 } 1789 } else { 1790 /* submit all but the first request, and ensure 1791 that it is submitted below */ 1792 mxge_submit_req_backwards(tx, src, cnt); 1793 i = 0; 1794 } 1795 if (i < cnt) { 1796 /* submit the first request */ 1797 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1798 wmb(); /* barrier before setting valid flag */ 1799 } 1800 1801 /* re-write the last 32-bits with the valid flags */ 1802 src->flags = last_flags; 1803 src_ints = (uint32_t *)src; 1804 src_ints+=3; 1805 dst_ints = (volatile uint32_t *)dst; 1806 dst_ints+=3; 1807 *dst_ints = *src_ints; 1808 tx->req += cnt; 1809 wmb(); 1810 } 1811 1812 #if IFCAP_TSO4 1813 1814 static void 1815 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m, 1816 int busdma_seg_cnt, int ip_off) 1817 { 1818 mxge_tx_ring_t *tx; 1819 mcp_kreq_ether_send_t *req; 1820 bus_dma_segment_t *seg; 1821 struct ip *ip; 1822 struct tcphdr *tcp; 1823 uint32_t low, high_swapped; 1824 int len, seglen, cum_len, cum_len_next; 1825 int next_is_first, chop, cnt, rdma_count, small; 1826 uint16_t pseudo_hdr_offset, cksum_offset, mss; 1827 uint8_t flags, flags_next; 1828 static int once; 1829 1830 mss = m->m_pkthdr.tso_segsz; 1831 1832 /* negative cum_len signifies to the 1833 * send loop that we are still in the 1834 * header portion of the TSO packet. 1835 */ 1836 1837 /* ensure we have the ethernet, IP and TCP 1838 header together in the first mbuf, copy 1839 it to a scratch buffer if not */ 1840 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 1841 m_copydata(m, 0, ip_off + sizeof (*ip), 1842 ss->scratch); 1843 ip = (struct ip *)(ss->scratch + ip_off); 1844 } else { 1845 ip = (struct ip *)(mtod(m, char *) + ip_off); 1846 } 1847 if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2) 1848 + sizeof (*tcp))) { 1849 m_copydata(m, 0, ip_off + (ip->ip_hl << 2) 1850 + sizeof (*tcp), ss->scratch); 1851 ip = (struct ip *)(mtod(m, char *) + ip_off); 1852 } 1853 1854 tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2)); 1855 cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2)); 1856 cksum_offset = ip_off + (ip->ip_hl << 2); 1857 1858 /* TSO implies checksum offload on this hardware */ 1859 if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP)) == 0)) { 1860 /* 1861 * If packet has full TCP csum, replace it with pseudo hdr 1862 * sum that the NIC expects, otherwise the NIC will emit 1863 * packets with bad TCP checksums. 1864 */ 1865 m->m_pkthdr.csum_flags = CSUM_TCP; 1866 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); 1867 tcp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 1868 htons(IPPROTO_TCP + (m->m_pkthdr.len - cksum_offset))); 1869 } 1870 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1871 1872 1873 /* for TSO, pseudo_hdr_offset holds mss. 1874 * The firmware figures out where to put 1875 * the checksum by parsing the header. */ 1876 pseudo_hdr_offset = htobe16(mss); 1877 1878 tx = &ss->tx; 1879 req = tx->req_list; 1880 seg = tx->seg_list; 1881 cnt = 0; 1882 rdma_count = 0; 1883 /* "rdma_count" is the number of RDMAs belonging to the 1884 * current packet BEFORE the current send request. For 1885 * non-TSO packets, this is equal to "count". 1886 * For TSO packets, rdma_count needs to be reset 1887 * to 0 after a segment cut. 1888 * 1889 * The rdma_count field of the send request is 1890 * the number of RDMAs of the packet starting at 1891 * that request. For TSO send requests with one ore more cuts 1892 * in the middle, this is the number of RDMAs starting 1893 * after the last cut in the request. All previous 1894 * segments before the last cut implicitly have 1 RDMA. 1895 * 1896 * Since the number of RDMAs is not known beforehand, 1897 * it must be filled-in retroactively - after each 1898 * segmentation cut or at the end of the entire packet. 1899 */ 1900 1901 while (busdma_seg_cnt) { 1902 /* Break the busdma segment up into pieces*/ 1903 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1904 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1905 len = seg->ds_len; 1906 1907 while (len) { 1908 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1909 seglen = len; 1910 cum_len_next = cum_len + seglen; 1911 (req-rdma_count)->rdma_count = rdma_count + 1; 1912 if (__predict_true(cum_len >= 0)) { 1913 /* payload */ 1914 chop = (cum_len_next > mss); 1915 cum_len_next = cum_len_next % mss; 1916 next_is_first = (cum_len_next == 0); 1917 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1918 flags_next |= next_is_first * 1919 MXGEFW_FLAGS_FIRST; 1920 rdma_count |= -(chop | next_is_first); 1921 rdma_count += chop & !next_is_first; 1922 } else if (cum_len_next >= 0) { 1923 /* header ends */ 1924 rdma_count = -1; 1925 cum_len_next = 0; 1926 seglen = -cum_len; 1927 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1928 flags_next = MXGEFW_FLAGS_TSO_PLD | 1929 MXGEFW_FLAGS_FIRST | 1930 (small * MXGEFW_FLAGS_SMALL); 1931 } 1932 1933 req->addr_high = high_swapped; 1934 req->addr_low = htobe32(low); 1935 req->pseudo_hdr_offset = pseudo_hdr_offset; 1936 req->pad = 0; 1937 req->rdma_count = 1; 1938 req->length = htobe16(seglen); 1939 req->cksum_offset = cksum_offset; 1940 req->flags = flags | ((cum_len & 1) * 1941 MXGEFW_FLAGS_ALIGN_ODD); 1942 low += seglen; 1943 len -= seglen; 1944 cum_len = cum_len_next; 1945 flags = flags_next; 1946 req++; 1947 cnt++; 1948 rdma_count++; 1949 if (__predict_false(cksum_offset > seglen)) 1950 cksum_offset -= seglen; 1951 else 1952 cksum_offset = 0; 1953 if (__predict_false(cnt > tx->max_desc)) 1954 goto drop; 1955 } 1956 busdma_seg_cnt--; 1957 seg++; 1958 } 1959 (req-rdma_count)->rdma_count = rdma_count; 1960 1961 do { 1962 req--; 1963 req->flags |= MXGEFW_FLAGS_TSO_LAST; 1964 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 1965 1966 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1967 mxge_submit_req(tx, tx->req_list, cnt); 1968 #ifdef IFNET_BUF_RING 1969 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 1970 /* tell the NIC to start polling this slice */ 1971 *tx->send_go = 1; 1972 tx->queue_active = 1; 1973 tx->activate++; 1974 wmb(); 1975 } 1976 #endif 1977 return; 1978 1979 drop: 1980 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 1981 m_freem(m); 1982 ss->oerrors++; 1983 if (!once) { 1984 printf("tx->max_desc exceeded via TSO!\n"); 1985 printf("mss = %d, %ld, %d!\n", mss, 1986 (long)seg - (long)tx->seg_list, tx->max_desc); 1987 once = 1; 1988 } 1989 return; 1990 1991 } 1992 1993 #endif /* IFCAP_TSO4 */ 1994 1995 #ifdef MXGE_NEW_VLAN_API 1996 /* 1997 * We reproduce the software vlan tag insertion from 1998 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" 1999 * vlan tag insertion. We need to advertise this in order to have the 2000 * vlan interface respect our csum offload flags. 2001 */ 2002 static struct mbuf * 2003 mxge_vlan_tag_insert(struct mbuf *m) 2004 { 2005 struct ether_vlan_header *evl; 2006 2007 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT); 2008 if (__predict_false(m == NULL)) 2009 return NULL; 2010 if (m->m_len < sizeof(*evl)) { 2011 m = m_pullup(m, sizeof(*evl)); 2012 if (__predict_false(m == NULL)) 2013 return NULL; 2014 } 2015 /* 2016 * Transform the Ethernet header into an Ethernet header 2017 * with 802.1Q encapsulation. 2018 */ 2019 evl = mtod(m, struct ether_vlan_header *); 2020 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 2021 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 2022 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 2023 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 2024 m->m_flags &= ~M_VLANTAG; 2025 return m; 2026 } 2027 #endif /* MXGE_NEW_VLAN_API */ 2028 2029 static void 2030 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 2031 { 2032 mxge_softc_t *sc; 2033 mcp_kreq_ether_send_t *req; 2034 bus_dma_segment_t *seg; 2035 struct mbuf *m_tmp; 2036 struct ifnet *ifp; 2037 mxge_tx_ring_t *tx; 2038 struct ip *ip; 2039 int cnt, cum_len, err, i, idx, odd_flag, ip_off; 2040 uint16_t pseudo_hdr_offset; 2041 uint8_t flags, cksum_offset; 2042 2043 2044 sc = ss->sc; 2045 ifp = sc->ifp; 2046 tx = &ss->tx; 2047 2048 ip_off = sizeof (struct ether_header); 2049 #ifdef MXGE_NEW_VLAN_API 2050 if (m->m_flags & M_VLANTAG) { 2051 m = mxge_vlan_tag_insert(m); 2052 if (__predict_false(m == NULL)) 2053 goto drop; 2054 ip_off += ETHER_VLAN_ENCAP_LEN; 2055 } 2056 #endif 2057 /* (try to) map the frame for DMA */ 2058 idx = tx->req & tx->mask; 2059 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 2060 m, tx->seg_list, &cnt, 2061 BUS_DMA_NOWAIT); 2062 if (__predict_false(err == EFBIG)) { 2063 /* Too many segments in the chain. Try 2064 to defrag */ 2065 m_tmp = m_defrag(m, M_NOWAIT); 2066 if (m_tmp == NULL) { 2067 goto drop; 2068 } 2069 ss->tx.defrag++; 2070 m = m_tmp; 2071 err = bus_dmamap_load_mbuf_sg(tx->dmat, 2072 tx->info[idx].map, 2073 m, tx->seg_list, &cnt, 2074 BUS_DMA_NOWAIT); 2075 } 2076 if (__predict_false(err != 0)) { 2077 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 2078 " packet len = %d\n", err, m->m_pkthdr.len); 2079 goto drop; 2080 } 2081 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 2082 BUS_DMASYNC_PREWRITE); 2083 tx->info[idx].m = m; 2084 2085 #if IFCAP_TSO4 2086 /* TSO is different enough, we handle it in another routine */ 2087 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 2088 mxge_encap_tso(ss, m, cnt, ip_off); 2089 return; 2090 } 2091 #endif 2092 2093 req = tx->req_list; 2094 cksum_offset = 0; 2095 pseudo_hdr_offset = 0; 2096 flags = MXGEFW_FLAGS_NO_TSO; 2097 2098 /* checksum offloading? */ 2099 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) { 2100 /* ensure ip header is in first mbuf, copy 2101 it to a scratch buffer if not */ 2102 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 2103 m_copydata(m, 0, ip_off + sizeof (*ip), 2104 ss->scratch); 2105 ip = (struct ip *)(ss->scratch + ip_off); 2106 } else { 2107 ip = (struct ip *)(mtod(m, char *) + ip_off); 2108 } 2109 cksum_offset = ip_off + (ip->ip_hl << 2); 2110 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2111 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2112 req->cksum_offset = cksum_offset; 2113 flags |= MXGEFW_FLAGS_CKSUM; 2114 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2115 } else { 2116 odd_flag = 0; 2117 } 2118 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2119 flags |= MXGEFW_FLAGS_SMALL; 2120 2121 /* convert segments into a request list */ 2122 cum_len = 0; 2123 seg = tx->seg_list; 2124 req->flags = MXGEFW_FLAGS_FIRST; 2125 for (i = 0; i < cnt; i++) { 2126 req->addr_low = 2127 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2128 req->addr_high = 2129 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2130 req->length = htobe16(seg->ds_len); 2131 req->cksum_offset = cksum_offset; 2132 if (cksum_offset > seg->ds_len) 2133 cksum_offset -= seg->ds_len; 2134 else 2135 cksum_offset = 0; 2136 req->pseudo_hdr_offset = pseudo_hdr_offset; 2137 req->pad = 0; /* complete solid 16-byte block */ 2138 req->rdma_count = 1; 2139 req->flags |= flags | ((cum_len & 1) * odd_flag); 2140 cum_len += seg->ds_len; 2141 seg++; 2142 req++; 2143 req->flags = 0; 2144 } 2145 req--; 2146 /* pad runts to 60 bytes */ 2147 if (cum_len < 60) { 2148 req++; 2149 req->addr_low = 2150 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2151 req->addr_high = 2152 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2153 req->length = htobe16(60 - cum_len); 2154 req->cksum_offset = 0; 2155 req->pseudo_hdr_offset = pseudo_hdr_offset; 2156 req->pad = 0; /* complete solid 16-byte block */ 2157 req->rdma_count = 1; 2158 req->flags |= flags | ((cum_len & 1) * odd_flag); 2159 cnt++; 2160 } 2161 2162 tx->req_list[0].rdma_count = cnt; 2163 #if 0 2164 /* print what the firmware will see */ 2165 for (i = 0; i < cnt; i++) { 2166 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2167 "cso:%d, flags:0x%x, rdma:%d\n", 2168 i, (int)ntohl(tx->req_list[i].addr_high), 2169 (int)ntohl(tx->req_list[i].addr_low), 2170 (int)ntohs(tx->req_list[i].length), 2171 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2172 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2173 tx->req_list[i].rdma_count); 2174 } 2175 printf("--------------\n"); 2176 #endif 2177 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2178 mxge_submit_req(tx, tx->req_list, cnt); 2179 #ifdef IFNET_BUF_RING 2180 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2181 /* tell the NIC to start polling this slice */ 2182 *tx->send_go = 1; 2183 tx->queue_active = 1; 2184 tx->activate++; 2185 wmb(); 2186 } 2187 #endif 2188 return; 2189 2190 drop: 2191 m_freem(m); 2192 ss->oerrors++; 2193 return; 2194 } 2195 2196 #ifdef IFNET_BUF_RING 2197 static void 2198 mxge_qflush(struct ifnet *ifp) 2199 { 2200 mxge_softc_t *sc = ifp->if_softc; 2201 mxge_tx_ring_t *tx; 2202 struct mbuf *m; 2203 int slice; 2204 2205 for (slice = 0; slice < sc->num_slices; slice++) { 2206 tx = &sc->ss[slice].tx; 2207 mtx_lock(&tx->mtx); 2208 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2209 m_freem(m); 2210 mtx_unlock(&tx->mtx); 2211 } 2212 if_qflush(ifp); 2213 } 2214 2215 static inline void 2216 mxge_start_locked(struct mxge_slice_state *ss) 2217 { 2218 mxge_softc_t *sc; 2219 struct mbuf *m; 2220 struct ifnet *ifp; 2221 mxge_tx_ring_t *tx; 2222 2223 sc = ss->sc; 2224 ifp = sc->ifp; 2225 tx = &ss->tx; 2226 2227 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2228 m = drbr_dequeue(ifp, tx->br); 2229 if (m == NULL) { 2230 return; 2231 } 2232 /* let BPF see it */ 2233 BPF_MTAP(ifp, m); 2234 2235 /* give it to the nic */ 2236 mxge_encap(ss, m); 2237 } 2238 /* ran out of transmit slots */ 2239 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) 2240 && (!drbr_empty(ifp, tx->br))) { 2241 ss->if_drv_flags |= IFF_DRV_OACTIVE; 2242 tx->stall++; 2243 } 2244 } 2245 2246 static int 2247 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2248 { 2249 mxge_softc_t *sc; 2250 struct ifnet *ifp; 2251 mxge_tx_ring_t *tx; 2252 int err; 2253 2254 sc = ss->sc; 2255 ifp = sc->ifp; 2256 tx = &ss->tx; 2257 2258 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 2259 IFF_DRV_RUNNING) { 2260 err = drbr_enqueue(ifp, tx->br, m); 2261 return (err); 2262 } 2263 2264 if (!drbr_needs_enqueue(ifp, tx->br) && 2265 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2266 /* let BPF see it */ 2267 BPF_MTAP(ifp, m); 2268 /* give it to the nic */ 2269 mxge_encap(ss, m); 2270 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2271 return (err); 2272 } 2273 if (!drbr_empty(ifp, tx->br)) 2274 mxge_start_locked(ss); 2275 return (0); 2276 } 2277 2278 static int 2279 mxge_transmit(struct ifnet *ifp, struct mbuf *m) 2280 { 2281 mxge_softc_t *sc = ifp->if_softc; 2282 struct mxge_slice_state *ss; 2283 mxge_tx_ring_t *tx; 2284 int err = 0; 2285 int slice; 2286 2287 slice = m->m_pkthdr.flowid; 2288 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ 2289 2290 ss = &sc->ss[slice]; 2291 tx = &ss->tx; 2292 2293 if (mtx_trylock(&tx->mtx)) { 2294 err = mxge_transmit_locked(ss, m); 2295 mtx_unlock(&tx->mtx); 2296 } else { 2297 err = drbr_enqueue(ifp, tx->br, m); 2298 } 2299 2300 return (err); 2301 } 2302 2303 #else 2304 2305 static inline void 2306 mxge_start_locked(struct mxge_slice_state *ss) 2307 { 2308 mxge_softc_t *sc; 2309 struct mbuf *m; 2310 struct ifnet *ifp; 2311 mxge_tx_ring_t *tx; 2312 2313 sc = ss->sc; 2314 ifp = sc->ifp; 2315 tx = &ss->tx; 2316 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2317 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 2318 if (m == NULL) { 2319 return; 2320 } 2321 /* let BPF see it */ 2322 BPF_MTAP(ifp, m); 2323 2324 /* give it to the nic */ 2325 mxge_encap(ss, m); 2326 } 2327 /* ran out of transmit slots */ 2328 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 2329 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2330 tx->stall++; 2331 } 2332 } 2333 #endif 2334 static void 2335 mxge_start(struct ifnet *ifp) 2336 { 2337 mxge_softc_t *sc = ifp->if_softc; 2338 struct mxge_slice_state *ss; 2339 2340 /* only use the first slice for now */ 2341 ss = &sc->ss[0]; 2342 mtx_lock(&ss->tx.mtx); 2343 mxge_start_locked(ss); 2344 mtx_unlock(&ss->tx.mtx); 2345 } 2346 2347 /* 2348 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2349 * at most 32 bytes at a time, so as to avoid involving the software 2350 * pio handler in the nic. We re-write the first segment's low 2351 * DMA address to mark it valid only after we write the entire chunk 2352 * in a burst 2353 */ 2354 static inline void 2355 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2356 mcp_kreq_ether_recv_t *src) 2357 { 2358 uint32_t low; 2359 2360 low = src->addr_low; 2361 src->addr_low = 0xffffffff; 2362 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2363 wmb(); 2364 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2365 wmb(); 2366 src->addr_low = low; 2367 dst->addr_low = low; 2368 wmb(); 2369 } 2370 2371 static int 2372 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2373 { 2374 bus_dma_segment_t seg; 2375 struct mbuf *m; 2376 mxge_rx_ring_t *rx = &ss->rx_small; 2377 int cnt, err; 2378 2379 m = m_gethdr(M_DONTWAIT, MT_DATA); 2380 if (m == NULL) { 2381 rx->alloc_fail++; 2382 err = ENOBUFS; 2383 goto done; 2384 } 2385 m->m_len = MHLEN; 2386 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2387 &seg, &cnt, BUS_DMA_NOWAIT); 2388 if (err != 0) { 2389 m_free(m); 2390 goto done; 2391 } 2392 rx->info[idx].m = m; 2393 rx->shadow[idx].addr_low = 2394 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2395 rx->shadow[idx].addr_high = 2396 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2397 2398 done: 2399 if ((idx & 7) == 7) 2400 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2401 return err; 2402 } 2403 2404 static int 2405 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2406 { 2407 bus_dma_segment_t seg[3]; 2408 struct mbuf *m; 2409 mxge_rx_ring_t *rx = &ss->rx_big; 2410 int cnt, err, i; 2411 2412 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2413 if (m == NULL) { 2414 rx->alloc_fail++; 2415 err = ENOBUFS; 2416 goto done; 2417 } 2418 m->m_len = rx->mlen; 2419 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2420 seg, &cnt, BUS_DMA_NOWAIT); 2421 if (err != 0) { 2422 m_free(m); 2423 goto done; 2424 } 2425 rx->info[idx].m = m; 2426 rx->shadow[idx].addr_low = 2427 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2428 rx->shadow[idx].addr_high = 2429 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2430 2431 #if MXGE_VIRT_JUMBOS 2432 for (i = 1; i < cnt; i++) { 2433 rx->shadow[idx + i].addr_low = 2434 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2435 rx->shadow[idx + i].addr_high = 2436 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2437 } 2438 #endif 2439 2440 done: 2441 for (i = 0; i < rx->nbufs; i++) { 2442 if ((idx & 7) == 7) { 2443 mxge_submit_8rx(&rx->lanai[idx - 7], 2444 &rx->shadow[idx - 7]); 2445 } 2446 idx++; 2447 } 2448 return err; 2449 } 2450 2451 /* 2452 * Myri10GE hardware checksums are not valid if the sender 2453 * padded the frame with non-zero padding. This is because 2454 * the firmware just does a simple 16-bit 1s complement 2455 * checksum across the entire frame, excluding the first 14 2456 * bytes. It is best to simply to check the checksum and 2457 * tell the stack about it only if the checksum is good 2458 */ 2459 2460 static inline uint16_t 2461 mxge_rx_csum(struct mbuf *m, int csum) 2462 { 2463 struct ether_header *eh; 2464 struct ip *ip; 2465 uint16_t c; 2466 2467 eh = mtod(m, struct ether_header *); 2468 2469 /* only deal with IPv4 TCP & UDP for now */ 2470 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP))) 2471 return 1; 2472 ip = (struct ip *)(eh + 1); 2473 if (__predict_false(ip->ip_p != IPPROTO_TCP && 2474 ip->ip_p != IPPROTO_UDP)) 2475 return 1; 2476 #ifdef INET 2477 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2478 htonl(ntohs(csum) + ntohs(ip->ip_len) + 2479 - (ip->ip_hl << 2) + ip->ip_p)); 2480 #else 2481 c = 1; 2482 #endif 2483 c ^= 0xffff; 2484 return (c); 2485 } 2486 2487 static void 2488 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2489 { 2490 struct ether_vlan_header *evl; 2491 struct ether_header *eh; 2492 uint32_t partial; 2493 2494 evl = mtod(m, struct ether_vlan_header *); 2495 eh = mtod(m, struct ether_header *); 2496 2497 /* 2498 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes 2499 * after what the firmware thought was the end of the ethernet 2500 * header. 2501 */ 2502 2503 /* put checksum into host byte order */ 2504 *csum = ntohs(*csum); 2505 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2506 (*csum) += ~partial; 2507 (*csum) += ((*csum) < ~partial); 2508 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2509 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2510 2511 /* restore checksum to network byte order; 2512 later consumers expect this */ 2513 *csum = htons(*csum); 2514 2515 /* save the tag */ 2516 #ifdef MXGE_NEW_VLAN_API 2517 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); 2518 #else 2519 { 2520 struct m_tag *mtag; 2521 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), 2522 M_NOWAIT); 2523 if (mtag == NULL) 2524 return; 2525 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); 2526 m_tag_prepend(m, mtag); 2527 } 2528 2529 #endif 2530 m->m_flags |= M_VLANTAG; 2531 2532 /* 2533 * Remove the 802.1q header by copying the Ethernet 2534 * addresses over it and adjusting the beginning of 2535 * the data in the mbuf. The encapsulated Ethernet 2536 * type field is already in place. 2537 */ 2538 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2539 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2540 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2541 } 2542 2543 2544 static inline void 2545 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2546 { 2547 mxge_softc_t *sc; 2548 struct ifnet *ifp; 2549 struct mbuf *m; 2550 struct ether_header *eh; 2551 mxge_rx_ring_t *rx; 2552 bus_dmamap_t old_map; 2553 int idx; 2554 uint16_t tcpudp_csum; 2555 2556 sc = ss->sc; 2557 ifp = sc->ifp; 2558 rx = &ss->rx_big; 2559 idx = rx->cnt & rx->mask; 2560 rx->cnt += rx->nbufs; 2561 /* save a pointer to the received mbuf */ 2562 m = rx->info[idx].m; 2563 /* try to replace the received mbuf */ 2564 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2565 /* drop the frame -- the old mbuf is re-cycled */ 2566 ifp->if_ierrors++; 2567 return; 2568 } 2569 2570 /* unmap the received buffer */ 2571 old_map = rx->info[idx].map; 2572 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2573 bus_dmamap_unload(rx->dmat, old_map); 2574 2575 /* swap the bus_dmamap_t's */ 2576 rx->info[idx].map = rx->extra_map; 2577 rx->extra_map = old_map; 2578 2579 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2580 * aligned */ 2581 m->m_data += MXGEFW_PAD; 2582 2583 m->m_pkthdr.rcvif = ifp; 2584 m->m_len = m->m_pkthdr.len = len; 2585 ss->ipackets++; 2586 eh = mtod(m, struct ether_header *); 2587 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2588 mxge_vlan_tag_remove(m, &csum); 2589 } 2590 /* if the checksum is valid, mark it in the mbuf header */ 2591 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2592 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2593 return; 2594 /* otherwise, it was a UDP frame, or a TCP frame which 2595 we could not do LRO on. Tell the stack that the 2596 checksum is good */ 2597 m->m_pkthdr.csum_data = 0xffff; 2598 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2599 } 2600 /* flowid only valid if RSS hashing is enabled */ 2601 if (sc->num_slices > 1) { 2602 m->m_pkthdr.flowid = (ss - sc->ss); 2603 m->m_flags |= M_FLOWID; 2604 } 2605 /* pass the frame up the stack */ 2606 (*ifp->if_input)(ifp, m); 2607 } 2608 2609 static inline void 2610 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2611 { 2612 mxge_softc_t *sc; 2613 struct ifnet *ifp; 2614 struct ether_header *eh; 2615 struct mbuf *m; 2616 mxge_rx_ring_t *rx; 2617 bus_dmamap_t old_map; 2618 int idx; 2619 uint16_t tcpudp_csum; 2620 2621 sc = ss->sc; 2622 ifp = sc->ifp; 2623 rx = &ss->rx_small; 2624 idx = rx->cnt & rx->mask; 2625 rx->cnt++; 2626 /* save a pointer to the received mbuf */ 2627 m = rx->info[idx].m; 2628 /* try to replace the received mbuf */ 2629 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2630 /* drop the frame -- the old mbuf is re-cycled */ 2631 ifp->if_ierrors++; 2632 return; 2633 } 2634 2635 /* unmap the received buffer */ 2636 old_map = rx->info[idx].map; 2637 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2638 bus_dmamap_unload(rx->dmat, old_map); 2639 2640 /* swap the bus_dmamap_t's */ 2641 rx->info[idx].map = rx->extra_map; 2642 rx->extra_map = old_map; 2643 2644 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2645 * aligned */ 2646 m->m_data += MXGEFW_PAD; 2647 2648 m->m_pkthdr.rcvif = ifp; 2649 m->m_len = m->m_pkthdr.len = len; 2650 ss->ipackets++; 2651 eh = mtod(m, struct ether_header *); 2652 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2653 mxge_vlan_tag_remove(m, &csum); 2654 } 2655 /* if the checksum is valid, mark it in the mbuf header */ 2656 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2657 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2658 return; 2659 /* otherwise, it was a UDP frame, or a TCP frame which 2660 we could not do LRO on. Tell the stack that the 2661 checksum is good */ 2662 m->m_pkthdr.csum_data = 0xffff; 2663 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2664 } 2665 /* flowid only valid if RSS hashing is enabled */ 2666 if (sc->num_slices > 1) { 2667 m->m_pkthdr.flowid = (ss - sc->ss); 2668 m->m_flags |= M_FLOWID; 2669 } 2670 /* pass the frame up the stack */ 2671 (*ifp->if_input)(ifp, m); 2672 } 2673 2674 static inline void 2675 mxge_clean_rx_done(struct mxge_slice_state *ss) 2676 { 2677 mxge_rx_done_t *rx_done = &ss->rx_done; 2678 int limit = 0; 2679 uint16_t length; 2680 uint16_t checksum; 2681 2682 2683 while (rx_done->entry[rx_done->idx].length != 0) { 2684 length = ntohs(rx_done->entry[rx_done->idx].length); 2685 rx_done->entry[rx_done->idx].length = 0; 2686 checksum = rx_done->entry[rx_done->idx].checksum; 2687 if (length <= (MHLEN - MXGEFW_PAD)) 2688 mxge_rx_done_small(ss, length, checksum); 2689 else 2690 mxge_rx_done_big(ss, length, checksum); 2691 rx_done->cnt++; 2692 rx_done->idx = rx_done->cnt & rx_done->mask; 2693 2694 /* limit potential for livelock */ 2695 if (__predict_false(++limit > rx_done->mask / 2)) 2696 break; 2697 } 2698 #ifdef INET 2699 while (!SLIST_EMPTY(&ss->lro_active)) { 2700 struct lro_entry *lro = SLIST_FIRST(&ss->lro_active); 2701 SLIST_REMOVE_HEAD(&ss->lro_active, next); 2702 mxge_lro_flush(ss, lro); 2703 } 2704 #endif 2705 } 2706 2707 2708 static inline void 2709 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2710 { 2711 struct ifnet *ifp; 2712 mxge_tx_ring_t *tx; 2713 struct mbuf *m; 2714 bus_dmamap_t map; 2715 int idx; 2716 int *flags; 2717 2718 tx = &ss->tx; 2719 ifp = ss->sc->ifp; 2720 while (tx->pkt_done != mcp_idx) { 2721 idx = tx->done & tx->mask; 2722 tx->done++; 2723 m = tx->info[idx].m; 2724 /* mbuf and DMA map only attached to the first 2725 segment per-mbuf */ 2726 if (m != NULL) { 2727 ss->obytes += m->m_pkthdr.len; 2728 if (m->m_flags & M_MCAST) 2729 ss->omcasts++; 2730 ss->opackets++; 2731 tx->info[idx].m = NULL; 2732 map = tx->info[idx].map; 2733 bus_dmamap_unload(tx->dmat, map); 2734 m_freem(m); 2735 } 2736 if (tx->info[idx].flag) { 2737 tx->info[idx].flag = 0; 2738 tx->pkt_done++; 2739 } 2740 } 2741 2742 /* If we have space, clear IFF_OACTIVE to tell the stack that 2743 its OK to send packets */ 2744 #ifdef IFNET_BUF_RING 2745 flags = &ss->if_drv_flags; 2746 #else 2747 flags = &ifp->if_drv_flags; 2748 #endif 2749 mtx_lock(&ss->tx.mtx); 2750 if ((*flags) & IFF_DRV_OACTIVE && 2751 tx->req - tx->done < (tx->mask + 1)/4) { 2752 *(flags) &= ~IFF_DRV_OACTIVE; 2753 ss->tx.wake++; 2754 mxge_start_locked(ss); 2755 } 2756 #ifdef IFNET_BUF_RING 2757 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { 2758 /* let the NIC stop polling this queue, since there 2759 * are no more transmits pending */ 2760 if (tx->req == tx->done) { 2761 *tx->send_stop = 1; 2762 tx->queue_active = 0; 2763 tx->deactivate++; 2764 wmb(); 2765 } 2766 } 2767 #endif 2768 mtx_unlock(&ss->tx.mtx); 2769 2770 } 2771 2772 static struct mxge_media_type mxge_xfp_media_types[] = 2773 { 2774 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2775 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2776 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2777 {0, (1 << 5), "10GBASE-ER"}, 2778 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2779 {0, (1 << 3), "10GBASE-SW"}, 2780 {0, (1 << 2), "10GBASE-LW"}, 2781 {0, (1 << 1), "10GBASE-EW"}, 2782 {0, (1 << 0), "Reserved"} 2783 }; 2784 static struct mxge_media_type mxge_sfp_media_types[] = 2785 { 2786 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"}, 2787 {0, (1 << 7), "Reserved"}, 2788 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2789 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2790 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}, 2791 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"} 2792 }; 2793 2794 static void 2795 mxge_media_set(mxge_softc_t *sc, int media_type) 2796 { 2797 2798 2799 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type, 2800 0, NULL); 2801 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type); 2802 sc->current_media = media_type; 2803 sc->media.ifm_media = sc->media.ifm_cur->ifm_media; 2804 } 2805 2806 static void 2807 mxge_media_init(mxge_softc_t *sc) 2808 { 2809 char *ptr; 2810 int i; 2811 2812 ifmedia_removeall(&sc->media); 2813 mxge_media_set(sc, IFM_AUTO); 2814 2815 /* 2816 * parse the product code to deterimine the interface type 2817 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2818 * after the 3rd dash in the driver's cached copy of the 2819 * EEPROM's product code string. 2820 */ 2821 ptr = sc->product_code_string; 2822 if (ptr == NULL) { 2823 device_printf(sc->dev, "Missing product code\n"); 2824 return; 2825 } 2826 2827 for (i = 0; i < 3; i++, ptr++) { 2828 ptr = strchr(ptr, '-'); 2829 if (ptr == NULL) { 2830 device_printf(sc->dev, 2831 "only %d dashes in PC?!?\n", i); 2832 return; 2833 } 2834 } 2835 if (*ptr == 'C' || *(ptr +1) == 'C') { 2836 /* -C is CX4 */ 2837 sc->connector = MXGE_CX4; 2838 mxge_media_set(sc, IFM_10G_CX4); 2839 } else if (*ptr == 'Q') { 2840 /* -Q is Quad Ribbon Fiber */ 2841 sc->connector = MXGE_QRF; 2842 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2843 /* FreeBSD has no media type for Quad ribbon fiber */ 2844 } else if (*ptr == 'R') { 2845 /* -R is XFP */ 2846 sc->connector = MXGE_XFP; 2847 } else if (*ptr == 'S' || *(ptr +1) == 'S') { 2848 /* -S or -2S is SFP+ */ 2849 sc->connector = MXGE_SFP; 2850 } else { 2851 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2852 } 2853 } 2854 2855 /* 2856 * Determine the media type for a NIC. Some XFPs will identify 2857 * themselves only when their link is up, so this is initiated via a 2858 * link up interrupt. However, this can potentially take up to 2859 * several milliseconds, so it is run via the watchdog routine, rather 2860 * than in the interrupt handler itself. 2861 */ 2862 static void 2863 mxge_media_probe(mxge_softc_t *sc) 2864 { 2865 mxge_cmd_t cmd; 2866 char *cage_type; 2867 2868 struct mxge_media_type *mxge_media_types = NULL; 2869 int i, err, ms, mxge_media_type_entries; 2870 uint32_t byte; 2871 2872 sc->need_media_probe = 0; 2873 2874 if (sc->connector == MXGE_XFP) { 2875 /* -R is XFP */ 2876 mxge_media_types = mxge_xfp_media_types; 2877 mxge_media_type_entries = 2878 sizeof (mxge_xfp_media_types) / 2879 sizeof (mxge_xfp_media_types[0]); 2880 byte = MXGE_XFP_COMPLIANCE_BYTE; 2881 cage_type = "XFP"; 2882 } else if (sc->connector == MXGE_SFP) { 2883 /* -S or -2S is SFP+ */ 2884 mxge_media_types = mxge_sfp_media_types; 2885 mxge_media_type_entries = 2886 sizeof (mxge_sfp_media_types) / 2887 sizeof (mxge_sfp_media_types[0]); 2888 cage_type = "SFP+"; 2889 byte = 3; 2890 } else { 2891 /* nothing to do; media type cannot change */ 2892 return; 2893 } 2894 2895 /* 2896 * At this point we know the NIC has an XFP cage, so now we 2897 * try to determine what is in the cage by using the 2898 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2899 * register. We read just one byte, which may take over 2900 * a millisecond 2901 */ 2902 2903 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2904 cmd.data1 = byte; 2905 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2906 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 2907 device_printf(sc->dev, "failed to read XFP\n"); 2908 } 2909 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 2910 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 2911 } 2912 if (err != MXGEFW_CMD_OK) { 2913 return; 2914 } 2915 2916 /* now we wait for the data to be cached */ 2917 cmd.data0 = byte; 2918 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2919 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 2920 DELAY(1000); 2921 cmd.data0 = byte; 2922 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2923 } 2924 if (err != MXGEFW_CMD_OK) { 2925 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 2926 cage_type, err, ms); 2927 return; 2928 } 2929 2930 if (cmd.data0 == mxge_media_types[0].bitmask) { 2931 if (mxge_verbose) 2932 device_printf(sc->dev, "%s:%s\n", cage_type, 2933 mxge_media_types[0].name); 2934 if (sc->current_media != mxge_media_types[0].flag) { 2935 mxge_media_init(sc); 2936 mxge_media_set(sc, mxge_media_types[0].flag); 2937 } 2938 return; 2939 } 2940 for (i = 1; i < mxge_media_type_entries; i++) { 2941 if (cmd.data0 & mxge_media_types[i].bitmask) { 2942 if (mxge_verbose) 2943 device_printf(sc->dev, "%s:%s\n", 2944 cage_type, 2945 mxge_media_types[i].name); 2946 2947 if (sc->current_media != mxge_media_types[i].flag) { 2948 mxge_media_init(sc); 2949 mxge_media_set(sc, mxge_media_types[i].flag); 2950 } 2951 return; 2952 } 2953 } 2954 if (mxge_verbose) 2955 device_printf(sc->dev, "%s media 0x%x unknown\n", 2956 cage_type, cmd.data0); 2957 2958 return; 2959 } 2960 2961 static void 2962 mxge_intr(void *arg) 2963 { 2964 struct mxge_slice_state *ss = arg; 2965 mxge_softc_t *sc = ss->sc; 2966 mcp_irq_data_t *stats = ss->fw_stats; 2967 mxge_tx_ring_t *tx = &ss->tx; 2968 mxge_rx_done_t *rx_done = &ss->rx_done; 2969 uint32_t send_done_count; 2970 uint8_t valid; 2971 2972 2973 #ifndef IFNET_BUF_RING 2974 /* an interrupt on a non-zero slice is implicitly valid 2975 since MSI-X irqs are not shared */ 2976 if (ss != sc->ss) { 2977 mxge_clean_rx_done(ss); 2978 *ss->irq_claim = be32toh(3); 2979 return; 2980 } 2981 #endif 2982 2983 /* make sure the DMA has finished */ 2984 if (!stats->valid) { 2985 return; 2986 } 2987 valid = stats->valid; 2988 2989 if (sc->legacy_irq) { 2990 /* lower legacy IRQ */ 2991 *sc->irq_deassert = 0; 2992 if (!mxge_deassert_wait) 2993 /* don't wait for conf. that irq is low */ 2994 stats->valid = 0; 2995 } else { 2996 stats->valid = 0; 2997 } 2998 2999 /* loop while waiting for legacy irq deassertion */ 3000 do { 3001 /* check for transmit completes and receives */ 3002 send_done_count = be32toh(stats->send_done_count); 3003 while ((send_done_count != tx->pkt_done) || 3004 (rx_done->entry[rx_done->idx].length != 0)) { 3005 if (send_done_count != tx->pkt_done) 3006 mxge_tx_done(ss, (int)send_done_count); 3007 mxge_clean_rx_done(ss); 3008 send_done_count = be32toh(stats->send_done_count); 3009 } 3010 if (sc->legacy_irq && mxge_deassert_wait) 3011 wmb(); 3012 } while (*((volatile uint8_t *) &stats->valid)); 3013 3014 /* fw link & error stats meaningful only on the first slice */ 3015 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 3016 if (sc->link_state != stats->link_up) { 3017 sc->link_state = stats->link_up; 3018 if (sc->link_state) { 3019 if_link_state_change(sc->ifp, LINK_STATE_UP); 3020 if_initbaudrate(sc->ifp, IF_Gbps(10)); 3021 if (mxge_verbose) 3022 device_printf(sc->dev, "link up\n"); 3023 } else { 3024 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3025 sc->ifp->if_baudrate = 0; 3026 if (mxge_verbose) 3027 device_printf(sc->dev, "link down\n"); 3028 } 3029 sc->need_media_probe = 1; 3030 } 3031 if (sc->rdma_tags_available != 3032 be32toh(stats->rdma_tags_available)) { 3033 sc->rdma_tags_available = 3034 be32toh(stats->rdma_tags_available); 3035 device_printf(sc->dev, "RDMA timed out! %d tags " 3036 "left\n", sc->rdma_tags_available); 3037 } 3038 3039 if (stats->link_down) { 3040 sc->down_cnt += stats->link_down; 3041 sc->link_state = 0; 3042 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3043 } 3044 } 3045 3046 /* check to see if we have rx token to pass back */ 3047 if (valid & 0x1) 3048 *ss->irq_claim = be32toh(3); 3049 *(ss->irq_claim + 1) = be32toh(3); 3050 } 3051 3052 static void 3053 mxge_init(void *arg) 3054 { 3055 mxge_softc_t *sc = arg; 3056 struct ifnet *ifp = sc->ifp; 3057 3058 3059 mtx_lock(&sc->driver_mtx); 3060 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 3061 (void) mxge_open(sc); 3062 mtx_unlock(&sc->driver_mtx); 3063 } 3064 3065 3066 3067 static void 3068 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 3069 { 3070 struct lro_entry *lro_entry; 3071 int i; 3072 3073 while (!SLIST_EMPTY(&ss->lro_free)) { 3074 lro_entry = SLIST_FIRST(&ss->lro_free); 3075 SLIST_REMOVE_HEAD(&ss->lro_free, next); 3076 free(lro_entry, M_DEVBUF); 3077 } 3078 3079 for (i = 0; i <= ss->rx_big.mask; i++) { 3080 if (ss->rx_big.info[i].m == NULL) 3081 continue; 3082 bus_dmamap_unload(ss->rx_big.dmat, 3083 ss->rx_big.info[i].map); 3084 m_freem(ss->rx_big.info[i].m); 3085 ss->rx_big.info[i].m = NULL; 3086 } 3087 3088 for (i = 0; i <= ss->rx_small.mask; i++) { 3089 if (ss->rx_small.info[i].m == NULL) 3090 continue; 3091 bus_dmamap_unload(ss->rx_small.dmat, 3092 ss->rx_small.info[i].map); 3093 m_freem(ss->rx_small.info[i].m); 3094 ss->rx_small.info[i].m = NULL; 3095 } 3096 3097 /* transmit ring used only on the first slice */ 3098 if (ss->tx.info == NULL) 3099 return; 3100 3101 for (i = 0; i <= ss->tx.mask; i++) { 3102 ss->tx.info[i].flag = 0; 3103 if (ss->tx.info[i].m == NULL) 3104 continue; 3105 bus_dmamap_unload(ss->tx.dmat, 3106 ss->tx.info[i].map); 3107 m_freem(ss->tx.info[i].m); 3108 ss->tx.info[i].m = NULL; 3109 } 3110 } 3111 3112 static void 3113 mxge_free_mbufs(mxge_softc_t *sc) 3114 { 3115 int slice; 3116 3117 for (slice = 0; slice < sc->num_slices; slice++) 3118 mxge_free_slice_mbufs(&sc->ss[slice]); 3119 } 3120 3121 static void 3122 mxge_free_slice_rings(struct mxge_slice_state *ss) 3123 { 3124 int i; 3125 3126 3127 if (ss->rx_done.entry != NULL) 3128 mxge_dma_free(&ss->rx_done.dma); 3129 ss->rx_done.entry = NULL; 3130 3131 if (ss->tx.req_bytes != NULL) 3132 free(ss->tx.req_bytes, M_DEVBUF); 3133 ss->tx.req_bytes = NULL; 3134 3135 if (ss->tx.seg_list != NULL) 3136 free(ss->tx.seg_list, M_DEVBUF); 3137 ss->tx.seg_list = NULL; 3138 3139 if (ss->rx_small.shadow != NULL) 3140 free(ss->rx_small.shadow, M_DEVBUF); 3141 ss->rx_small.shadow = NULL; 3142 3143 if (ss->rx_big.shadow != NULL) 3144 free(ss->rx_big.shadow, M_DEVBUF); 3145 ss->rx_big.shadow = NULL; 3146 3147 if (ss->tx.info != NULL) { 3148 if (ss->tx.dmat != NULL) { 3149 for (i = 0; i <= ss->tx.mask; i++) { 3150 bus_dmamap_destroy(ss->tx.dmat, 3151 ss->tx.info[i].map); 3152 } 3153 bus_dma_tag_destroy(ss->tx.dmat); 3154 } 3155 free(ss->tx.info, M_DEVBUF); 3156 } 3157 ss->tx.info = NULL; 3158 3159 if (ss->rx_small.info != NULL) { 3160 if (ss->rx_small.dmat != NULL) { 3161 for (i = 0; i <= ss->rx_small.mask; i++) { 3162 bus_dmamap_destroy(ss->rx_small.dmat, 3163 ss->rx_small.info[i].map); 3164 } 3165 bus_dmamap_destroy(ss->rx_small.dmat, 3166 ss->rx_small.extra_map); 3167 bus_dma_tag_destroy(ss->rx_small.dmat); 3168 } 3169 free(ss->rx_small.info, M_DEVBUF); 3170 } 3171 ss->rx_small.info = NULL; 3172 3173 if (ss->rx_big.info != NULL) { 3174 if (ss->rx_big.dmat != NULL) { 3175 for (i = 0; i <= ss->rx_big.mask; i++) { 3176 bus_dmamap_destroy(ss->rx_big.dmat, 3177 ss->rx_big.info[i].map); 3178 } 3179 bus_dmamap_destroy(ss->rx_big.dmat, 3180 ss->rx_big.extra_map); 3181 bus_dma_tag_destroy(ss->rx_big.dmat); 3182 } 3183 free(ss->rx_big.info, M_DEVBUF); 3184 } 3185 ss->rx_big.info = NULL; 3186 } 3187 3188 static void 3189 mxge_free_rings(mxge_softc_t *sc) 3190 { 3191 int slice; 3192 3193 for (slice = 0; slice < sc->num_slices; slice++) 3194 mxge_free_slice_rings(&sc->ss[slice]); 3195 } 3196 3197 static int 3198 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3199 int tx_ring_entries) 3200 { 3201 mxge_softc_t *sc = ss->sc; 3202 size_t bytes; 3203 int err, i; 3204 3205 err = ENOMEM; 3206 3207 /* allocate per-slice receive resources */ 3208 3209 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 3210 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 3211 3212 /* allocate the rx shadow rings */ 3213 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 3214 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3215 if (ss->rx_small.shadow == NULL) 3216 return err; 3217 3218 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 3219 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3220 if (ss->rx_big.shadow == NULL) 3221 return err; 3222 3223 /* allocate the rx host info rings */ 3224 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 3225 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3226 if (ss->rx_small.info == NULL) 3227 return err; 3228 3229 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 3230 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3231 if (ss->rx_big.info == NULL) 3232 return err; 3233 3234 /* allocate the rx busdma resources */ 3235 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3236 1, /* alignment */ 3237 4096, /* boundary */ 3238 BUS_SPACE_MAXADDR, /* low */ 3239 BUS_SPACE_MAXADDR, /* high */ 3240 NULL, NULL, /* filter */ 3241 MHLEN, /* maxsize */ 3242 1, /* num segs */ 3243 MHLEN, /* maxsegsize */ 3244 BUS_DMA_ALLOCNOW, /* flags */ 3245 NULL, NULL, /* lock */ 3246 &ss->rx_small.dmat); /* tag */ 3247 if (err != 0) { 3248 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3249 err); 3250 return err; 3251 } 3252 3253 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3254 1, /* alignment */ 3255 #if MXGE_VIRT_JUMBOS 3256 4096, /* boundary */ 3257 #else 3258 0, /* boundary */ 3259 #endif 3260 BUS_SPACE_MAXADDR, /* low */ 3261 BUS_SPACE_MAXADDR, /* high */ 3262 NULL, NULL, /* filter */ 3263 3*4096, /* maxsize */ 3264 #if MXGE_VIRT_JUMBOS 3265 3, /* num segs */ 3266 4096, /* maxsegsize*/ 3267 #else 3268 1, /* num segs */ 3269 MJUM9BYTES, /* maxsegsize*/ 3270 #endif 3271 BUS_DMA_ALLOCNOW, /* flags */ 3272 NULL, NULL, /* lock */ 3273 &ss->rx_big.dmat); /* tag */ 3274 if (err != 0) { 3275 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3276 err); 3277 return err; 3278 } 3279 for (i = 0; i <= ss->rx_small.mask; i++) { 3280 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3281 &ss->rx_small.info[i].map); 3282 if (err != 0) { 3283 device_printf(sc->dev, "Err %d rx_small dmamap\n", 3284 err); 3285 return err; 3286 } 3287 } 3288 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3289 &ss->rx_small.extra_map); 3290 if (err != 0) { 3291 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 3292 err); 3293 return err; 3294 } 3295 3296 for (i = 0; i <= ss->rx_big.mask; i++) { 3297 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3298 &ss->rx_big.info[i].map); 3299 if (err != 0) { 3300 device_printf(sc->dev, "Err %d rx_big dmamap\n", 3301 err); 3302 return err; 3303 } 3304 } 3305 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3306 &ss->rx_big.extra_map); 3307 if (err != 0) { 3308 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 3309 err); 3310 return err; 3311 } 3312 3313 /* now allocate TX resouces */ 3314 3315 #ifndef IFNET_BUF_RING 3316 /* only use a single TX ring for now */ 3317 if (ss != ss->sc->ss) 3318 return 0; 3319 #endif 3320 3321 ss->tx.mask = tx_ring_entries - 1; 3322 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3323 3324 3325 /* allocate the tx request copy block */ 3326 bytes = 8 + 3327 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3328 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 3329 if (ss->tx.req_bytes == NULL) 3330 return err; 3331 /* ensure req_list entries are aligned to 8 bytes */ 3332 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3333 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL); 3334 3335 /* allocate the tx busdma segment list */ 3336 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3337 ss->tx.seg_list = (bus_dma_segment_t *) 3338 malloc(bytes, M_DEVBUF, M_WAITOK); 3339 if (ss->tx.seg_list == NULL) 3340 return err; 3341 3342 /* allocate the tx host info ring */ 3343 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3344 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3345 if (ss->tx.info == NULL) 3346 return err; 3347 3348 /* allocate the tx busdma resources */ 3349 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3350 1, /* alignment */ 3351 sc->tx_boundary, /* boundary */ 3352 BUS_SPACE_MAXADDR, /* low */ 3353 BUS_SPACE_MAXADDR, /* high */ 3354 NULL, NULL, /* filter */ 3355 65536 + 256, /* maxsize */ 3356 ss->tx.max_desc - 2, /* num segs */ 3357 sc->tx_boundary, /* maxsegsz */ 3358 BUS_DMA_ALLOCNOW, /* flags */ 3359 NULL, NULL, /* lock */ 3360 &ss->tx.dmat); /* tag */ 3361 3362 if (err != 0) { 3363 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3364 err); 3365 return err; 3366 } 3367 3368 /* now use these tags to setup dmamaps for each slot 3369 in the ring */ 3370 for (i = 0; i <= ss->tx.mask; i++) { 3371 err = bus_dmamap_create(ss->tx.dmat, 0, 3372 &ss->tx.info[i].map); 3373 if (err != 0) { 3374 device_printf(sc->dev, "Err %d tx dmamap\n", 3375 err); 3376 return err; 3377 } 3378 } 3379 return 0; 3380 3381 } 3382 3383 static int 3384 mxge_alloc_rings(mxge_softc_t *sc) 3385 { 3386 mxge_cmd_t cmd; 3387 int tx_ring_size; 3388 int tx_ring_entries, rx_ring_entries; 3389 int err, slice; 3390 3391 /* get ring sizes */ 3392 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3393 tx_ring_size = cmd.data0; 3394 if (err != 0) { 3395 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3396 goto abort; 3397 } 3398 3399 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3400 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3401 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 3402 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 3403 IFQ_SET_READY(&sc->ifp->if_snd); 3404 3405 for (slice = 0; slice < sc->num_slices; slice++) { 3406 err = mxge_alloc_slice_rings(&sc->ss[slice], 3407 rx_ring_entries, 3408 tx_ring_entries); 3409 if (err != 0) 3410 goto abort; 3411 } 3412 return 0; 3413 3414 abort: 3415 mxge_free_rings(sc); 3416 return err; 3417 3418 } 3419 3420 3421 static void 3422 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3423 { 3424 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3425 3426 if (bufsize < MCLBYTES) { 3427 /* easy, everything fits in a single buffer */ 3428 *big_buf_size = MCLBYTES; 3429 *cl_size = MCLBYTES; 3430 *nbufs = 1; 3431 return; 3432 } 3433 3434 if (bufsize < MJUMPAGESIZE) { 3435 /* still easy, everything still fits in a single buffer */ 3436 *big_buf_size = MJUMPAGESIZE; 3437 *cl_size = MJUMPAGESIZE; 3438 *nbufs = 1; 3439 return; 3440 } 3441 #if MXGE_VIRT_JUMBOS 3442 /* now we need to use virtually contiguous buffers */ 3443 *cl_size = MJUM9BYTES; 3444 *big_buf_size = 4096; 3445 *nbufs = mtu / 4096 + 1; 3446 /* needs to be a power of two, so round up */ 3447 if (*nbufs == 3) 3448 *nbufs = 4; 3449 #else 3450 *cl_size = MJUM9BYTES; 3451 *big_buf_size = MJUM9BYTES; 3452 *nbufs = 1; 3453 #endif 3454 } 3455 3456 static int 3457 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3458 { 3459 mxge_softc_t *sc; 3460 mxge_cmd_t cmd; 3461 bus_dmamap_t map; 3462 struct lro_entry *lro_entry; 3463 int err, i, slice; 3464 3465 3466 sc = ss->sc; 3467 slice = ss - sc->ss; 3468 3469 SLIST_INIT(&ss->lro_free); 3470 SLIST_INIT(&ss->lro_active); 3471 3472 for (i = 0; i < sc->lro_cnt; i++) { 3473 lro_entry = (struct lro_entry *) 3474 malloc(sizeof (*lro_entry), M_DEVBUF, 3475 M_NOWAIT | M_ZERO); 3476 if (lro_entry == NULL) { 3477 sc->lro_cnt = i; 3478 break; 3479 } 3480 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next); 3481 } 3482 /* get the lanai pointers to the send and receive rings */ 3483 3484 err = 0; 3485 #ifndef IFNET_BUF_RING 3486 /* We currently only send from the first slice */ 3487 if (slice == 0) { 3488 #endif 3489 cmd.data0 = slice; 3490 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3491 ss->tx.lanai = 3492 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3493 ss->tx.send_go = (volatile uint32_t *) 3494 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3495 ss->tx.send_stop = (volatile uint32_t *) 3496 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3497 #ifndef IFNET_BUF_RING 3498 } 3499 #endif 3500 cmd.data0 = slice; 3501 err |= mxge_send_cmd(sc, 3502 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3503 ss->rx_small.lanai = 3504 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3505 cmd.data0 = slice; 3506 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3507 ss->rx_big.lanai = 3508 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3509 3510 if (err != 0) { 3511 device_printf(sc->dev, 3512 "failed to get ring sizes or locations\n"); 3513 return EIO; 3514 } 3515 3516 /* stock receive rings */ 3517 for (i = 0; i <= ss->rx_small.mask; i++) { 3518 map = ss->rx_small.info[i].map; 3519 err = mxge_get_buf_small(ss, map, i); 3520 if (err) { 3521 device_printf(sc->dev, "alloced %d/%d smalls\n", 3522 i, ss->rx_small.mask + 1); 3523 return ENOMEM; 3524 } 3525 } 3526 for (i = 0; i <= ss->rx_big.mask; i++) { 3527 ss->rx_big.shadow[i].addr_low = 0xffffffff; 3528 ss->rx_big.shadow[i].addr_high = 0xffffffff; 3529 } 3530 ss->rx_big.nbufs = nbufs; 3531 ss->rx_big.cl_size = cl_size; 3532 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN + 3533 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3534 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { 3535 map = ss->rx_big.info[i].map; 3536 err = mxge_get_buf_big(ss, map, i); 3537 if (err) { 3538 device_printf(sc->dev, "alloced %d/%d bigs\n", 3539 i, ss->rx_big.mask + 1); 3540 return ENOMEM; 3541 } 3542 } 3543 return 0; 3544 } 3545 3546 static int 3547 mxge_open(mxge_softc_t *sc) 3548 { 3549 mxge_cmd_t cmd; 3550 int err, big_bytes, nbufs, slice, cl_size, i; 3551 bus_addr_t bus; 3552 volatile uint8_t *itable; 3553 struct mxge_slice_state *ss; 3554 3555 /* Copy the MAC address in case it was overridden */ 3556 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 3557 3558 err = mxge_reset(sc, 1); 3559 if (err != 0) { 3560 device_printf(sc->dev, "failed to reset\n"); 3561 return EIO; 3562 } 3563 3564 if (sc->num_slices > 1) { 3565 /* setup the indirection table */ 3566 cmd.data0 = sc->num_slices; 3567 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 3568 &cmd); 3569 3570 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 3571 &cmd); 3572 if (err != 0) { 3573 device_printf(sc->dev, 3574 "failed to setup rss tables\n"); 3575 return err; 3576 } 3577 3578 /* just enable an identity mapping */ 3579 itable = sc->sram + cmd.data0; 3580 for (i = 0; i < sc->num_slices; i++) 3581 itable[i] = (uint8_t)i; 3582 3583 cmd.data0 = 1; 3584 cmd.data1 = mxge_rss_hash_type; 3585 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3586 if (err != 0) { 3587 device_printf(sc->dev, "failed to enable slices\n"); 3588 return err; 3589 } 3590 } 3591 3592 3593 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); 3594 3595 cmd.data0 = nbufs; 3596 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 3597 &cmd); 3598 /* error is only meaningful if we're trying to set 3599 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 3600 if (err && nbufs > 1) { 3601 device_printf(sc->dev, 3602 "Failed to set alway-use-n to %d\n", 3603 nbufs); 3604 return EIO; 3605 } 3606 /* Give the firmware the mtu and the big and small buffer 3607 sizes. The firmware wants the big buf size to be a power 3608 of two. Luckily, FreeBSD's clusters are powers of two */ 3609 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3610 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3611 cmd.data0 = MHLEN - MXGEFW_PAD; 3612 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3613 &cmd); 3614 cmd.data0 = big_bytes; 3615 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3616 3617 if (err != 0) { 3618 device_printf(sc->dev, "failed to setup params\n"); 3619 goto abort; 3620 } 3621 3622 /* Now give him the pointer to the stats block */ 3623 for (slice = 0; 3624 #ifdef IFNET_BUF_RING 3625 slice < sc->num_slices; 3626 #else 3627 slice < 1; 3628 #endif 3629 slice++) { 3630 ss = &sc->ss[slice]; 3631 cmd.data0 = 3632 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3633 cmd.data1 = 3634 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3635 cmd.data2 = sizeof(struct mcp_irq_data); 3636 cmd.data2 |= (slice << 16); 3637 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3638 } 3639 3640 if (err != 0) { 3641 bus = sc->ss->fw_stats_dma.bus_addr; 3642 bus += offsetof(struct mcp_irq_data, send_done_count); 3643 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3644 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3645 err = mxge_send_cmd(sc, 3646 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3647 &cmd); 3648 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3649 sc->fw_multicast_support = 0; 3650 } else { 3651 sc->fw_multicast_support = 1; 3652 } 3653 3654 if (err != 0) { 3655 device_printf(sc->dev, "failed to setup params\n"); 3656 goto abort; 3657 } 3658 3659 for (slice = 0; slice < sc->num_slices; slice++) { 3660 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3661 if (err != 0) { 3662 device_printf(sc->dev, "couldn't open slice %d\n", 3663 slice); 3664 goto abort; 3665 } 3666 } 3667 3668 /* Finally, start the firmware running */ 3669 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3670 if (err) { 3671 device_printf(sc->dev, "Couldn't bring up link\n"); 3672 goto abort; 3673 } 3674 #ifdef IFNET_BUF_RING 3675 for (slice = 0; slice < sc->num_slices; slice++) { 3676 ss = &sc->ss[slice]; 3677 ss->if_drv_flags |= IFF_DRV_RUNNING; 3678 ss->if_drv_flags &= ~IFF_DRV_OACTIVE; 3679 } 3680 #endif 3681 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3682 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3683 3684 return 0; 3685 3686 3687 abort: 3688 mxge_free_mbufs(sc); 3689 3690 return err; 3691 } 3692 3693 static int 3694 mxge_close(mxge_softc_t *sc, int down) 3695 { 3696 mxge_cmd_t cmd; 3697 int err, old_down_cnt; 3698 #ifdef IFNET_BUF_RING 3699 struct mxge_slice_state *ss; 3700 int slice; 3701 #endif 3702 3703 #ifdef IFNET_BUF_RING 3704 for (slice = 0; slice < sc->num_slices; slice++) { 3705 ss = &sc->ss[slice]; 3706 ss->if_drv_flags &= ~IFF_DRV_RUNNING; 3707 } 3708 #endif 3709 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3710 if (!down) { 3711 old_down_cnt = sc->down_cnt; 3712 wmb(); 3713 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3714 if (err) { 3715 device_printf(sc->dev, 3716 "Couldn't bring down link\n"); 3717 } 3718 if (old_down_cnt == sc->down_cnt) { 3719 /* wait for down irq */ 3720 DELAY(10 * sc->intr_coal_delay); 3721 } 3722 wmb(); 3723 if (old_down_cnt == sc->down_cnt) { 3724 device_printf(sc->dev, "never got down irq\n"); 3725 } 3726 } 3727 mxge_free_mbufs(sc); 3728 3729 return 0; 3730 } 3731 3732 static void 3733 mxge_setup_cfg_space(mxge_softc_t *sc) 3734 { 3735 device_t dev = sc->dev; 3736 int reg; 3737 uint16_t cmd, lnk, pectl; 3738 3739 /* find the PCIe link width and set max read request to 4KB*/ 3740 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { 3741 lnk = pci_read_config(dev, reg + 0x12, 2); 3742 sc->link_width = (lnk >> 4) & 0x3f; 3743 3744 if (sc->pectl == 0) { 3745 pectl = pci_read_config(dev, reg + 0x8, 2); 3746 pectl = (pectl & ~0x7000) | (5 << 12); 3747 pci_write_config(dev, reg + 0x8, pectl, 2); 3748 sc->pectl = pectl; 3749 } else { 3750 /* restore saved pectl after watchdog reset */ 3751 pci_write_config(dev, reg + 0x8, sc->pectl, 2); 3752 } 3753 } 3754 3755 /* Enable DMA and Memory space access */ 3756 pci_enable_busmaster(dev); 3757 cmd = pci_read_config(dev, PCIR_COMMAND, 2); 3758 cmd |= PCIM_CMD_MEMEN; 3759 pci_write_config(dev, PCIR_COMMAND, cmd, 2); 3760 } 3761 3762 static uint32_t 3763 mxge_read_reboot(mxge_softc_t *sc) 3764 { 3765 device_t dev = sc->dev; 3766 uint32_t vs; 3767 3768 /* find the vendor specific offset */ 3769 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) { 3770 device_printf(sc->dev, 3771 "could not find vendor specific offset\n"); 3772 return (uint32_t)-1; 3773 } 3774 /* enable read32 mode */ 3775 pci_write_config(dev, vs + 0x10, 0x3, 1); 3776 /* tell NIC which register to read */ 3777 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3778 return (pci_read_config(dev, vs + 0x14, 4)); 3779 } 3780 3781 static void 3782 mxge_watchdog_reset(mxge_softc_t *sc) 3783 { 3784 struct pci_devinfo *dinfo; 3785 struct mxge_slice_state *ss; 3786 int err, running, s, num_tx_slices = 1; 3787 uint32_t reboot; 3788 uint16_t cmd; 3789 3790 err = ENXIO; 3791 3792 device_printf(sc->dev, "Watchdog reset!\n"); 3793 3794 /* 3795 * check to see if the NIC rebooted. If it did, then all of 3796 * PCI config space has been reset, and things like the 3797 * busmaster bit will be zero. If this is the case, then we 3798 * must restore PCI config space before the NIC can be used 3799 * again 3800 */ 3801 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3802 if (cmd == 0xffff) { 3803 /* 3804 * maybe the watchdog caught the NIC rebooting; wait 3805 * up to 100ms for it to finish. If it does not come 3806 * back, then give up 3807 */ 3808 DELAY(1000*100); 3809 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3810 if (cmd == 0xffff) { 3811 device_printf(sc->dev, "NIC disappeared!\n"); 3812 } 3813 } 3814 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3815 /* print the reboot status */ 3816 reboot = mxge_read_reboot(sc); 3817 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3818 reboot); 3819 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 3820 if (running) { 3821 3822 /* 3823 * quiesce NIC so that TX routines will not try to 3824 * xmit after restoration of BAR 3825 */ 3826 3827 /* Mark the link as down */ 3828 if (sc->link_state) { 3829 sc->link_state = 0; 3830 if_link_state_change(sc->ifp, 3831 LINK_STATE_DOWN); 3832 } 3833 #ifdef IFNET_BUF_RING 3834 num_tx_slices = sc->num_slices; 3835 #endif 3836 /* grab all TX locks to ensure no tx */ 3837 for (s = 0; s < num_tx_slices; s++) { 3838 ss = &sc->ss[s]; 3839 mtx_lock(&ss->tx.mtx); 3840 } 3841 mxge_close(sc, 1); 3842 } 3843 /* restore PCI configuration space */ 3844 dinfo = device_get_ivars(sc->dev); 3845 pci_cfg_restore(sc->dev, dinfo); 3846 3847 /* and redo any changes we made to our config space */ 3848 mxge_setup_cfg_space(sc); 3849 3850 /* reload f/w */ 3851 err = mxge_load_firmware(sc, 0); 3852 if (err) { 3853 device_printf(sc->dev, 3854 "Unable to re-load f/w\n"); 3855 } 3856 if (running) { 3857 if (!err) 3858 err = mxge_open(sc); 3859 /* release all TX locks */ 3860 for (s = 0; s < num_tx_slices; s++) { 3861 ss = &sc->ss[s]; 3862 #ifdef IFNET_BUF_RING 3863 mxge_start_locked(ss); 3864 #endif 3865 mtx_unlock(&ss->tx.mtx); 3866 } 3867 } 3868 sc->watchdog_resets++; 3869 } else { 3870 device_printf(sc->dev, 3871 "NIC did not reboot, not resetting\n"); 3872 err = 0; 3873 } 3874 if (err) { 3875 device_printf(sc->dev, "watchdog reset failed\n"); 3876 } else { 3877 if (sc->dying == 2) 3878 sc->dying = 0; 3879 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3880 } 3881 } 3882 3883 static void 3884 mxge_watchdog_task(void *arg, int pending) 3885 { 3886 mxge_softc_t *sc = arg; 3887 3888 3889 mtx_lock(&sc->driver_mtx); 3890 mxge_watchdog_reset(sc); 3891 mtx_unlock(&sc->driver_mtx); 3892 } 3893 3894 static void 3895 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice) 3896 { 3897 tx = &sc->ss[slice].tx; 3898 device_printf(sc->dev, "slice %d struck? ring state:\n", slice); 3899 device_printf(sc->dev, 3900 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3901 tx->req, tx->done, tx->queue_active); 3902 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3903 tx->activate, tx->deactivate); 3904 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3905 tx->pkt_done, 3906 be32toh(sc->ss->fw_stats->send_done_count)); 3907 } 3908 3909 static int 3910 mxge_watchdog(mxge_softc_t *sc) 3911 { 3912 mxge_tx_ring_t *tx; 3913 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3914 int i, err = 0; 3915 3916 /* see if we have outstanding transmits, which 3917 have been pending for more than mxge_ticks */ 3918 for (i = 0; 3919 #ifdef IFNET_BUF_RING 3920 (i < sc->num_slices) && (err == 0); 3921 #else 3922 (i < 1) && (err == 0); 3923 #endif 3924 i++) { 3925 tx = &sc->ss[i].tx; 3926 if (tx->req != tx->done && 3927 tx->watchdog_req != tx->watchdog_done && 3928 tx->done == tx->watchdog_done) { 3929 /* check for pause blocking before resetting */ 3930 if (tx->watchdog_rx_pause == rx_pause) { 3931 mxge_warn_stuck(sc, tx, i); 3932 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 3933 return (ENXIO); 3934 } 3935 else 3936 device_printf(sc->dev, "Flow control blocking " 3937 "xmits, check link partner\n"); 3938 } 3939 3940 tx->watchdog_req = tx->req; 3941 tx->watchdog_done = tx->done; 3942 tx->watchdog_rx_pause = rx_pause; 3943 } 3944 3945 if (sc->need_media_probe) 3946 mxge_media_probe(sc); 3947 return (err); 3948 } 3949 3950 static u_long 3951 mxge_update_stats(mxge_softc_t *sc) 3952 { 3953 struct mxge_slice_state *ss; 3954 u_long pkts = 0; 3955 u_long ipackets = 0; 3956 u_long opackets = 0; 3957 #ifdef IFNET_BUF_RING 3958 u_long obytes = 0; 3959 u_long omcasts = 0; 3960 u_long odrops = 0; 3961 #endif 3962 u_long oerrors = 0; 3963 int slice; 3964 3965 for (slice = 0; slice < sc->num_slices; slice++) { 3966 ss = &sc->ss[slice]; 3967 ipackets += ss->ipackets; 3968 opackets += ss->opackets; 3969 #ifdef IFNET_BUF_RING 3970 obytes += ss->obytes; 3971 omcasts += ss->omcasts; 3972 odrops += ss->tx.br->br_drops; 3973 #endif 3974 oerrors += ss->oerrors; 3975 } 3976 pkts = (ipackets - sc->ifp->if_ipackets); 3977 pkts += (opackets - sc->ifp->if_opackets); 3978 sc->ifp->if_ipackets = ipackets; 3979 sc->ifp->if_opackets = opackets; 3980 #ifdef IFNET_BUF_RING 3981 sc->ifp->if_obytes = obytes; 3982 sc->ifp->if_omcasts = omcasts; 3983 sc->ifp->if_snd.ifq_drops = odrops; 3984 #endif 3985 sc->ifp->if_oerrors = oerrors; 3986 return pkts; 3987 } 3988 3989 static void 3990 mxge_tick(void *arg) 3991 { 3992 mxge_softc_t *sc = arg; 3993 u_long pkts = 0; 3994 int err = 0; 3995 int running, ticks; 3996 uint16_t cmd; 3997 3998 ticks = mxge_ticks; 3999 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 4000 if (running) { 4001 /* aggregate stats from different slices */ 4002 pkts = mxge_update_stats(sc); 4003 if (!sc->watchdog_countdown) { 4004 err = mxge_watchdog(sc); 4005 sc->watchdog_countdown = 4; 4006 } 4007 sc->watchdog_countdown--; 4008 } 4009 if (pkts == 0) { 4010 /* ensure NIC did not suffer h/w fault while idle */ 4011 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 4012 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 4013 sc->dying = 2; 4014 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 4015 err = ENXIO; 4016 } 4017 /* look less often if NIC is idle */ 4018 ticks *= 4; 4019 } 4020 4021 if (err == 0) 4022 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 4023 4024 } 4025 4026 static int 4027 mxge_media_change(struct ifnet *ifp) 4028 { 4029 return EINVAL; 4030 } 4031 4032 static int 4033 mxge_change_mtu(mxge_softc_t *sc, int mtu) 4034 { 4035 struct ifnet *ifp = sc->ifp; 4036 int real_mtu, old_mtu; 4037 int err = 0; 4038 4039 4040 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 4041 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 4042 return EINVAL; 4043 mtx_lock(&sc->driver_mtx); 4044 old_mtu = ifp->if_mtu; 4045 ifp->if_mtu = mtu; 4046 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4047 mxge_close(sc, 0); 4048 err = mxge_open(sc); 4049 if (err != 0) { 4050 ifp->if_mtu = old_mtu; 4051 mxge_close(sc, 0); 4052 (void) mxge_open(sc); 4053 } 4054 } 4055 mtx_unlock(&sc->driver_mtx); 4056 return err; 4057 } 4058 4059 static void 4060 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 4061 { 4062 mxge_softc_t *sc = ifp->if_softc; 4063 4064 4065 if (sc == NULL) 4066 return; 4067 ifmr->ifm_status = IFM_AVALID; 4068 ifmr->ifm_active = IFM_ETHER | IFM_FDX; 4069 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 4070 ifmr->ifm_active |= sc->current_media; 4071 } 4072 4073 static int 4074 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 4075 { 4076 mxge_softc_t *sc = ifp->if_softc; 4077 struct ifreq *ifr = (struct ifreq *)data; 4078 int err, mask; 4079 4080 err = 0; 4081 switch (command) { 4082 case SIOCSIFADDR: 4083 case SIOCGIFADDR: 4084 err = ether_ioctl(ifp, command, data); 4085 break; 4086 4087 case SIOCSIFMTU: 4088 err = mxge_change_mtu(sc, ifr->ifr_mtu); 4089 break; 4090 4091 case SIOCSIFFLAGS: 4092 mtx_lock(&sc->driver_mtx); 4093 if (sc->dying) { 4094 mtx_unlock(&sc->driver_mtx); 4095 return EINVAL; 4096 } 4097 if (ifp->if_flags & IFF_UP) { 4098 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 4099 err = mxge_open(sc); 4100 } else { 4101 /* take care of promis can allmulti 4102 flag chages */ 4103 mxge_change_promisc(sc, 4104 ifp->if_flags & IFF_PROMISC); 4105 mxge_set_multicast_list(sc); 4106 } 4107 } else { 4108 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4109 mxge_close(sc, 0); 4110 } 4111 } 4112 mtx_unlock(&sc->driver_mtx); 4113 break; 4114 4115 case SIOCADDMULTI: 4116 case SIOCDELMULTI: 4117 mtx_lock(&sc->driver_mtx); 4118 mxge_set_multicast_list(sc); 4119 mtx_unlock(&sc->driver_mtx); 4120 break; 4121 4122 case SIOCSIFCAP: 4123 mtx_lock(&sc->driver_mtx); 4124 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 4125 if (mask & IFCAP_TXCSUM) { 4126 if (IFCAP_TXCSUM & ifp->if_capenable) { 4127 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 4128 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP 4129 | CSUM_TSO); 4130 } else { 4131 ifp->if_capenable |= IFCAP_TXCSUM; 4132 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 4133 } 4134 } else if (mask & IFCAP_RXCSUM) { 4135 if (IFCAP_RXCSUM & ifp->if_capenable) { 4136 ifp->if_capenable &= ~IFCAP_RXCSUM; 4137 sc->csum_flag = 0; 4138 } else { 4139 ifp->if_capenable |= IFCAP_RXCSUM; 4140 sc->csum_flag = 1; 4141 } 4142 } 4143 if (mask & IFCAP_TSO4) { 4144 if (IFCAP_TSO4 & ifp->if_capenable) { 4145 ifp->if_capenable &= ~IFCAP_TSO4; 4146 ifp->if_hwassist &= ~CSUM_TSO; 4147 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 4148 ifp->if_capenable |= IFCAP_TSO4; 4149 ifp->if_hwassist |= CSUM_TSO; 4150 } else { 4151 printf("mxge requires tx checksum offload" 4152 " be enabled to use TSO\n"); 4153 err = EINVAL; 4154 } 4155 } 4156 if (mask & IFCAP_LRO) { 4157 if (IFCAP_LRO & ifp->if_capenable) 4158 err = mxge_change_lro_locked(sc, 0); 4159 else 4160 err = mxge_change_lro_locked(sc, mxge_lro_cnt); 4161 } 4162 if (mask & IFCAP_VLAN_HWTAGGING) 4163 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 4164 if (mask & IFCAP_VLAN_HWTSO) 4165 ifp->if_capenable ^= IFCAP_VLAN_HWTSO; 4166 4167 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) || 4168 !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING)) 4169 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO; 4170 4171 mtx_unlock(&sc->driver_mtx); 4172 VLAN_CAPABILITIES(ifp); 4173 4174 break; 4175 4176 case SIOCGIFMEDIA: 4177 mtx_lock(&sc->driver_mtx); 4178 mxge_media_probe(sc); 4179 mtx_unlock(&sc->driver_mtx); 4180 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 4181 &sc->media, command); 4182 break; 4183 4184 default: 4185 err = ENOTTY; 4186 } 4187 return err; 4188 } 4189 4190 static void 4191 mxge_fetch_tunables(mxge_softc_t *sc) 4192 { 4193 4194 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); 4195 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 4196 &mxge_flow_control); 4197 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 4198 &mxge_intr_coal_delay); 4199 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 4200 &mxge_nvidia_ecrc_enable); 4201 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 4202 &mxge_force_firmware); 4203 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 4204 &mxge_deassert_wait); 4205 TUNABLE_INT_FETCH("hw.mxge.verbose", 4206 &mxge_verbose); 4207 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 4208 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt); 4209 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); 4210 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); 4211 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type); 4212 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu); 4213 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle); 4214 if (sc->lro_cnt != 0) 4215 mxge_lro_cnt = sc->lro_cnt; 4216 4217 if (bootverbose) 4218 mxge_verbose = 1; 4219 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 4220 mxge_intr_coal_delay = 30; 4221 if (mxge_ticks == 0) 4222 mxge_ticks = hz / 2; 4223 sc->pause = mxge_flow_control; 4224 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 4225 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) { 4226 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 4227 } 4228 if (mxge_initial_mtu > ETHERMTU_JUMBO || 4229 mxge_initial_mtu < ETHER_MIN_LEN) 4230 mxge_initial_mtu = ETHERMTU_JUMBO; 4231 4232 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE) 4233 mxge_throttle = MXGE_MAX_THROTTLE; 4234 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE) 4235 mxge_throttle = MXGE_MIN_THROTTLE; 4236 sc->throttle = mxge_throttle; 4237 } 4238 4239 4240 static void 4241 mxge_free_slices(mxge_softc_t *sc) 4242 { 4243 struct mxge_slice_state *ss; 4244 int i; 4245 4246 4247 if (sc->ss == NULL) 4248 return; 4249 4250 for (i = 0; i < sc->num_slices; i++) { 4251 ss = &sc->ss[i]; 4252 if (ss->fw_stats != NULL) { 4253 mxge_dma_free(&ss->fw_stats_dma); 4254 ss->fw_stats = NULL; 4255 #ifdef IFNET_BUF_RING 4256 if (ss->tx.br != NULL) { 4257 drbr_free(ss->tx.br, M_DEVBUF); 4258 ss->tx.br = NULL; 4259 } 4260 #endif 4261 mtx_destroy(&ss->tx.mtx); 4262 } 4263 if (ss->rx_done.entry != NULL) { 4264 mxge_dma_free(&ss->rx_done.dma); 4265 ss->rx_done.entry = NULL; 4266 } 4267 } 4268 free(sc->ss, M_DEVBUF); 4269 sc->ss = NULL; 4270 } 4271 4272 static int 4273 mxge_alloc_slices(mxge_softc_t *sc) 4274 { 4275 mxge_cmd_t cmd; 4276 struct mxge_slice_state *ss; 4277 size_t bytes; 4278 int err, i, max_intr_slots; 4279 4280 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4281 if (err != 0) { 4282 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4283 return err; 4284 } 4285 sc->rx_ring_size = cmd.data0; 4286 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); 4287 4288 bytes = sizeof (*sc->ss) * sc->num_slices; 4289 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO); 4290 if (sc->ss == NULL) 4291 return (ENOMEM); 4292 for (i = 0; i < sc->num_slices; i++) { 4293 ss = &sc->ss[i]; 4294 4295 ss->sc = sc; 4296 4297 /* allocate per-slice rx interrupt queues */ 4298 4299 bytes = max_intr_slots * sizeof (*ss->rx_done.entry); 4300 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096); 4301 if (err != 0) 4302 goto abort; 4303 ss->rx_done.entry = ss->rx_done.dma.addr; 4304 bzero(ss->rx_done.entry, bytes); 4305 4306 /* 4307 * allocate the per-slice firmware stats; stats 4308 * (including tx) are used used only on the first 4309 * slice for now 4310 */ 4311 #ifndef IFNET_BUF_RING 4312 if (i > 0) 4313 continue; 4314 #endif 4315 4316 bytes = sizeof (*ss->fw_stats); 4317 err = mxge_dma_alloc(sc, &ss->fw_stats_dma, 4318 sizeof (*ss->fw_stats), 64); 4319 if (err != 0) 4320 goto abort; 4321 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr; 4322 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name), 4323 "%s:tx(%d)", device_get_nameunit(sc->dev), i); 4324 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF); 4325 #ifdef IFNET_BUF_RING 4326 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK, 4327 &ss->tx.mtx); 4328 #endif 4329 } 4330 4331 return (0); 4332 4333 abort: 4334 mxge_free_slices(sc); 4335 return (ENOMEM); 4336 } 4337 4338 static void 4339 mxge_slice_probe(mxge_softc_t *sc) 4340 { 4341 mxge_cmd_t cmd; 4342 char *old_fw; 4343 int msix_cnt, status, max_intr_slots; 4344 4345 sc->num_slices = 1; 4346 /* 4347 * don't enable multiple slices if they are not enabled, 4348 * or if this is not an SMP system 4349 */ 4350 4351 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2) 4352 return; 4353 4354 /* see how many MSI-X interrupts are available */ 4355 msix_cnt = pci_msix_count(sc->dev); 4356 if (msix_cnt < 2) 4357 return; 4358 4359 /* now load the slice aware firmware see what it supports */ 4360 old_fw = sc->fw_name; 4361 if (old_fw == mxge_fw_aligned) 4362 sc->fw_name = mxge_fw_rss_aligned; 4363 else 4364 sc->fw_name = mxge_fw_rss_unaligned; 4365 status = mxge_load_firmware(sc, 0); 4366 if (status != 0) { 4367 device_printf(sc->dev, "Falling back to a single slice\n"); 4368 return; 4369 } 4370 4371 /* try to send a reset command to the card to see if it 4372 is alive */ 4373 memset(&cmd, 0, sizeof (cmd)); 4374 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 4375 if (status != 0) { 4376 device_printf(sc->dev, "failed reset\n"); 4377 goto abort_with_fw; 4378 } 4379 4380 /* get rx ring size */ 4381 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4382 if (status != 0) { 4383 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4384 goto abort_with_fw; 4385 } 4386 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t)); 4387 4388 /* tell it the size of the interrupt queues */ 4389 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot); 4390 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 4391 if (status != 0) { 4392 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 4393 goto abort_with_fw; 4394 } 4395 4396 /* ask the maximum number of slices it supports */ 4397 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 4398 if (status != 0) { 4399 device_printf(sc->dev, 4400 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); 4401 goto abort_with_fw; 4402 } 4403 sc->num_slices = cmd.data0; 4404 if (sc->num_slices > msix_cnt) 4405 sc->num_slices = msix_cnt; 4406 4407 if (mxge_max_slices == -1) { 4408 /* cap to number of CPUs in system */ 4409 if (sc->num_slices > mp_ncpus) 4410 sc->num_slices = mp_ncpus; 4411 } else { 4412 if (sc->num_slices > mxge_max_slices) 4413 sc->num_slices = mxge_max_slices; 4414 } 4415 /* make sure it is a power of two */ 4416 while (sc->num_slices & (sc->num_slices - 1)) 4417 sc->num_slices--; 4418 4419 if (mxge_verbose) 4420 device_printf(sc->dev, "using %d slices\n", 4421 sc->num_slices); 4422 4423 return; 4424 4425 abort_with_fw: 4426 sc->fw_name = old_fw; 4427 (void) mxge_load_firmware(sc, 0); 4428 } 4429 4430 static int 4431 mxge_add_msix_irqs(mxge_softc_t *sc) 4432 { 4433 size_t bytes; 4434 int count, err, i, rid; 4435 4436 rid = PCIR_BAR(2); 4437 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 4438 &rid, RF_ACTIVE); 4439 4440 if (sc->msix_table_res == NULL) { 4441 device_printf(sc->dev, "couldn't alloc MSIX table res\n"); 4442 return ENXIO; 4443 } 4444 4445 count = sc->num_slices; 4446 err = pci_alloc_msix(sc->dev, &count); 4447 if (err != 0) { 4448 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d" 4449 "err = %d \n", sc->num_slices, err); 4450 goto abort_with_msix_table; 4451 } 4452 if (count < sc->num_slices) { 4453 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n", 4454 count, sc->num_slices); 4455 device_printf(sc->dev, 4456 "Try setting hw.mxge.max_slices to %d\n", 4457 count); 4458 err = ENOSPC; 4459 goto abort_with_msix; 4460 } 4461 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices; 4462 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4463 if (sc->msix_irq_res == NULL) { 4464 err = ENOMEM; 4465 goto abort_with_msix; 4466 } 4467 4468 for (i = 0; i < sc->num_slices; i++) { 4469 rid = i + 1; 4470 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev, 4471 SYS_RES_IRQ, 4472 &rid, RF_ACTIVE); 4473 if (sc->msix_irq_res[i] == NULL) { 4474 device_printf(sc->dev, "couldn't allocate IRQ res" 4475 " for message %d\n", i); 4476 err = ENXIO; 4477 goto abort_with_res; 4478 } 4479 } 4480 4481 bytes = sizeof (*sc->msix_ih) * sc->num_slices; 4482 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4483 4484 for (i = 0; i < sc->num_slices; i++) { 4485 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i], 4486 INTR_TYPE_NET | INTR_MPSAFE, 4487 #if __FreeBSD_version > 700030 4488 NULL, 4489 #endif 4490 mxge_intr, &sc->ss[i], &sc->msix_ih[i]); 4491 if (err != 0) { 4492 device_printf(sc->dev, "couldn't setup intr for " 4493 "message %d\n", i); 4494 goto abort_with_intr; 4495 } 4496 bus_describe_intr(sc->dev, sc->msix_irq_res[i], 4497 sc->msix_ih[i], "s%d", i); 4498 } 4499 4500 if (mxge_verbose) { 4501 device_printf(sc->dev, "using %d msix IRQs:", 4502 sc->num_slices); 4503 for (i = 0; i < sc->num_slices; i++) 4504 printf(" %ld", rman_get_start(sc->msix_irq_res[i])); 4505 printf("\n"); 4506 } 4507 return (0); 4508 4509 abort_with_intr: 4510 for (i = 0; i < sc->num_slices; i++) { 4511 if (sc->msix_ih[i] != NULL) { 4512 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4513 sc->msix_ih[i]); 4514 sc->msix_ih[i] = NULL; 4515 } 4516 } 4517 free(sc->msix_ih, M_DEVBUF); 4518 4519 4520 abort_with_res: 4521 for (i = 0; i < sc->num_slices; i++) { 4522 rid = i + 1; 4523 if (sc->msix_irq_res[i] != NULL) 4524 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4525 sc->msix_irq_res[i]); 4526 sc->msix_irq_res[i] = NULL; 4527 } 4528 free(sc->msix_irq_res, M_DEVBUF); 4529 4530 4531 abort_with_msix: 4532 pci_release_msi(sc->dev); 4533 4534 abort_with_msix_table: 4535 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4536 sc->msix_table_res); 4537 4538 return err; 4539 } 4540 4541 static int 4542 mxge_add_single_irq(mxge_softc_t *sc) 4543 { 4544 int count, err, rid; 4545 4546 count = pci_msi_count(sc->dev); 4547 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) { 4548 rid = 1; 4549 } else { 4550 rid = 0; 4551 sc->legacy_irq = 1; 4552 } 4553 sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0, 4554 1, RF_SHAREABLE | RF_ACTIVE); 4555 if (sc->irq_res == NULL) { 4556 device_printf(sc->dev, "could not alloc interrupt\n"); 4557 return ENXIO; 4558 } 4559 if (mxge_verbose) 4560 device_printf(sc->dev, "using %s irq %ld\n", 4561 sc->legacy_irq ? "INTx" : "MSI", 4562 rman_get_start(sc->irq_res)); 4563 err = bus_setup_intr(sc->dev, sc->irq_res, 4564 INTR_TYPE_NET | INTR_MPSAFE, 4565 #if __FreeBSD_version > 700030 4566 NULL, 4567 #endif 4568 mxge_intr, &sc->ss[0], &sc->ih); 4569 if (err != 0) { 4570 bus_release_resource(sc->dev, SYS_RES_IRQ, 4571 sc->legacy_irq ? 0 : 1, sc->irq_res); 4572 if (!sc->legacy_irq) 4573 pci_release_msi(sc->dev); 4574 } 4575 return err; 4576 } 4577 4578 static void 4579 mxge_rem_msix_irqs(mxge_softc_t *sc) 4580 { 4581 int i, rid; 4582 4583 for (i = 0; i < sc->num_slices; i++) { 4584 if (sc->msix_ih[i] != NULL) { 4585 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4586 sc->msix_ih[i]); 4587 sc->msix_ih[i] = NULL; 4588 } 4589 } 4590 free(sc->msix_ih, M_DEVBUF); 4591 4592 for (i = 0; i < sc->num_slices; i++) { 4593 rid = i + 1; 4594 if (sc->msix_irq_res[i] != NULL) 4595 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4596 sc->msix_irq_res[i]); 4597 sc->msix_irq_res[i] = NULL; 4598 } 4599 free(sc->msix_irq_res, M_DEVBUF); 4600 4601 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4602 sc->msix_table_res); 4603 4604 pci_release_msi(sc->dev); 4605 return; 4606 } 4607 4608 static void 4609 mxge_rem_single_irq(mxge_softc_t *sc) 4610 { 4611 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 4612 bus_release_resource(sc->dev, SYS_RES_IRQ, 4613 sc->legacy_irq ? 0 : 1, sc->irq_res); 4614 if (!sc->legacy_irq) 4615 pci_release_msi(sc->dev); 4616 } 4617 4618 static void 4619 mxge_rem_irq(mxge_softc_t *sc) 4620 { 4621 if (sc->num_slices > 1) 4622 mxge_rem_msix_irqs(sc); 4623 else 4624 mxge_rem_single_irq(sc); 4625 } 4626 4627 static int 4628 mxge_add_irq(mxge_softc_t *sc) 4629 { 4630 int err; 4631 4632 if (sc->num_slices > 1) 4633 err = mxge_add_msix_irqs(sc); 4634 else 4635 err = mxge_add_single_irq(sc); 4636 4637 if (0 && err == 0 && sc->num_slices > 1) { 4638 mxge_rem_msix_irqs(sc); 4639 err = mxge_add_msix_irqs(sc); 4640 } 4641 return err; 4642 } 4643 4644 4645 static int 4646 mxge_attach(device_t dev) 4647 { 4648 mxge_softc_t *sc = device_get_softc(dev); 4649 struct ifnet *ifp; 4650 int err, rid; 4651 4652 sc->dev = dev; 4653 mxge_fetch_tunables(sc); 4654 4655 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc); 4656 sc->tq = taskqueue_create("mxge_taskq", M_WAITOK, 4657 taskqueue_thread_enqueue, &sc->tq); 4658 if (sc->tq == NULL) { 4659 err = ENOMEM; 4660 goto abort_with_nothing; 4661 } 4662 4663 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 4664 1, /* alignment */ 4665 0, /* boundary */ 4666 BUS_SPACE_MAXADDR, /* low */ 4667 BUS_SPACE_MAXADDR, /* high */ 4668 NULL, NULL, /* filter */ 4669 65536 + 256, /* maxsize */ 4670 MXGE_MAX_SEND_DESC, /* num segs */ 4671 65536, /* maxsegsize */ 4672 0, /* flags */ 4673 NULL, NULL, /* lock */ 4674 &sc->parent_dmat); /* tag */ 4675 4676 if (err != 0) { 4677 device_printf(sc->dev, "Err %d allocating parent dmat\n", 4678 err); 4679 goto abort_with_tq; 4680 } 4681 4682 ifp = sc->ifp = if_alloc(IFT_ETHER); 4683 if (ifp == NULL) { 4684 device_printf(dev, "can not if_alloc()\n"); 4685 err = ENOSPC; 4686 goto abort_with_parent_dmat; 4687 } 4688 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4689 4690 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", 4691 device_get_nameunit(dev)); 4692 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF); 4693 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), 4694 "%s:drv", device_get_nameunit(dev)); 4695 mtx_init(&sc->driver_mtx, sc->driver_mtx_name, 4696 MTX_NETWORK_LOCK, MTX_DEF); 4697 4698 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); 4699 4700 mxge_setup_cfg_space(sc); 4701 4702 /* Map the board into the kernel */ 4703 rid = PCIR_BARS; 4704 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, 4705 ~0, 1, RF_ACTIVE); 4706 if (sc->mem_res == NULL) { 4707 device_printf(dev, "could not map memory\n"); 4708 err = ENXIO; 4709 goto abort_with_lock; 4710 } 4711 sc->sram = rman_get_virtual(sc->mem_res); 4712 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4713 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4714 device_printf(dev, "impossible memory region size %ld\n", 4715 rman_get_size(sc->mem_res)); 4716 err = ENXIO; 4717 goto abort_with_mem_res; 4718 } 4719 4720 /* make NULL terminated copy of the EEPROM strings section of 4721 lanai SRAM */ 4722 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4723 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4724 rman_get_bushandle(sc->mem_res), 4725 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4726 sc->eeprom_strings, 4727 MXGE_EEPROM_STRINGS_SIZE - 2); 4728 err = mxge_parse_strings(sc); 4729 if (err != 0) 4730 goto abort_with_mem_res; 4731 4732 /* Enable write combining for efficient use of PCIe bus */ 4733 mxge_enable_wc(sc); 4734 4735 /* Allocate the out of band dma memory */ 4736 err = mxge_dma_alloc(sc, &sc->cmd_dma, 4737 sizeof (mxge_cmd_t), 64); 4738 if (err != 0) 4739 goto abort_with_mem_res; 4740 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 4741 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4742 if (err != 0) 4743 goto abort_with_cmd_dma; 4744 4745 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4746 if (err != 0) 4747 goto abort_with_zeropad_dma; 4748 4749 /* select & load the firmware */ 4750 err = mxge_select_firmware(sc); 4751 if (err != 0) 4752 goto abort_with_dmabench; 4753 sc->intr_coal_delay = mxge_intr_coal_delay; 4754 4755 mxge_slice_probe(sc); 4756 err = mxge_alloc_slices(sc); 4757 if (err != 0) 4758 goto abort_with_dmabench; 4759 4760 err = mxge_reset(sc, 0); 4761 if (err != 0) 4762 goto abort_with_slices; 4763 4764 err = mxge_alloc_rings(sc); 4765 if (err != 0) { 4766 device_printf(sc->dev, "failed to allocate rings\n"); 4767 goto abort_with_slices; 4768 } 4769 4770 err = mxge_add_irq(sc); 4771 if (err != 0) { 4772 device_printf(sc->dev, "failed to add irq\n"); 4773 goto abort_with_rings; 4774 } 4775 4776 if_initbaudrate(ifp, IF_Gbps(10)); 4777 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 4778 IFCAP_VLAN_MTU | IFCAP_LINKSTATE; 4779 #ifdef INET 4780 ifp->if_capabilities |= IFCAP_LRO; 4781 #endif 4782 4783 #ifdef MXGE_NEW_VLAN_API 4784 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; 4785 4786 /* Only FW 1.4.32 and newer can do TSO over vlans */ 4787 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 4788 sc->fw_ver_tiny >= 32) 4789 ifp->if_capabilities |= IFCAP_VLAN_HWTSO; 4790 #endif 4791 4792 sc->max_mtu = mxge_max_mtu(sc); 4793 if (sc->max_mtu >= 9000) 4794 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 4795 else 4796 device_printf(dev, "MTU limited to %d. Install " 4797 "latest firmware for 9000 byte jumbo support\n", 4798 sc->max_mtu - ETHER_HDR_LEN); 4799 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4800 ifp->if_capenable = ifp->if_capabilities; 4801 if (sc->lro_cnt == 0) 4802 ifp->if_capenable &= ~IFCAP_LRO; 4803 sc->csum_flag = 1; 4804 ifp->if_init = mxge_init; 4805 ifp->if_softc = sc; 4806 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4807 ifp->if_ioctl = mxge_ioctl; 4808 ifp->if_start = mxge_start; 4809 /* Initialise the ifmedia structure */ 4810 ifmedia_init(&sc->media, 0, mxge_media_change, 4811 mxge_media_status); 4812 mxge_media_init(sc); 4813 mxge_media_probe(sc); 4814 sc->dying = 0; 4815 ether_ifattach(ifp, sc->mac_addr); 4816 /* ether_ifattach sets mtu to ETHERMTU */ 4817 if (mxge_initial_mtu != ETHERMTU) 4818 mxge_change_mtu(sc, mxge_initial_mtu); 4819 4820 mxge_add_sysctls(sc); 4821 #ifdef IFNET_BUF_RING 4822 ifp->if_transmit = mxge_transmit; 4823 ifp->if_qflush = mxge_qflush; 4824 #endif 4825 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq", 4826 device_get_nameunit(sc->dev)); 4827 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 4828 return 0; 4829 4830 abort_with_rings: 4831 mxge_free_rings(sc); 4832 abort_with_slices: 4833 mxge_free_slices(sc); 4834 abort_with_dmabench: 4835 mxge_dma_free(&sc->dmabench_dma); 4836 abort_with_zeropad_dma: 4837 mxge_dma_free(&sc->zeropad_dma); 4838 abort_with_cmd_dma: 4839 mxge_dma_free(&sc->cmd_dma); 4840 abort_with_mem_res: 4841 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4842 abort_with_lock: 4843 pci_disable_busmaster(dev); 4844 mtx_destroy(&sc->cmd_mtx); 4845 mtx_destroy(&sc->driver_mtx); 4846 if_free(ifp); 4847 abort_with_parent_dmat: 4848 bus_dma_tag_destroy(sc->parent_dmat); 4849 abort_with_tq: 4850 if (sc->tq != NULL) { 4851 taskqueue_drain(sc->tq, &sc->watchdog_task); 4852 taskqueue_free(sc->tq); 4853 sc->tq = NULL; 4854 } 4855 abort_with_nothing: 4856 return err; 4857 } 4858 4859 static int 4860 mxge_detach(device_t dev) 4861 { 4862 mxge_softc_t *sc = device_get_softc(dev); 4863 4864 if (mxge_vlans_active(sc)) { 4865 device_printf(sc->dev, 4866 "Detach vlans before removing module\n"); 4867 return EBUSY; 4868 } 4869 mtx_lock(&sc->driver_mtx); 4870 sc->dying = 1; 4871 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 4872 mxge_close(sc, 0); 4873 mtx_unlock(&sc->driver_mtx); 4874 ether_ifdetach(sc->ifp); 4875 if (sc->tq != NULL) { 4876 taskqueue_drain(sc->tq, &sc->watchdog_task); 4877 taskqueue_free(sc->tq); 4878 sc->tq = NULL; 4879 } 4880 callout_drain(&sc->co_hdl); 4881 ifmedia_removeall(&sc->media); 4882 mxge_dummy_rdma(sc, 0); 4883 mxge_rem_sysctls(sc); 4884 mxge_rem_irq(sc); 4885 mxge_free_rings(sc); 4886 mxge_free_slices(sc); 4887 mxge_dma_free(&sc->dmabench_dma); 4888 mxge_dma_free(&sc->zeropad_dma); 4889 mxge_dma_free(&sc->cmd_dma); 4890 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4891 pci_disable_busmaster(dev); 4892 mtx_destroy(&sc->cmd_mtx); 4893 mtx_destroy(&sc->driver_mtx); 4894 if_free(sc->ifp); 4895 bus_dma_tag_destroy(sc->parent_dmat); 4896 return 0; 4897 } 4898 4899 static int 4900 mxge_shutdown(device_t dev) 4901 { 4902 return 0; 4903 } 4904 4905 /* 4906 This file uses Myri10GE driver indentation. 4907 4908 Local Variables: 4909 c-file-style:"linux" 4910 tab-width:8 4911 End: 4912 */ 4913