1 /****************************************************************************** 2 3 Copyright (c) 2006-2009, Myricom Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Myricom Inc, nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 27 28 ***************************************************************************/ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/linker.h> 36 #include <sys/firmware.h> 37 #include <sys/endian.h> 38 #include <sys/sockio.h> 39 #include <sys/mbuf.h> 40 #include <sys/malloc.h> 41 #include <sys/kdb.h> 42 #include <sys/kernel.h> 43 #include <sys/lock.h> 44 #include <sys/module.h> 45 #include <sys/socket.h> 46 #include <sys/sysctl.h> 47 #include <sys/sx.h> 48 #include <sys/taskqueue.h> 49 50 /* count xmits ourselves, rather than via drbr */ 51 #define NO_SLOW_STATS 52 #include <net/if.h> 53 #include <net/if_arp.h> 54 #include <net/ethernet.h> 55 #include <net/if_dl.h> 56 #include <net/if_media.h> 57 58 #include <net/bpf.h> 59 60 #include <net/if_types.h> 61 #include <net/if_vlan_var.h> 62 #include <net/zlib.h> 63 64 #include <netinet/in_systm.h> 65 #include <netinet/in.h> 66 #include <netinet/ip.h> 67 #include <netinet/tcp.h> 68 69 #include <machine/bus.h> 70 #include <machine/in_cksum.h> 71 #include <machine/resource.h> 72 #include <sys/bus.h> 73 #include <sys/rman.h> 74 #include <sys/smp.h> 75 76 #include <dev/pci/pcireg.h> 77 #include <dev/pci/pcivar.h> 78 #include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */ 79 80 #include <vm/vm.h> /* for pmap_mapdev() */ 81 #include <vm/pmap.h> 82 83 #if defined(__i386) || defined(__amd64) 84 #include <machine/specialreg.h> 85 #endif 86 87 #include <dev/mxge/mxge_mcp.h> 88 #include <dev/mxge/mcp_gen_header.h> 89 /*#define MXGE_FAKE_IFP*/ 90 #include <dev/mxge/if_mxge_var.h> 91 #ifdef IFNET_BUF_RING 92 #include <sys/buf_ring.h> 93 #endif 94 95 #include "opt_inet.h" 96 97 /* tunable params */ 98 static int mxge_nvidia_ecrc_enable = 1; 99 static int mxge_force_firmware = 0; 100 static int mxge_intr_coal_delay = 30; 101 static int mxge_deassert_wait = 1; 102 static int mxge_flow_control = 1; 103 static int mxge_verbose = 0; 104 static int mxge_lro_cnt = 8; 105 static int mxge_ticks; 106 static int mxge_max_slices = 1; 107 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 108 static int mxge_always_promisc = 0; 109 static int mxge_initial_mtu = ETHERMTU_JUMBO; 110 static int mxge_throttle = 0; 111 static char *mxge_fw_unaligned = "mxge_ethp_z8e"; 112 static char *mxge_fw_aligned = "mxge_eth_z8e"; 113 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; 114 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; 115 116 static int mxge_probe(device_t dev); 117 static int mxge_attach(device_t dev); 118 static int mxge_detach(device_t dev); 119 static int mxge_shutdown(device_t dev); 120 static void mxge_intr(void *arg); 121 122 static device_method_t mxge_methods[] = 123 { 124 /* Device interface */ 125 DEVMETHOD(device_probe, mxge_probe), 126 DEVMETHOD(device_attach, mxge_attach), 127 DEVMETHOD(device_detach, mxge_detach), 128 DEVMETHOD(device_shutdown, mxge_shutdown), 129 {0, 0} 130 }; 131 132 static driver_t mxge_driver = 133 { 134 "mxge", 135 mxge_methods, 136 sizeof(mxge_softc_t), 137 }; 138 139 static devclass_t mxge_devclass; 140 141 /* Declare ourselves to be a child of the PCI bus.*/ 142 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0); 143 MODULE_DEPEND(mxge, firmware, 1, 1, 1); 144 MODULE_DEPEND(mxge, zlib, 1, 1, 1); 145 146 static int mxge_load_firmware(mxge_softc_t *sc, int adopt); 147 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 148 static int mxge_close(mxge_softc_t *sc, int down); 149 static int mxge_open(mxge_softc_t *sc); 150 static void mxge_tick(void *arg); 151 152 static int 153 mxge_probe(device_t dev) 154 { 155 int rev; 156 157 158 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && 159 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) || 160 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) { 161 rev = pci_get_revid(dev); 162 switch (rev) { 163 case MXGE_PCI_REV_Z8E: 164 device_set_desc(dev, "Myri10G-PCIE-8A"); 165 break; 166 case MXGE_PCI_REV_Z8ES: 167 device_set_desc(dev, "Myri10G-PCIE-8B"); 168 break; 169 default: 170 device_set_desc(dev, "Myri10G-PCIE-8??"); 171 device_printf(dev, "Unrecognized rev %d NIC\n", 172 rev); 173 break; 174 } 175 return 0; 176 } 177 return ENXIO; 178 } 179 180 static void 181 mxge_enable_wc(mxge_softc_t *sc) 182 { 183 #if defined(__i386) || defined(__amd64) 184 vm_offset_t len; 185 int err; 186 187 sc->wc = 1; 188 len = rman_get_size(sc->mem_res); 189 err = pmap_change_attr((vm_offset_t) sc->sram, 190 len, PAT_WRITE_COMBINING); 191 if (err != 0) { 192 device_printf(sc->dev, "pmap_change_attr failed, %d\n", 193 err); 194 sc->wc = 0; 195 } 196 #endif 197 } 198 199 200 /* callback to get our DMA address */ 201 static void 202 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, 203 int error) 204 { 205 if (error == 0) { 206 *(bus_addr_t *) arg = segs->ds_addr; 207 } 208 } 209 210 static int 211 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, 212 bus_size_t alignment) 213 { 214 int err; 215 device_t dev = sc->dev; 216 bus_size_t boundary, maxsegsize; 217 218 if (bytes > 4096 && alignment == 4096) { 219 boundary = 0; 220 maxsegsize = bytes; 221 } else { 222 boundary = 4096; 223 maxsegsize = 4096; 224 } 225 226 /* allocate DMAable memory tags */ 227 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 228 alignment, /* alignment */ 229 boundary, /* boundary */ 230 BUS_SPACE_MAXADDR, /* low */ 231 BUS_SPACE_MAXADDR, /* high */ 232 NULL, NULL, /* filter */ 233 bytes, /* maxsize */ 234 1, /* num segs */ 235 maxsegsize, /* maxsegsize */ 236 BUS_DMA_COHERENT, /* flags */ 237 NULL, NULL, /* lock */ 238 &dma->dmat); /* tag */ 239 if (err != 0) { 240 device_printf(dev, "couldn't alloc tag (err = %d)\n", err); 241 return err; 242 } 243 244 /* allocate DMAable memory & map */ 245 err = bus_dmamem_alloc(dma->dmat, &dma->addr, 246 (BUS_DMA_WAITOK | BUS_DMA_COHERENT 247 | BUS_DMA_ZERO), &dma->map); 248 if (err != 0) { 249 device_printf(dev, "couldn't alloc mem (err = %d)\n", err); 250 goto abort_with_dmat; 251 } 252 253 /* load the memory */ 254 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, 255 mxge_dmamap_callback, 256 (void *)&dma->bus_addr, 0); 257 if (err != 0) { 258 device_printf(dev, "couldn't load map (err = %d)\n", err); 259 goto abort_with_mem; 260 } 261 return 0; 262 263 abort_with_mem: 264 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 265 abort_with_dmat: 266 (void)bus_dma_tag_destroy(dma->dmat); 267 return err; 268 } 269 270 271 static void 272 mxge_dma_free(mxge_dma_t *dma) 273 { 274 bus_dmamap_unload(dma->dmat, dma->map); 275 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 276 (void)bus_dma_tag_destroy(dma->dmat); 277 } 278 279 /* 280 * The eeprom strings on the lanaiX have the format 281 * SN=x\0 282 * MAC=x:x:x:x:x:x\0 283 * PC=text\0 284 */ 285 286 static int 287 mxge_parse_strings(mxge_softc_t *sc) 288 { 289 #define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++) 290 291 char *ptr, *limit; 292 int i, found_mac; 293 294 ptr = sc->eeprom_strings; 295 limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE; 296 found_mac = 0; 297 while (ptr < limit && *ptr != '\0') { 298 if (memcmp(ptr, "MAC=", 4) == 0) { 299 ptr += 1; 300 sc->mac_addr_string = ptr; 301 for (i = 0; i < 6; i++) { 302 ptr += 3; 303 if ((ptr + 2) > limit) 304 goto abort; 305 sc->mac_addr[i] = strtoul(ptr, NULL, 16); 306 found_mac = 1; 307 } 308 } else if (memcmp(ptr, "PC=", 3) == 0) { 309 ptr += 3; 310 strncpy(sc->product_code_string, ptr, 311 sizeof (sc->product_code_string) - 1); 312 } else if (memcmp(ptr, "SN=", 3) == 0) { 313 ptr += 3; 314 strncpy(sc->serial_number_string, ptr, 315 sizeof (sc->serial_number_string) - 1); 316 } 317 MXGE_NEXT_STRING(ptr); 318 } 319 320 if (found_mac) 321 return 0; 322 323 abort: 324 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 325 326 return ENXIO; 327 } 328 329 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 330 static void 331 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 332 { 333 uint32_t val; 334 unsigned long base, off; 335 char *va, *cfgptr; 336 device_t pdev, mcp55; 337 uint16_t vendor_id, device_id, word; 338 uintptr_t bus, slot, func, ivend, idev; 339 uint32_t *ptr32; 340 341 342 if (!mxge_nvidia_ecrc_enable) 343 return; 344 345 pdev = device_get_parent(device_get_parent(sc->dev)); 346 if (pdev == NULL) { 347 device_printf(sc->dev, "could not find parent?\n"); 348 return; 349 } 350 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 351 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 352 353 if (vendor_id != 0x10de) 354 return; 355 356 base = 0; 357 358 if (device_id == 0x005d) { 359 /* ck804, base address is magic */ 360 base = 0xe0000000UL; 361 } else if (device_id >= 0x0374 && device_id <= 0x378) { 362 /* mcp55, base address stored in chipset */ 363 mcp55 = pci_find_bsf(0, 0, 0); 364 if (mcp55 && 365 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 366 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 367 word = pci_read_config(mcp55, 0x90, 2); 368 base = ((unsigned long)word & 0x7ffeU) << 25; 369 } 370 } 371 if (!base) 372 return; 373 374 /* XXXX 375 Test below is commented because it is believed that doing 376 config read/write beyond 0xff will access the config space 377 for the next larger function. Uncomment this and remove 378 the hacky pmap_mapdev() way of accessing config space when 379 FreeBSD grows support for extended pcie config space access 380 */ 381 #if 0 382 /* See if we can, by some miracle, access the extended 383 config space */ 384 val = pci_read_config(pdev, 0x178, 4); 385 if (val != 0xffffffff) { 386 val |= 0x40; 387 pci_write_config(pdev, 0x178, val, 4); 388 return; 389 } 390 #endif 391 /* Rather than using normal pci config space writes, we must 392 * map the Nvidia config space ourselves. This is because on 393 * opteron/nvidia class machine the 0xe000000 mapping is 394 * handled by the nvidia chipset, that means the internal PCI 395 * device (the on-chip northbridge), or the amd-8131 bridge 396 * and things behind them are not visible by this method. 397 */ 398 399 BUS_READ_IVAR(device_get_parent(pdev), pdev, 400 PCI_IVAR_BUS, &bus); 401 BUS_READ_IVAR(device_get_parent(pdev), pdev, 402 PCI_IVAR_SLOT, &slot); 403 BUS_READ_IVAR(device_get_parent(pdev), pdev, 404 PCI_IVAR_FUNCTION, &func); 405 BUS_READ_IVAR(device_get_parent(pdev), pdev, 406 PCI_IVAR_VENDOR, &ivend); 407 BUS_READ_IVAR(device_get_parent(pdev), pdev, 408 PCI_IVAR_DEVICE, &idev); 409 410 off = base 411 + 0x00100000UL * (unsigned long)bus 412 + 0x00001000UL * (unsigned long)(func 413 + 8 * slot); 414 415 /* map it into the kernel */ 416 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 417 418 419 if (va == NULL) { 420 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 421 return; 422 } 423 /* get a pointer to the config space mapped into the kernel */ 424 cfgptr = va + (off & PAGE_MASK); 425 426 /* make sure that we can really access it */ 427 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 428 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 429 if (! (vendor_id == ivend && device_id == idev)) { 430 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 431 vendor_id, device_id); 432 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 433 return; 434 } 435 436 ptr32 = (uint32_t*)(cfgptr + 0x178); 437 val = *ptr32; 438 439 if (val == 0xffffffff) { 440 device_printf(sc->dev, "extended mapping failed\n"); 441 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 442 return; 443 } 444 *ptr32 = val | 0x40; 445 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 446 if (mxge_verbose) 447 device_printf(sc->dev, 448 "Enabled ECRC on upstream Nvidia bridge " 449 "at %d:%d:%d\n", 450 (int)bus, (int)slot, (int)func); 451 return; 452 } 453 #else 454 static void 455 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 456 { 457 device_printf(sc->dev, 458 "Nforce 4 chipset on non-x86/amd64!?!?!\n"); 459 return; 460 } 461 #endif 462 463 464 static int 465 mxge_dma_test(mxge_softc_t *sc, int test_type) 466 { 467 mxge_cmd_t cmd; 468 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr; 469 int status; 470 uint32_t len; 471 char *test = " "; 472 473 474 /* Run a small DMA test. 475 * The magic multipliers to the length tell the firmware 476 * to do DMA read, write, or read+write tests. The 477 * results are returned in cmd.data0. The upper 16 478 * bits of the return is the number of transfers completed. 479 * The lower 16 bits is the time in 0.5us ticks that the 480 * transfers took to complete. 481 */ 482 483 len = sc->tx_boundary; 484 485 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 486 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 487 cmd.data2 = len * 0x10000; 488 status = mxge_send_cmd(sc, test_type, &cmd); 489 if (status != 0) { 490 test = "read"; 491 goto abort; 492 } 493 sc->read_dma = ((cmd.data0>>16) * len * 2) / 494 (cmd.data0 & 0xffff); 495 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 496 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 497 cmd.data2 = len * 0x1; 498 status = mxge_send_cmd(sc, test_type, &cmd); 499 if (status != 0) { 500 test = "write"; 501 goto abort; 502 } 503 sc->write_dma = ((cmd.data0>>16) * len * 2) / 504 (cmd.data0 & 0xffff); 505 506 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 507 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 508 cmd.data2 = len * 0x10001; 509 status = mxge_send_cmd(sc, test_type, &cmd); 510 if (status != 0) { 511 test = "read/write"; 512 goto abort; 513 } 514 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 515 (cmd.data0 & 0xffff); 516 517 abort: 518 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 519 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 520 test, status); 521 522 return status; 523 } 524 525 /* 526 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 527 * when the PCI-E Completion packets are aligned on an 8-byte 528 * boundary. Some PCI-E chip sets always align Completion packets; on 529 * the ones that do not, the alignment can be enforced by enabling 530 * ECRC generation (if supported). 531 * 532 * When PCI-E Completion packets are not aligned, it is actually more 533 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 534 * 535 * If the driver can neither enable ECRC nor verify that it has 536 * already been enabled, then it must use a firmware image which works 537 * around unaligned completion packets (ethp_z8e.dat), and it should 538 * also ensure that it never gives the device a Read-DMA which is 539 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 540 * enabled, then the driver should use the aligned (eth_z8e.dat) 541 * firmware image, and set tx_boundary to 4KB. 542 */ 543 544 static int 545 mxge_firmware_probe(mxge_softc_t *sc) 546 { 547 device_t dev = sc->dev; 548 int reg, status; 549 uint16_t pectl; 550 551 sc->tx_boundary = 4096; 552 /* 553 * Verify the max read request size was set to 4KB 554 * before trying the test with 4KB. 555 */ 556 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { 557 pectl = pci_read_config(dev, reg + 0x8, 2); 558 if ((pectl & (5 << 12)) != (5 << 12)) { 559 device_printf(dev, "Max Read Req. size != 4k (0x%x\n", 560 pectl); 561 sc->tx_boundary = 2048; 562 } 563 } 564 565 /* 566 * load the optimized firmware (which assumes aligned PCIe 567 * completions) in order to see if it works on this host. 568 */ 569 sc->fw_name = mxge_fw_aligned; 570 status = mxge_load_firmware(sc, 1); 571 if (status != 0) { 572 return status; 573 } 574 575 /* 576 * Enable ECRC if possible 577 */ 578 mxge_enable_nvidia_ecrc(sc); 579 580 /* 581 * Run a DMA test which watches for unaligned completions and 582 * aborts on the first one seen. 583 */ 584 585 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 586 if (status == 0) 587 return 0; /* keep the aligned firmware */ 588 589 if (status != E2BIG) 590 device_printf(dev, "DMA test failed: %d\n", status); 591 if (status == ENOSYS) 592 device_printf(dev, "Falling back to ethp! " 593 "Please install up to date fw\n"); 594 return status; 595 } 596 597 static int 598 mxge_select_firmware(mxge_softc_t *sc) 599 { 600 int aligned = 0; 601 int force_firmware = mxge_force_firmware; 602 603 if (sc->throttle) 604 force_firmware = sc->throttle; 605 606 if (force_firmware != 0) { 607 if (force_firmware == 1) 608 aligned = 1; 609 else 610 aligned = 0; 611 if (mxge_verbose) 612 device_printf(sc->dev, 613 "Assuming %s completions (forced)\n", 614 aligned ? "aligned" : "unaligned"); 615 goto abort; 616 } 617 618 /* if the PCIe link width is 4 or less, we can use the aligned 619 firmware and skip any checks */ 620 if (sc->link_width != 0 && sc->link_width <= 4) { 621 device_printf(sc->dev, 622 "PCIe x%d Link, expect reduced performance\n", 623 sc->link_width); 624 aligned = 1; 625 goto abort; 626 } 627 628 if (0 == mxge_firmware_probe(sc)) 629 return 0; 630 631 abort: 632 if (aligned) { 633 sc->fw_name = mxge_fw_aligned; 634 sc->tx_boundary = 4096; 635 } else { 636 sc->fw_name = mxge_fw_unaligned; 637 sc->tx_boundary = 2048; 638 } 639 return (mxge_load_firmware(sc, 0)); 640 } 641 642 union qualhack 643 { 644 const char *ro_char; 645 char *rw_char; 646 }; 647 648 static int 649 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 650 { 651 652 653 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 654 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 655 be32toh(hdr->mcp_type)); 656 return EIO; 657 } 658 659 /* save firmware version for sysctl */ 660 strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version)); 661 if (mxge_verbose) 662 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 663 664 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 665 &sc->fw_ver_minor, &sc->fw_ver_tiny); 666 667 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 668 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 669 device_printf(sc->dev, "Found firmware version %s\n", 670 sc->fw_version); 671 device_printf(sc->dev, "Driver needs %d.%d\n", 672 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 673 return EINVAL; 674 } 675 return 0; 676 677 } 678 679 static void * 680 z_alloc(void *nil, u_int items, u_int size) 681 { 682 void *ptr; 683 684 ptr = malloc(items * size, M_TEMP, M_NOWAIT); 685 return ptr; 686 } 687 688 static void 689 z_free(void *nil, void *ptr) 690 { 691 free(ptr, M_TEMP); 692 } 693 694 695 static int 696 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 697 { 698 z_stream zs; 699 char *inflate_buffer; 700 const struct firmware *fw; 701 const mcp_gen_header_t *hdr; 702 unsigned hdr_offset; 703 int status; 704 unsigned int i; 705 char dummy; 706 size_t fw_len; 707 708 fw = firmware_get(sc->fw_name); 709 if (fw == NULL) { 710 device_printf(sc->dev, "Could not find firmware image %s\n", 711 sc->fw_name); 712 return ENOENT; 713 } 714 715 716 717 /* setup zlib and decompress f/w */ 718 bzero(&zs, sizeof (zs)); 719 zs.zalloc = z_alloc; 720 zs.zfree = z_free; 721 status = inflateInit(&zs); 722 if (status != Z_OK) { 723 status = EIO; 724 goto abort_with_fw; 725 } 726 727 /* the uncompressed size is stored as the firmware version, 728 which would otherwise go unused */ 729 fw_len = (size_t) fw->version; 730 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); 731 if (inflate_buffer == NULL) 732 goto abort_with_zs; 733 zs.avail_in = fw->datasize; 734 zs.next_in = __DECONST(char *, fw->data); 735 zs.avail_out = fw_len; 736 zs.next_out = inflate_buffer; 737 status = inflate(&zs, Z_FINISH); 738 if (status != Z_STREAM_END) { 739 device_printf(sc->dev, "zlib %d\n", status); 740 status = EIO; 741 goto abort_with_buffer; 742 } 743 744 /* check id */ 745 hdr_offset = htobe32(*(const uint32_t *) 746 (inflate_buffer + MCP_HEADER_PTR_OFFSET)); 747 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 748 device_printf(sc->dev, "Bad firmware file"); 749 status = EIO; 750 goto abort_with_buffer; 751 } 752 hdr = (const void*)(inflate_buffer + hdr_offset); 753 754 status = mxge_validate_firmware(sc, hdr); 755 if (status != 0) 756 goto abort_with_buffer; 757 758 /* Copy the inflated firmware to NIC SRAM. */ 759 for (i = 0; i < fw_len; i += 256) { 760 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 761 inflate_buffer + i, 762 min(256U, (unsigned)(fw_len - i))); 763 wmb(); 764 dummy = *sc->sram; 765 wmb(); 766 } 767 768 *limit = fw_len; 769 status = 0; 770 abort_with_buffer: 771 free(inflate_buffer, M_TEMP); 772 abort_with_zs: 773 inflateEnd(&zs); 774 abort_with_fw: 775 firmware_put(fw, FIRMWARE_UNLOAD); 776 return status; 777 } 778 779 /* 780 * Enable or disable periodic RDMAs from the host to make certain 781 * chipsets resend dropped PCIe messages 782 */ 783 784 static void 785 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 786 { 787 char buf_bytes[72]; 788 volatile uint32_t *confirm; 789 volatile char *submit; 790 uint32_t *buf, dma_low, dma_high; 791 int i; 792 793 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 794 795 /* clear confirmation addr */ 796 confirm = (volatile uint32_t *)sc->cmd; 797 *confirm = 0; 798 wmb(); 799 800 /* send an rdma command to the PCIe engine, and wait for the 801 response in the confirmation address. The firmware should 802 write a -1 there to indicate it is alive and well 803 */ 804 805 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 806 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 807 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 808 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 809 buf[2] = htobe32(0xffffffff); /* confirm data */ 810 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 811 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 812 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 813 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 814 buf[5] = htobe32(enable); /* enable? */ 815 816 817 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 818 819 mxge_pio_copy(submit, buf, 64); 820 wmb(); 821 DELAY(1000); 822 wmb(); 823 i = 0; 824 while (*confirm != 0xffffffff && i < 20) { 825 DELAY(1000); 826 i++; 827 } 828 if (*confirm != 0xffffffff) { 829 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 830 (enable ? "enable" : "disable"), confirm, 831 *confirm); 832 } 833 return; 834 } 835 836 static int 837 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 838 { 839 mcp_cmd_t *buf; 840 char buf_bytes[sizeof(*buf) + 8]; 841 volatile mcp_cmd_response_t *response = sc->cmd; 842 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 843 uint32_t dma_low, dma_high; 844 int err, sleep_total = 0; 845 846 /* ensure buf is aligned to 8 bytes */ 847 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 848 849 buf->data0 = htobe32(data->data0); 850 buf->data1 = htobe32(data->data1); 851 buf->data2 = htobe32(data->data2); 852 buf->cmd = htobe32(cmd); 853 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 854 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 855 856 buf->response_addr.low = htobe32(dma_low); 857 buf->response_addr.high = htobe32(dma_high); 858 mtx_lock(&sc->cmd_mtx); 859 response->result = 0xffffffff; 860 wmb(); 861 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 862 863 /* wait up to 20ms */ 864 err = EAGAIN; 865 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 866 bus_dmamap_sync(sc->cmd_dma.dmat, 867 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 868 wmb(); 869 switch (be32toh(response->result)) { 870 case 0: 871 data->data0 = be32toh(response->data); 872 err = 0; 873 break; 874 case 0xffffffff: 875 DELAY(1000); 876 break; 877 case MXGEFW_CMD_UNKNOWN: 878 err = ENOSYS; 879 break; 880 case MXGEFW_CMD_ERROR_UNALIGNED: 881 err = E2BIG; 882 break; 883 case MXGEFW_CMD_ERROR_BUSY: 884 err = EBUSY; 885 break; 886 case MXGEFW_CMD_ERROR_I2C_ABSENT: 887 err = ENXIO; 888 break; 889 default: 890 device_printf(sc->dev, 891 "mxge: command %d " 892 "failed, result = %d\n", 893 cmd, be32toh(response->result)); 894 err = ENXIO; 895 break; 896 } 897 if (err != EAGAIN) 898 break; 899 } 900 if (err == EAGAIN) 901 device_printf(sc->dev, "mxge: command %d timed out" 902 "result = %d\n", 903 cmd, be32toh(response->result)); 904 mtx_unlock(&sc->cmd_mtx); 905 return err; 906 } 907 908 static int 909 mxge_adopt_running_firmware(mxge_softc_t *sc) 910 { 911 struct mcp_gen_header *hdr; 912 const size_t bytes = sizeof (struct mcp_gen_header); 913 size_t hdr_offset; 914 int status; 915 916 /* find running firmware header */ 917 hdr_offset = htobe32(*(volatile uint32_t *) 918 (sc->sram + MCP_HEADER_PTR_OFFSET)); 919 920 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 921 device_printf(sc->dev, 922 "Running firmware has bad header offset (%d)\n", 923 (int)hdr_offset); 924 return EIO; 925 } 926 927 /* copy header of running firmware from SRAM to host memory to 928 * validate firmware */ 929 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 930 if (hdr == NULL) { 931 device_printf(sc->dev, "could not malloc firmware hdr\n"); 932 return ENOMEM; 933 } 934 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 935 rman_get_bushandle(sc->mem_res), 936 hdr_offset, (char *)hdr, bytes); 937 status = mxge_validate_firmware(sc, hdr); 938 free(hdr, M_DEVBUF); 939 940 /* 941 * check to see if adopted firmware has bug where adopting 942 * it will cause broadcasts to be filtered unless the NIC 943 * is kept in ALLMULTI mode 944 */ 945 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 946 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 947 sc->adopted_rx_filter_bug = 1; 948 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 949 "working around rx filter bug\n", 950 sc->fw_ver_major, sc->fw_ver_minor, 951 sc->fw_ver_tiny); 952 } 953 954 return status; 955 } 956 957 958 static int 959 mxge_load_firmware(mxge_softc_t *sc, int adopt) 960 { 961 volatile uint32_t *confirm; 962 volatile char *submit; 963 char buf_bytes[72]; 964 uint32_t *buf, size, dma_low, dma_high; 965 int status, i; 966 967 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 968 969 size = sc->sram_size; 970 status = mxge_load_firmware_helper(sc, &size); 971 if (status) { 972 if (!adopt) 973 return status; 974 /* Try to use the currently running firmware, if 975 it is new enough */ 976 status = mxge_adopt_running_firmware(sc); 977 if (status) { 978 device_printf(sc->dev, 979 "failed to adopt running firmware\n"); 980 return status; 981 } 982 device_printf(sc->dev, 983 "Successfully adopted running firmware\n"); 984 if (sc->tx_boundary == 4096) { 985 device_printf(sc->dev, 986 "Using firmware currently running on NIC" 987 ". For optimal\n"); 988 device_printf(sc->dev, 989 "performance consider loading optimized " 990 "firmware\n"); 991 } 992 sc->fw_name = mxge_fw_unaligned; 993 sc->tx_boundary = 2048; 994 return 0; 995 } 996 /* clear confirmation addr */ 997 confirm = (volatile uint32_t *)sc->cmd; 998 *confirm = 0; 999 wmb(); 1000 /* send a reload command to the bootstrap MCP, and wait for the 1001 response in the confirmation address. The firmware should 1002 write a -1 there to indicate it is alive and well 1003 */ 1004 1005 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 1006 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 1007 1008 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 1009 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 1010 buf[2] = htobe32(0xffffffff); /* confirm data */ 1011 1012 /* FIX: All newest firmware should un-protect the bottom of 1013 the sram before handoff. However, the very first interfaces 1014 do not. Therefore the handoff copy must skip the first 8 bytes 1015 */ 1016 /* where the code starts*/ 1017 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 1018 buf[4] = htobe32(size - 8); /* length of code */ 1019 buf[5] = htobe32(8); /* where to copy to */ 1020 buf[6] = htobe32(0); /* where to jump to */ 1021 1022 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 1023 mxge_pio_copy(submit, buf, 64); 1024 wmb(); 1025 DELAY(1000); 1026 wmb(); 1027 i = 0; 1028 while (*confirm != 0xffffffff && i < 20) { 1029 DELAY(1000*10); 1030 i++; 1031 bus_dmamap_sync(sc->cmd_dma.dmat, 1032 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 1033 } 1034 if (*confirm != 0xffffffff) { 1035 device_printf(sc->dev,"handoff failed (%p = 0x%x)", 1036 confirm, *confirm); 1037 1038 return ENXIO; 1039 } 1040 return 0; 1041 } 1042 1043 static int 1044 mxge_update_mac_address(mxge_softc_t *sc) 1045 { 1046 mxge_cmd_t cmd; 1047 uint8_t *addr = sc->mac_addr; 1048 int status; 1049 1050 1051 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 1052 | (addr[2] << 8) | addr[3]); 1053 1054 cmd.data1 = ((addr[4] << 8) | (addr[5])); 1055 1056 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 1057 return status; 1058 } 1059 1060 static int 1061 mxge_change_pause(mxge_softc_t *sc, int pause) 1062 { 1063 mxge_cmd_t cmd; 1064 int status; 1065 1066 if (pause) 1067 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, 1068 &cmd); 1069 else 1070 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, 1071 &cmd); 1072 1073 if (status) { 1074 device_printf(sc->dev, "Failed to set flow control mode\n"); 1075 return ENXIO; 1076 } 1077 sc->pause = pause; 1078 return 0; 1079 } 1080 1081 static void 1082 mxge_change_promisc(mxge_softc_t *sc, int promisc) 1083 { 1084 mxge_cmd_t cmd; 1085 int status; 1086 1087 if (mxge_always_promisc) 1088 promisc = 1; 1089 1090 if (promisc) 1091 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, 1092 &cmd); 1093 else 1094 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, 1095 &cmd); 1096 1097 if (status) { 1098 device_printf(sc->dev, "Failed to set promisc mode\n"); 1099 } 1100 } 1101 1102 static void 1103 mxge_set_multicast_list(mxge_softc_t *sc) 1104 { 1105 mxge_cmd_t cmd; 1106 struct ifmultiaddr *ifma; 1107 struct ifnet *ifp = sc->ifp; 1108 int err; 1109 1110 /* This firmware is known to not support multicast */ 1111 if (!sc->fw_multicast_support) 1112 return; 1113 1114 /* Disable multicast filtering while we play with the lists*/ 1115 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1116 if (err != 0) { 1117 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," 1118 " error status: %d\n", err); 1119 return; 1120 } 1121 1122 if (sc->adopted_rx_filter_bug) 1123 return; 1124 1125 if (ifp->if_flags & IFF_ALLMULTI) 1126 /* request to disable multicast filtering, so quit here */ 1127 return; 1128 1129 /* Flush all the filters */ 1130 1131 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1132 if (err != 0) { 1133 device_printf(sc->dev, 1134 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" 1135 ", error status: %d\n", err); 1136 return; 1137 } 1138 1139 /* Walk the multicast list, and add each address */ 1140 1141 if_maddr_rlock(ifp); 1142 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1143 if (ifma->ifma_addr->sa_family != AF_LINK) 1144 continue; 1145 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1146 &cmd.data0, 4); 1147 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, 1148 &cmd.data1, 2); 1149 cmd.data0 = htonl(cmd.data0); 1150 cmd.data1 = htonl(cmd.data1); 1151 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1152 if (err != 0) { 1153 device_printf(sc->dev, "Failed " 1154 "MXGEFW_JOIN_MULTICAST_GROUP, error status:" 1155 "%d\t", err); 1156 /* abort, leaving multicast filtering off */ 1157 if_maddr_runlock(ifp); 1158 return; 1159 } 1160 } 1161 if_maddr_runlock(ifp); 1162 /* Enable multicast filtering */ 1163 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1164 if (err != 0) { 1165 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI" 1166 ", error status: %d\n", err); 1167 } 1168 } 1169 1170 static int 1171 mxge_max_mtu(mxge_softc_t *sc) 1172 { 1173 mxge_cmd_t cmd; 1174 int status; 1175 1176 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) 1177 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1178 1179 /* try to set nbufs to see if it we can 1180 use virtually contiguous jumbos */ 1181 cmd.data0 = 0; 1182 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 1183 &cmd); 1184 if (status == 0) 1185 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1186 1187 /* otherwise, we're limited to MJUMPAGESIZE */ 1188 return MJUMPAGESIZE - MXGEFW_PAD; 1189 } 1190 1191 static int 1192 mxge_reset(mxge_softc_t *sc, int interrupts_setup) 1193 { 1194 struct mxge_slice_state *ss; 1195 mxge_rx_done_t *rx_done; 1196 volatile uint32_t *irq_claim; 1197 mxge_cmd_t cmd; 1198 int slice, status; 1199 1200 /* try to send a reset command to the card to see if it 1201 is alive */ 1202 memset(&cmd, 0, sizeof (cmd)); 1203 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1204 if (status != 0) { 1205 device_printf(sc->dev, "failed reset\n"); 1206 return ENXIO; 1207 } 1208 1209 mxge_dummy_rdma(sc, 1); 1210 1211 1212 /* set the intrq size */ 1213 cmd.data0 = sc->rx_ring_size; 1214 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1215 1216 /* 1217 * Even though we already know how many slices are supported 1218 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES 1219 * has magic side effects, and must be called after a reset. 1220 * It must be called prior to calling any RSS related cmds, 1221 * including assigning an interrupt queue for anything but 1222 * slice 0. It must also be called *after* 1223 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 1224 * the firmware to compute offsets. 1225 */ 1226 1227 if (sc->num_slices > 1) { 1228 /* ask the maximum number of slices it supports */ 1229 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 1230 &cmd); 1231 if (status != 0) { 1232 device_printf(sc->dev, 1233 "failed to get number of slices\n"); 1234 return status; 1235 } 1236 /* 1237 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 1238 * to setting up the interrupt queue DMA 1239 */ 1240 cmd.data0 = sc->num_slices; 1241 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 1242 #ifdef IFNET_BUF_RING 1243 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 1244 #endif 1245 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, 1246 &cmd); 1247 if (status != 0) { 1248 device_printf(sc->dev, 1249 "failed to set number of slices\n"); 1250 return status; 1251 } 1252 } 1253 1254 1255 if (interrupts_setup) { 1256 /* Now exchange information about interrupts */ 1257 for (slice = 0; slice < sc->num_slices; slice++) { 1258 rx_done = &sc->ss[slice].rx_done; 1259 memset(rx_done->entry, 0, sc->rx_ring_size); 1260 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr); 1261 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr); 1262 cmd.data2 = slice; 1263 status |= mxge_send_cmd(sc, 1264 MXGEFW_CMD_SET_INTRQ_DMA, 1265 &cmd); 1266 } 1267 } 1268 1269 status |= mxge_send_cmd(sc, 1270 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); 1271 1272 1273 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1274 1275 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1276 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1277 1278 1279 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1280 &cmd); 1281 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1282 if (status != 0) { 1283 device_printf(sc->dev, "failed set interrupt parameters\n"); 1284 return status; 1285 } 1286 1287 1288 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1289 1290 1291 /* run a DMA benchmark */ 1292 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST); 1293 1294 for (slice = 0; slice < sc->num_slices; slice++) { 1295 ss = &sc->ss[slice]; 1296 1297 ss->irq_claim = irq_claim + (2 * slice); 1298 /* reset mcp/driver shared state back to 0 */ 1299 ss->rx_done.idx = 0; 1300 ss->rx_done.cnt = 0; 1301 ss->tx.req = 0; 1302 ss->tx.done = 0; 1303 ss->tx.pkt_done = 0; 1304 ss->tx.queue_active = 0; 1305 ss->tx.activate = 0; 1306 ss->tx.deactivate = 0; 1307 ss->tx.wake = 0; 1308 ss->tx.defrag = 0; 1309 ss->tx.stall = 0; 1310 ss->rx_big.cnt = 0; 1311 ss->rx_small.cnt = 0; 1312 ss->lro_bad_csum = 0; 1313 ss->lro_queued = 0; 1314 ss->lro_flushed = 0; 1315 if (ss->fw_stats != NULL) { 1316 bzero(ss->fw_stats, sizeof *ss->fw_stats); 1317 } 1318 } 1319 sc->rdma_tags_available = 15; 1320 status = mxge_update_mac_address(sc); 1321 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC); 1322 mxge_change_pause(sc, sc->pause); 1323 mxge_set_multicast_list(sc); 1324 if (sc->throttle) { 1325 cmd.data0 = sc->throttle; 1326 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, 1327 &cmd)) { 1328 device_printf(sc->dev, 1329 "can't enable throttle\n"); 1330 } 1331 } 1332 return status; 1333 } 1334 1335 static int 1336 mxge_change_throttle(SYSCTL_HANDLER_ARGS) 1337 { 1338 mxge_cmd_t cmd; 1339 mxge_softc_t *sc; 1340 int err; 1341 unsigned int throttle; 1342 1343 sc = arg1; 1344 throttle = sc->throttle; 1345 err = sysctl_handle_int(oidp, &throttle, arg2, req); 1346 if (err != 0) { 1347 return err; 1348 } 1349 1350 if (throttle == sc->throttle) 1351 return 0; 1352 1353 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE) 1354 return EINVAL; 1355 1356 mtx_lock(&sc->driver_mtx); 1357 cmd.data0 = throttle; 1358 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd); 1359 if (err == 0) 1360 sc->throttle = throttle; 1361 mtx_unlock(&sc->driver_mtx); 1362 return err; 1363 } 1364 1365 static int 1366 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1367 { 1368 mxge_softc_t *sc; 1369 unsigned int intr_coal_delay; 1370 int err; 1371 1372 sc = arg1; 1373 intr_coal_delay = sc->intr_coal_delay; 1374 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1375 if (err != 0) { 1376 return err; 1377 } 1378 if (intr_coal_delay == sc->intr_coal_delay) 1379 return 0; 1380 1381 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1382 return EINVAL; 1383 1384 mtx_lock(&sc->driver_mtx); 1385 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1386 sc->intr_coal_delay = intr_coal_delay; 1387 1388 mtx_unlock(&sc->driver_mtx); 1389 return err; 1390 } 1391 1392 static int 1393 mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1394 { 1395 mxge_softc_t *sc; 1396 unsigned int enabled; 1397 int err; 1398 1399 sc = arg1; 1400 enabled = sc->pause; 1401 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1402 if (err != 0) { 1403 return err; 1404 } 1405 if (enabled == sc->pause) 1406 return 0; 1407 1408 mtx_lock(&sc->driver_mtx); 1409 err = mxge_change_pause(sc, enabled); 1410 mtx_unlock(&sc->driver_mtx); 1411 return err; 1412 } 1413 1414 static int 1415 mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt) 1416 { 1417 struct ifnet *ifp; 1418 int err = 0; 1419 1420 ifp = sc->ifp; 1421 if (lro_cnt == 0) 1422 ifp->if_capenable &= ~IFCAP_LRO; 1423 else 1424 ifp->if_capenable |= IFCAP_LRO; 1425 sc->lro_cnt = lro_cnt; 1426 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1427 mxge_close(sc, 0); 1428 err = mxge_open(sc); 1429 } 1430 return err; 1431 } 1432 1433 static int 1434 mxge_change_lro(SYSCTL_HANDLER_ARGS) 1435 { 1436 mxge_softc_t *sc; 1437 unsigned int lro_cnt; 1438 int err; 1439 1440 sc = arg1; 1441 lro_cnt = sc->lro_cnt; 1442 err = sysctl_handle_int(oidp, &lro_cnt, arg2, req); 1443 if (err != 0) 1444 return err; 1445 1446 if (lro_cnt == sc->lro_cnt) 1447 return 0; 1448 1449 if (lro_cnt > 128) 1450 return EINVAL; 1451 1452 mtx_lock(&sc->driver_mtx); 1453 err = mxge_change_lro_locked(sc, lro_cnt); 1454 mtx_unlock(&sc->driver_mtx); 1455 return err; 1456 } 1457 1458 static int 1459 mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1460 { 1461 int err; 1462 1463 if (arg1 == NULL) 1464 return EFAULT; 1465 arg2 = be32toh(*(int *)arg1); 1466 arg1 = NULL; 1467 err = sysctl_handle_int(oidp, arg1, arg2, req); 1468 1469 return err; 1470 } 1471 1472 static void 1473 mxge_rem_sysctls(mxge_softc_t *sc) 1474 { 1475 struct mxge_slice_state *ss; 1476 int slice; 1477 1478 if (sc->slice_sysctl_tree == NULL) 1479 return; 1480 1481 for (slice = 0; slice < sc->num_slices; slice++) { 1482 ss = &sc->ss[slice]; 1483 if (ss == NULL || ss->sysctl_tree == NULL) 1484 continue; 1485 sysctl_ctx_free(&ss->sysctl_ctx); 1486 ss->sysctl_tree = NULL; 1487 } 1488 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1489 sc->slice_sysctl_tree = NULL; 1490 } 1491 1492 static void 1493 mxge_add_sysctls(mxge_softc_t *sc) 1494 { 1495 struct sysctl_ctx_list *ctx; 1496 struct sysctl_oid_list *children; 1497 mcp_irq_data_t *fw; 1498 struct mxge_slice_state *ss; 1499 int slice; 1500 char slice_num[8]; 1501 1502 ctx = device_get_sysctl_ctx(sc->dev); 1503 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1504 fw = sc->ss[0].fw_stats; 1505 1506 /* random information */ 1507 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1508 "firmware_version", 1509 CTLFLAG_RD, &sc->fw_version, 1510 0, "firmware version"); 1511 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1512 "serial_number", 1513 CTLFLAG_RD, &sc->serial_number_string, 1514 0, "serial number"); 1515 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1516 "product_code", 1517 CTLFLAG_RD, &sc->product_code_string, 1518 0, "product_code"); 1519 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1520 "pcie_link_width", 1521 CTLFLAG_RD, &sc->link_width, 1522 0, "tx_boundary"); 1523 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1524 "tx_boundary", 1525 CTLFLAG_RD, &sc->tx_boundary, 1526 0, "tx_boundary"); 1527 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1528 "write_combine", 1529 CTLFLAG_RD, &sc->wc, 1530 0, "write combining PIO?"); 1531 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1532 "read_dma_MBs", 1533 CTLFLAG_RD, &sc->read_dma, 1534 0, "DMA Read speed in MB/s"); 1535 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1536 "write_dma_MBs", 1537 CTLFLAG_RD, &sc->write_dma, 1538 0, "DMA Write speed in MB/s"); 1539 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1540 "read_write_dma_MBs", 1541 CTLFLAG_RD, &sc->read_write_dma, 1542 0, "DMA concurrent Read/Write speed in MB/s"); 1543 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1544 "watchdog_resets", 1545 CTLFLAG_RD, &sc->watchdog_resets, 1546 0, "Number of times NIC was reset"); 1547 1548 1549 /* performance related tunables */ 1550 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1551 "intr_coal_delay", 1552 CTLTYPE_INT|CTLFLAG_RW, sc, 1553 0, mxge_change_intr_coal, 1554 "I", "interrupt coalescing delay in usecs"); 1555 1556 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1557 "throttle", 1558 CTLTYPE_INT|CTLFLAG_RW, sc, 1559 0, mxge_change_throttle, 1560 "I", "transmit throttling"); 1561 1562 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1563 "flow_control_enabled", 1564 CTLTYPE_INT|CTLFLAG_RW, sc, 1565 0, mxge_change_flow_control, 1566 "I", "interrupt coalescing delay in usecs"); 1567 1568 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1569 "deassert_wait", 1570 CTLFLAG_RW, &mxge_deassert_wait, 1571 0, "Wait for IRQ line to go low in ihandler"); 1572 1573 /* stats block from firmware is in network byte order. 1574 Need to swap it */ 1575 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1576 "link_up", 1577 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 1578 0, mxge_handle_be32, 1579 "I", "link up"); 1580 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1581 "rdma_tags_available", 1582 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 1583 0, mxge_handle_be32, 1584 "I", "rdma_tags_available"); 1585 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1586 "dropped_bad_crc32", 1587 CTLTYPE_INT|CTLFLAG_RD, 1588 &fw->dropped_bad_crc32, 1589 0, mxge_handle_be32, 1590 "I", "dropped_bad_crc32"); 1591 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1592 "dropped_bad_phy", 1593 CTLTYPE_INT|CTLFLAG_RD, 1594 &fw->dropped_bad_phy, 1595 0, mxge_handle_be32, 1596 "I", "dropped_bad_phy"); 1597 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1598 "dropped_link_error_or_filtered", 1599 CTLTYPE_INT|CTLFLAG_RD, 1600 &fw->dropped_link_error_or_filtered, 1601 0, mxge_handle_be32, 1602 "I", "dropped_link_error_or_filtered"); 1603 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1604 "dropped_link_overflow", 1605 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 1606 0, mxge_handle_be32, 1607 "I", "dropped_link_overflow"); 1608 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1609 "dropped_multicast_filtered", 1610 CTLTYPE_INT|CTLFLAG_RD, 1611 &fw->dropped_multicast_filtered, 1612 0, mxge_handle_be32, 1613 "I", "dropped_multicast_filtered"); 1614 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1615 "dropped_no_big_buffer", 1616 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 1617 0, mxge_handle_be32, 1618 "I", "dropped_no_big_buffer"); 1619 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1620 "dropped_no_small_buffer", 1621 CTLTYPE_INT|CTLFLAG_RD, 1622 &fw->dropped_no_small_buffer, 1623 0, mxge_handle_be32, 1624 "I", "dropped_no_small_buffer"); 1625 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1626 "dropped_overrun", 1627 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 1628 0, mxge_handle_be32, 1629 "I", "dropped_overrun"); 1630 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1631 "dropped_pause", 1632 CTLTYPE_INT|CTLFLAG_RD, 1633 &fw->dropped_pause, 1634 0, mxge_handle_be32, 1635 "I", "dropped_pause"); 1636 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1637 "dropped_runt", 1638 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 1639 0, mxge_handle_be32, 1640 "I", "dropped_runt"); 1641 1642 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1643 "dropped_unicast_filtered", 1644 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 1645 0, mxge_handle_be32, 1646 "I", "dropped_unicast_filtered"); 1647 1648 /* verbose printing? */ 1649 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1650 "verbose", 1651 CTLFLAG_RW, &mxge_verbose, 1652 0, "verbose printing"); 1653 1654 /* lro */ 1655 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1656 "lro_cnt", 1657 CTLTYPE_INT|CTLFLAG_RW, sc, 1658 0, mxge_change_lro, 1659 "I", "number of lro merge queues"); 1660 1661 1662 /* add counters exported for debugging from all slices */ 1663 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1664 sc->slice_sysctl_tree = 1665 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO, 1666 "slice", CTLFLAG_RD, 0, ""); 1667 1668 for (slice = 0; slice < sc->num_slices; slice++) { 1669 ss = &sc->ss[slice]; 1670 sysctl_ctx_init(&ss->sysctl_ctx); 1671 ctx = &ss->sysctl_ctx; 1672 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1673 sprintf(slice_num, "%d", slice); 1674 ss->sysctl_tree = 1675 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num, 1676 CTLFLAG_RD, 0, ""); 1677 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1678 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1679 "rx_small_cnt", 1680 CTLFLAG_RD, &ss->rx_small.cnt, 1681 0, "rx_small_cnt"); 1682 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1683 "rx_big_cnt", 1684 CTLFLAG_RD, &ss->rx_big.cnt, 1685 0, "rx_small_cnt"); 1686 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1687 "lro_flushed", CTLFLAG_RD, &ss->lro_flushed, 1688 0, "number of lro merge queues flushed"); 1689 1690 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1691 "lro_queued", CTLFLAG_RD, &ss->lro_queued, 1692 0, "number of frames appended to lro merge" 1693 "queues"); 1694 1695 #ifndef IFNET_BUF_RING 1696 /* only transmit from slice 0 for now */ 1697 if (slice > 0) 1698 continue; 1699 #endif 1700 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1701 "tx_req", 1702 CTLFLAG_RD, &ss->tx.req, 1703 0, "tx_req"); 1704 1705 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1706 "tx_done", 1707 CTLFLAG_RD, &ss->tx.done, 1708 0, "tx_done"); 1709 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1710 "tx_pkt_done", 1711 CTLFLAG_RD, &ss->tx.pkt_done, 1712 0, "tx_done"); 1713 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1714 "tx_stall", 1715 CTLFLAG_RD, &ss->tx.stall, 1716 0, "tx_stall"); 1717 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1718 "tx_wake", 1719 CTLFLAG_RD, &ss->tx.wake, 1720 0, "tx_wake"); 1721 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1722 "tx_defrag", 1723 CTLFLAG_RD, &ss->tx.defrag, 1724 0, "tx_defrag"); 1725 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1726 "tx_queue_active", 1727 CTLFLAG_RD, &ss->tx.queue_active, 1728 0, "tx_queue_active"); 1729 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1730 "tx_activate", 1731 CTLFLAG_RD, &ss->tx.activate, 1732 0, "tx_activate"); 1733 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1734 "tx_deactivate", 1735 CTLFLAG_RD, &ss->tx.deactivate, 1736 0, "tx_deactivate"); 1737 } 1738 } 1739 1740 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1741 backwards one at a time and handle ring wraps */ 1742 1743 static inline void 1744 mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1745 mcp_kreq_ether_send_t *src, int cnt) 1746 { 1747 int idx, starting_slot; 1748 starting_slot = tx->req; 1749 while (cnt > 1) { 1750 cnt--; 1751 idx = (starting_slot + cnt) & tx->mask; 1752 mxge_pio_copy(&tx->lanai[idx], 1753 &src[cnt], sizeof(*src)); 1754 wmb(); 1755 } 1756 } 1757 1758 /* 1759 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1760 * at most 32 bytes at a time, so as to avoid involving the software 1761 * pio handler in the nic. We re-write the first segment's flags 1762 * to mark them valid only after writing the entire chain 1763 */ 1764 1765 static inline void 1766 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 1767 int cnt) 1768 { 1769 int idx, i; 1770 uint32_t *src_ints; 1771 volatile uint32_t *dst_ints; 1772 mcp_kreq_ether_send_t *srcp; 1773 volatile mcp_kreq_ether_send_t *dstp, *dst; 1774 uint8_t last_flags; 1775 1776 idx = tx->req & tx->mask; 1777 1778 last_flags = src->flags; 1779 src->flags = 0; 1780 wmb(); 1781 dst = dstp = &tx->lanai[idx]; 1782 srcp = src; 1783 1784 if ((idx + cnt) < tx->mask) { 1785 for (i = 0; i < (cnt - 1); i += 2) { 1786 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1787 wmb(); /* force write every 32 bytes */ 1788 srcp += 2; 1789 dstp += 2; 1790 } 1791 } else { 1792 /* submit all but the first request, and ensure 1793 that it is submitted below */ 1794 mxge_submit_req_backwards(tx, src, cnt); 1795 i = 0; 1796 } 1797 if (i < cnt) { 1798 /* submit the first request */ 1799 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1800 wmb(); /* barrier before setting valid flag */ 1801 } 1802 1803 /* re-write the last 32-bits with the valid flags */ 1804 src->flags = last_flags; 1805 src_ints = (uint32_t *)src; 1806 src_ints+=3; 1807 dst_ints = (volatile uint32_t *)dst; 1808 dst_ints+=3; 1809 *dst_ints = *src_ints; 1810 tx->req += cnt; 1811 wmb(); 1812 } 1813 1814 #if IFCAP_TSO4 1815 1816 static void 1817 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m, 1818 int busdma_seg_cnt, int ip_off) 1819 { 1820 mxge_tx_ring_t *tx; 1821 mcp_kreq_ether_send_t *req; 1822 bus_dma_segment_t *seg; 1823 struct ip *ip; 1824 struct tcphdr *tcp; 1825 uint32_t low, high_swapped; 1826 int len, seglen, cum_len, cum_len_next; 1827 int next_is_first, chop, cnt, rdma_count, small; 1828 uint16_t pseudo_hdr_offset, cksum_offset, mss; 1829 uint8_t flags, flags_next; 1830 static int once; 1831 1832 mss = m->m_pkthdr.tso_segsz; 1833 1834 /* negative cum_len signifies to the 1835 * send loop that we are still in the 1836 * header portion of the TSO packet. 1837 */ 1838 1839 /* ensure we have the ethernet, IP and TCP 1840 header together in the first mbuf, copy 1841 it to a scratch buffer if not */ 1842 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 1843 m_copydata(m, 0, ip_off + sizeof (*ip), 1844 ss->scratch); 1845 ip = (struct ip *)(ss->scratch + ip_off); 1846 } else { 1847 ip = (struct ip *)(mtod(m, char *) + ip_off); 1848 } 1849 if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2) 1850 + sizeof (*tcp))) { 1851 m_copydata(m, 0, ip_off + (ip->ip_hl << 2) 1852 + sizeof (*tcp), ss->scratch); 1853 ip = (struct ip *)(mtod(m, char *) + ip_off); 1854 } 1855 1856 tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2)); 1857 cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2)); 1858 cksum_offset = ip_off + (ip->ip_hl << 2); 1859 1860 /* TSO implies checksum offload on this hardware */ 1861 if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP)) == 0)) { 1862 /* 1863 * If packet has full TCP csum, replace it with pseudo hdr 1864 * sum that the NIC expects, otherwise the NIC will emit 1865 * packets with bad TCP checksums. 1866 */ 1867 m->m_pkthdr.csum_flags = CSUM_TCP; 1868 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); 1869 tcp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 1870 htons(IPPROTO_TCP + (m->m_pkthdr.len - cksum_offset))); 1871 } 1872 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1873 1874 1875 /* for TSO, pseudo_hdr_offset holds mss. 1876 * The firmware figures out where to put 1877 * the checksum by parsing the header. */ 1878 pseudo_hdr_offset = htobe16(mss); 1879 1880 tx = &ss->tx; 1881 req = tx->req_list; 1882 seg = tx->seg_list; 1883 cnt = 0; 1884 rdma_count = 0; 1885 /* "rdma_count" is the number of RDMAs belonging to the 1886 * current packet BEFORE the current send request. For 1887 * non-TSO packets, this is equal to "count". 1888 * For TSO packets, rdma_count needs to be reset 1889 * to 0 after a segment cut. 1890 * 1891 * The rdma_count field of the send request is 1892 * the number of RDMAs of the packet starting at 1893 * that request. For TSO send requests with one ore more cuts 1894 * in the middle, this is the number of RDMAs starting 1895 * after the last cut in the request. All previous 1896 * segments before the last cut implicitly have 1 RDMA. 1897 * 1898 * Since the number of RDMAs is not known beforehand, 1899 * it must be filled-in retroactively - after each 1900 * segmentation cut or at the end of the entire packet. 1901 */ 1902 1903 while (busdma_seg_cnt) { 1904 /* Break the busdma segment up into pieces*/ 1905 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1906 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1907 len = seg->ds_len; 1908 1909 while (len) { 1910 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1911 seglen = len; 1912 cum_len_next = cum_len + seglen; 1913 (req-rdma_count)->rdma_count = rdma_count + 1; 1914 if (__predict_true(cum_len >= 0)) { 1915 /* payload */ 1916 chop = (cum_len_next > mss); 1917 cum_len_next = cum_len_next % mss; 1918 next_is_first = (cum_len_next == 0); 1919 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1920 flags_next |= next_is_first * 1921 MXGEFW_FLAGS_FIRST; 1922 rdma_count |= -(chop | next_is_first); 1923 rdma_count += chop & !next_is_first; 1924 } else if (cum_len_next >= 0) { 1925 /* header ends */ 1926 rdma_count = -1; 1927 cum_len_next = 0; 1928 seglen = -cum_len; 1929 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1930 flags_next = MXGEFW_FLAGS_TSO_PLD | 1931 MXGEFW_FLAGS_FIRST | 1932 (small * MXGEFW_FLAGS_SMALL); 1933 } 1934 1935 req->addr_high = high_swapped; 1936 req->addr_low = htobe32(low); 1937 req->pseudo_hdr_offset = pseudo_hdr_offset; 1938 req->pad = 0; 1939 req->rdma_count = 1; 1940 req->length = htobe16(seglen); 1941 req->cksum_offset = cksum_offset; 1942 req->flags = flags | ((cum_len & 1) * 1943 MXGEFW_FLAGS_ALIGN_ODD); 1944 low += seglen; 1945 len -= seglen; 1946 cum_len = cum_len_next; 1947 flags = flags_next; 1948 req++; 1949 cnt++; 1950 rdma_count++; 1951 if (__predict_false(cksum_offset > seglen)) 1952 cksum_offset -= seglen; 1953 else 1954 cksum_offset = 0; 1955 if (__predict_false(cnt > tx->max_desc)) 1956 goto drop; 1957 } 1958 busdma_seg_cnt--; 1959 seg++; 1960 } 1961 (req-rdma_count)->rdma_count = rdma_count; 1962 1963 do { 1964 req--; 1965 req->flags |= MXGEFW_FLAGS_TSO_LAST; 1966 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 1967 1968 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1969 mxge_submit_req(tx, tx->req_list, cnt); 1970 #ifdef IFNET_BUF_RING 1971 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 1972 /* tell the NIC to start polling this slice */ 1973 *tx->send_go = 1; 1974 tx->queue_active = 1; 1975 tx->activate++; 1976 wmb(); 1977 } 1978 #endif 1979 return; 1980 1981 drop: 1982 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 1983 m_freem(m); 1984 ss->oerrors++; 1985 if (!once) { 1986 printf("tx->max_desc exceeded via TSO!\n"); 1987 printf("mss = %d, %ld, %d!\n", mss, 1988 (long)seg - (long)tx->seg_list, tx->max_desc); 1989 once = 1; 1990 } 1991 return; 1992 1993 } 1994 1995 #endif /* IFCAP_TSO4 */ 1996 1997 #ifdef MXGE_NEW_VLAN_API 1998 /* 1999 * We reproduce the software vlan tag insertion from 2000 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" 2001 * vlan tag insertion. We need to advertise this in order to have the 2002 * vlan interface respect our csum offload flags. 2003 */ 2004 static struct mbuf * 2005 mxge_vlan_tag_insert(struct mbuf *m) 2006 { 2007 struct ether_vlan_header *evl; 2008 2009 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT); 2010 if (__predict_false(m == NULL)) 2011 return NULL; 2012 if (m->m_len < sizeof(*evl)) { 2013 m = m_pullup(m, sizeof(*evl)); 2014 if (__predict_false(m == NULL)) 2015 return NULL; 2016 } 2017 /* 2018 * Transform the Ethernet header into an Ethernet header 2019 * with 802.1Q encapsulation. 2020 */ 2021 evl = mtod(m, struct ether_vlan_header *); 2022 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 2023 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 2024 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 2025 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 2026 m->m_flags &= ~M_VLANTAG; 2027 return m; 2028 } 2029 #endif /* MXGE_NEW_VLAN_API */ 2030 2031 static void 2032 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 2033 { 2034 mxge_softc_t *sc; 2035 mcp_kreq_ether_send_t *req; 2036 bus_dma_segment_t *seg; 2037 struct mbuf *m_tmp; 2038 struct ifnet *ifp; 2039 mxge_tx_ring_t *tx; 2040 struct ip *ip; 2041 int cnt, cum_len, err, i, idx, odd_flag, ip_off; 2042 uint16_t pseudo_hdr_offset; 2043 uint8_t flags, cksum_offset; 2044 2045 2046 sc = ss->sc; 2047 ifp = sc->ifp; 2048 tx = &ss->tx; 2049 2050 ip_off = sizeof (struct ether_header); 2051 #ifdef MXGE_NEW_VLAN_API 2052 if (m->m_flags & M_VLANTAG) { 2053 m = mxge_vlan_tag_insert(m); 2054 if (__predict_false(m == NULL)) 2055 goto drop; 2056 ip_off += ETHER_VLAN_ENCAP_LEN; 2057 } 2058 #endif 2059 /* (try to) map the frame for DMA */ 2060 idx = tx->req & tx->mask; 2061 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 2062 m, tx->seg_list, &cnt, 2063 BUS_DMA_NOWAIT); 2064 if (__predict_false(err == EFBIG)) { 2065 /* Too many segments in the chain. Try 2066 to defrag */ 2067 m_tmp = m_defrag(m, M_NOWAIT); 2068 if (m_tmp == NULL) { 2069 goto drop; 2070 } 2071 ss->tx.defrag++; 2072 m = m_tmp; 2073 err = bus_dmamap_load_mbuf_sg(tx->dmat, 2074 tx->info[idx].map, 2075 m, tx->seg_list, &cnt, 2076 BUS_DMA_NOWAIT); 2077 } 2078 if (__predict_false(err != 0)) { 2079 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 2080 " packet len = %d\n", err, m->m_pkthdr.len); 2081 goto drop; 2082 } 2083 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 2084 BUS_DMASYNC_PREWRITE); 2085 tx->info[idx].m = m; 2086 2087 #if IFCAP_TSO4 2088 /* TSO is different enough, we handle it in another routine */ 2089 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 2090 mxge_encap_tso(ss, m, cnt, ip_off); 2091 return; 2092 } 2093 #endif 2094 2095 req = tx->req_list; 2096 cksum_offset = 0; 2097 pseudo_hdr_offset = 0; 2098 flags = MXGEFW_FLAGS_NO_TSO; 2099 2100 /* checksum offloading? */ 2101 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) { 2102 /* ensure ip header is in first mbuf, copy 2103 it to a scratch buffer if not */ 2104 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 2105 m_copydata(m, 0, ip_off + sizeof (*ip), 2106 ss->scratch); 2107 ip = (struct ip *)(ss->scratch + ip_off); 2108 } else { 2109 ip = (struct ip *)(mtod(m, char *) + ip_off); 2110 } 2111 cksum_offset = ip_off + (ip->ip_hl << 2); 2112 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2113 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2114 req->cksum_offset = cksum_offset; 2115 flags |= MXGEFW_FLAGS_CKSUM; 2116 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2117 } else { 2118 odd_flag = 0; 2119 } 2120 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2121 flags |= MXGEFW_FLAGS_SMALL; 2122 2123 /* convert segments into a request list */ 2124 cum_len = 0; 2125 seg = tx->seg_list; 2126 req->flags = MXGEFW_FLAGS_FIRST; 2127 for (i = 0; i < cnt; i++) { 2128 req->addr_low = 2129 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2130 req->addr_high = 2131 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2132 req->length = htobe16(seg->ds_len); 2133 req->cksum_offset = cksum_offset; 2134 if (cksum_offset > seg->ds_len) 2135 cksum_offset -= seg->ds_len; 2136 else 2137 cksum_offset = 0; 2138 req->pseudo_hdr_offset = pseudo_hdr_offset; 2139 req->pad = 0; /* complete solid 16-byte block */ 2140 req->rdma_count = 1; 2141 req->flags |= flags | ((cum_len & 1) * odd_flag); 2142 cum_len += seg->ds_len; 2143 seg++; 2144 req++; 2145 req->flags = 0; 2146 } 2147 req--; 2148 /* pad runts to 60 bytes */ 2149 if (cum_len < 60) { 2150 req++; 2151 req->addr_low = 2152 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2153 req->addr_high = 2154 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2155 req->length = htobe16(60 - cum_len); 2156 req->cksum_offset = 0; 2157 req->pseudo_hdr_offset = pseudo_hdr_offset; 2158 req->pad = 0; /* complete solid 16-byte block */ 2159 req->rdma_count = 1; 2160 req->flags |= flags | ((cum_len & 1) * odd_flag); 2161 cnt++; 2162 } 2163 2164 tx->req_list[0].rdma_count = cnt; 2165 #if 0 2166 /* print what the firmware will see */ 2167 for (i = 0; i < cnt; i++) { 2168 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2169 "cso:%d, flags:0x%x, rdma:%d\n", 2170 i, (int)ntohl(tx->req_list[i].addr_high), 2171 (int)ntohl(tx->req_list[i].addr_low), 2172 (int)ntohs(tx->req_list[i].length), 2173 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2174 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2175 tx->req_list[i].rdma_count); 2176 } 2177 printf("--------------\n"); 2178 #endif 2179 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2180 mxge_submit_req(tx, tx->req_list, cnt); 2181 #ifdef IFNET_BUF_RING 2182 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2183 /* tell the NIC to start polling this slice */ 2184 *tx->send_go = 1; 2185 tx->queue_active = 1; 2186 tx->activate++; 2187 wmb(); 2188 } 2189 #endif 2190 return; 2191 2192 drop: 2193 m_freem(m); 2194 ss->oerrors++; 2195 return; 2196 } 2197 2198 #ifdef IFNET_BUF_RING 2199 static void 2200 mxge_qflush(struct ifnet *ifp) 2201 { 2202 mxge_softc_t *sc = ifp->if_softc; 2203 mxge_tx_ring_t *tx; 2204 struct mbuf *m; 2205 int slice; 2206 2207 for (slice = 0; slice < sc->num_slices; slice++) { 2208 tx = &sc->ss[slice].tx; 2209 mtx_lock(&tx->mtx); 2210 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2211 m_freem(m); 2212 mtx_unlock(&tx->mtx); 2213 } 2214 if_qflush(ifp); 2215 } 2216 2217 static inline void 2218 mxge_start_locked(struct mxge_slice_state *ss) 2219 { 2220 mxge_softc_t *sc; 2221 struct mbuf *m; 2222 struct ifnet *ifp; 2223 mxge_tx_ring_t *tx; 2224 2225 sc = ss->sc; 2226 ifp = sc->ifp; 2227 tx = &ss->tx; 2228 2229 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2230 m = drbr_dequeue(ifp, tx->br); 2231 if (m == NULL) { 2232 return; 2233 } 2234 /* let BPF see it */ 2235 BPF_MTAP(ifp, m); 2236 2237 /* give it to the nic */ 2238 mxge_encap(ss, m); 2239 } 2240 /* ran out of transmit slots */ 2241 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) 2242 && (!drbr_empty(ifp, tx->br))) { 2243 ss->if_drv_flags |= IFF_DRV_OACTIVE; 2244 tx->stall++; 2245 } 2246 } 2247 2248 static int 2249 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2250 { 2251 mxge_softc_t *sc; 2252 struct ifnet *ifp; 2253 mxge_tx_ring_t *tx; 2254 int err; 2255 2256 sc = ss->sc; 2257 ifp = sc->ifp; 2258 tx = &ss->tx; 2259 2260 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 2261 IFF_DRV_RUNNING) { 2262 err = drbr_enqueue(ifp, tx->br, m); 2263 return (err); 2264 } 2265 2266 if (!drbr_needs_enqueue(ifp, tx->br) && 2267 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2268 /* let BPF see it */ 2269 BPF_MTAP(ifp, m); 2270 /* give it to the nic */ 2271 mxge_encap(ss, m); 2272 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2273 return (err); 2274 } 2275 if (!drbr_empty(ifp, tx->br)) 2276 mxge_start_locked(ss); 2277 return (0); 2278 } 2279 2280 static int 2281 mxge_transmit(struct ifnet *ifp, struct mbuf *m) 2282 { 2283 mxge_softc_t *sc = ifp->if_softc; 2284 struct mxge_slice_state *ss; 2285 mxge_tx_ring_t *tx; 2286 int err = 0; 2287 int slice; 2288 2289 slice = m->m_pkthdr.flowid; 2290 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ 2291 2292 ss = &sc->ss[slice]; 2293 tx = &ss->tx; 2294 2295 if (mtx_trylock(&tx->mtx)) { 2296 err = mxge_transmit_locked(ss, m); 2297 mtx_unlock(&tx->mtx); 2298 } else { 2299 err = drbr_enqueue(ifp, tx->br, m); 2300 } 2301 2302 return (err); 2303 } 2304 2305 #else 2306 2307 static inline void 2308 mxge_start_locked(struct mxge_slice_state *ss) 2309 { 2310 mxge_softc_t *sc; 2311 struct mbuf *m; 2312 struct ifnet *ifp; 2313 mxge_tx_ring_t *tx; 2314 2315 sc = ss->sc; 2316 ifp = sc->ifp; 2317 tx = &ss->tx; 2318 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2319 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 2320 if (m == NULL) { 2321 return; 2322 } 2323 /* let BPF see it */ 2324 BPF_MTAP(ifp, m); 2325 2326 /* give it to the nic */ 2327 mxge_encap(ss, m); 2328 } 2329 /* ran out of transmit slots */ 2330 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 2331 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2332 tx->stall++; 2333 } 2334 } 2335 #endif 2336 static void 2337 mxge_start(struct ifnet *ifp) 2338 { 2339 mxge_softc_t *sc = ifp->if_softc; 2340 struct mxge_slice_state *ss; 2341 2342 /* only use the first slice for now */ 2343 ss = &sc->ss[0]; 2344 mtx_lock(&ss->tx.mtx); 2345 mxge_start_locked(ss); 2346 mtx_unlock(&ss->tx.mtx); 2347 } 2348 2349 /* 2350 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2351 * at most 32 bytes at a time, so as to avoid involving the software 2352 * pio handler in the nic. We re-write the first segment's low 2353 * DMA address to mark it valid only after we write the entire chunk 2354 * in a burst 2355 */ 2356 static inline void 2357 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2358 mcp_kreq_ether_recv_t *src) 2359 { 2360 uint32_t low; 2361 2362 low = src->addr_low; 2363 src->addr_low = 0xffffffff; 2364 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2365 wmb(); 2366 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2367 wmb(); 2368 src->addr_low = low; 2369 dst->addr_low = low; 2370 wmb(); 2371 } 2372 2373 static int 2374 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2375 { 2376 bus_dma_segment_t seg; 2377 struct mbuf *m; 2378 mxge_rx_ring_t *rx = &ss->rx_small; 2379 int cnt, err; 2380 2381 m = m_gethdr(M_DONTWAIT, MT_DATA); 2382 if (m == NULL) { 2383 rx->alloc_fail++; 2384 err = ENOBUFS; 2385 goto done; 2386 } 2387 m->m_len = MHLEN; 2388 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2389 &seg, &cnt, BUS_DMA_NOWAIT); 2390 if (err != 0) { 2391 m_free(m); 2392 goto done; 2393 } 2394 rx->info[idx].m = m; 2395 rx->shadow[idx].addr_low = 2396 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2397 rx->shadow[idx].addr_high = 2398 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2399 2400 done: 2401 if ((idx & 7) == 7) 2402 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2403 return err; 2404 } 2405 2406 static int 2407 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2408 { 2409 bus_dma_segment_t seg[3]; 2410 struct mbuf *m; 2411 mxge_rx_ring_t *rx = &ss->rx_big; 2412 int cnt, err, i; 2413 2414 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2415 if (m == NULL) { 2416 rx->alloc_fail++; 2417 err = ENOBUFS; 2418 goto done; 2419 } 2420 m->m_len = rx->mlen; 2421 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2422 seg, &cnt, BUS_DMA_NOWAIT); 2423 if (err != 0) { 2424 m_free(m); 2425 goto done; 2426 } 2427 rx->info[idx].m = m; 2428 rx->shadow[idx].addr_low = 2429 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2430 rx->shadow[idx].addr_high = 2431 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2432 2433 #if MXGE_VIRT_JUMBOS 2434 for (i = 1; i < cnt; i++) { 2435 rx->shadow[idx + i].addr_low = 2436 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2437 rx->shadow[idx + i].addr_high = 2438 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2439 } 2440 #endif 2441 2442 done: 2443 for (i = 0; i < rx->nbufs; i++) { 2444 if ((idx & 7) == 7) { 2445 mxge_submit_8rx(&rx->lanai[idx - 7], 2446 &rx->shadow[idx - 7]); 2447 } 2448 idx++; 2449 } 2450 return err; 2451 } 2452 2453 /* 2454 * Myri10GE hardware checksums are not valid if the sender 2455 * padded the frame with non-zero padding. This is because 2456 * the firmware just does a simple 16-bit 1s complement 2457 * checksum across the entire frame, excluding the first 14 2458 * bytes. It is best to simply to check the checksum and 2459 * tell the stack about it only if the checksum is good 2460 */ 2461 2462 static inline uint16_t 2463 mxge_rx_csum(struct mbuf *m, int csum) 2464 { 2465 struct ether_header *eh; 2466 struct ip *ip; 2467 uint16_t c; 2468 2469 eh = mtod(m, struct ether_header *); 2470 2471 /* only deal with IPv4 TCP & UDP for now */ 2472 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP))) 2473 return 1; 2474 ip = (struct ip *)(eh + 1); 2475 if (__predict_false(ip->ip_p != IPPROTO_TCP && 2476 ip->ip_p != IPPROTO_UDP)) 2477 return 1; 2478 #ifdef INET 2479 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2480 htonl(ntohs(csum) + ntohs(ip->ip_len) + 2481 - (ip->ip_hl << 2) + ip->ip_p)); 2482 #else 2483 c = 1; 2484 #endif 2485 c ^= 0xffff; 2486 return (c); 2487 } 2488 2489 static void 2490 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2491 { 2492 struct ether_vlan_header *evl; 2493 struct ether_header *eh; 2494 uint32_t partial; 2495 2496 evl = mtod(m, struct ether_vlan_header *); 2497 eh = mtod(m, struct ether_header *); 2498 2499 /* 2500 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes 2501 * after what the firmware thought was the end of the ethernet 2502 * header. 2503 */ 2504 2505 /* put checksum into host byte order */ 2506 *csum = ntohs(*csum); 2507 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2508 (*csum) += ~partial; 2509 (*csum) += ((*csum) < ~partial); 2510 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2511 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2512 2513 /* restore checksum to network byte order; 2514 later consumers expect this */ 2515 *csum = htons(*csum); 2516 2517 /* save the tag */ 2518 #ifdef MXGE_NEW_VLAN_API 2519 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); 2520 #else 2521 { 2522 struct m_tag *mtag; 2523 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), 2524 M_NOWAIT); 2525 if (mtag == NULL) 2526 return; 2527 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); 2528 m_tag_prepend(m, mtag); 2529 } 2530 2531 #endif 2532 m->m_flags |= M_VLANTAG; 2533 2534 /* 2535 * Remove the 802.1q header by copying the Ethernet 2536 * addresses over it and adjusting the beginning of 2537 * the data in the mbuf. The encapsulated Ethernet 2538 * type field is already in place. 2539 */ 2540 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2541 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2542 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2543 } 2544 2545 2546 static inline void 2547 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2548 { 2549 mxge_softc_t *sc; 2550 struct ifnet *ifp; 2551 struct mbuf *m; 2552 struct ether_header *eh; 2553 mxge_rx_ring_t *rx; 2554 bus_dmamap_t old_map; 2555 int idx; 2556 uint16_t tcpudp_csum; 2557 2558 sc = ss->sc; 2559 ifp = sc->ifp; 2560 rx = &ss->rx_big; 2561 idx = rx->cnt & rx->mask; 2562 rx->cnt += rx->nbufs; 2563 /* save a pointer to the received mbuf */ 2564 m = rx->info[idx].m; 2565 /* try to replace the received mbuf */ 2566 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2567 /* drop the frame -- the old mbuf is re-cycled */ 2568 ifp->if_ierrors++; 2569 return; 2570 } 2571 2572 /* unmap the received buffer */ 2573 old_map = rx->info[idx].map; 2574 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2575 bus_dmamap_unload(rx->dmat, old_map); 2576 2577 /* swap the bus_dmamap_t's */ 2578 rx->info[idx].map = rx->extra_map; 2579 rx->extra_map = old_map; 2580 2581 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2582 * aligned */ 2583 m->m_data += MXGEFW_PAD; 2584 2585 m->m_pkthdr.rcvif = ifp; 2586 m->m_len = m->m_pkthdr.len = len; 2587 ss->ipackets++; 2588 eh = mtod(m, struct ether_header *); 2589 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2590 mxge_vlan_tag_remove(m, &csum); 2591 } 2592 /* if the checksum is valid, mark it in the mbuf header */ 2593 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2594 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2595 return; 2596 /* otherwise, it was a UDP frame, or a TCP frame which 2597 we could not do LRO on. Tell the stack that the 2598 checksum is good */ 2599 m->m_pkthdr.csum_data = 0xffff; 2600 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2601 } 2602 /* flowid only valid if RSS hashing is enabled */ 2603 if (sc->num_slices > 1) { 2604 m->m_pkthdr.flowid = (ss - sc->ss); 2605 m->m_flags |= M_FLOWID; 2606 } 2607 /* pass the frame up the stack */ 2608 (*ifp->if_input)(ifp, m); 2609 } 2610 2611 static inline void 2612 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2613 { 2614 mxge_softc_t *sc; 2615 struct ifnet *ifp; 2616 struct ether_header *eh; 2617 struct mbuf *m; 2618 mxge_rx_ring_t *rx; 2619 bus_dmamap_t old_map; 2620 int idx; 2621 uint16_t tcpudp_csum; 2622 2623 sc = ss->sc; 2624 ifp = sc->ifp; 2625 rx = &ss->rx_small; 2626 idx = rx->cnt & rx->mask; 2627 rx->cnt++; 2628 /* save a pointer to the received mbuf */ 2629 m = rx->info[idx].m; 2630 /* try to replace the received mbuf */ 2631 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2632 /* drop the frame -- the old mbuf is re-cycled */ 2633 ifp->if_ierrors++; 2634 return; 2635 } 2636 2637 /* unmap the received buffer */ 2638 old_map = rx->info[idx].map; 2639 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2640 bus_dmamap_unload(rx->dmat, old_map); 2641 2642 /* swap the bus_dmamap_t's */ 2643 rx->info[idx].map = rx->extra_map; 2644 rx->extra_map = old_map; 2645 2646 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2647 * aligned */ 2648 m->m_data += MXGEFW_PAD; 2649 2650 m->m_pkthdr.rcvif = ifp; 2651 m->m_len = m->m_pkthdr.len = len; 2652 ss->ipackets++; 2653 eh = mtod(m, struct ether_header *); 2654 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2655 mxge_vlan_tag_remove(m, &csum); 2656 } 2657 /* if the checksum is valid, mark it in the mbuf header */ 2658 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2659 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2660 return; 2661 /* otherwise, it was a UDP frame, or a TCP frame which 2662 we could not do LRO on. Tell the stack that the 2663 checksum is good */ 2664 m->m_pkthdr.csum_data = 0xffff; 2665 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2666 } 2667 /* flowid only valid if RSS hashing is enabled */ 2668 if (sc->num_slices > 1) { 2669 m->m_pkthdr.flowid = (ss - sc->ss); 2670 m->m_flags |= M_FLOWID; 2671 } 2672 /* pass the frame up the stack */ 2673 (*ifp->if_input)(ifp, m); 2674 } 2675 2676 static inline void 2677 mxge_clean_rx_done(struct mxge_slice_state *ss) 2678 { 2679 mxge_rx_done_t *rx_done = &ss->rx_done; 2680 int limit = 0; 2681 uint16_t length; 2682 uint16_t checksum; 2683 2684 2685 while (rx_done->entry[rx_done->idx].length != 0) { 2686 length = ntohs(rx_done->entry[rx_done->idx].length); 2687 rx_done->entry[rx_done->idx].length = 0; 2688 checksum = rx_done->entry[rx_done->idx].checksum; 2689 if (length <= (MHLEN - MXGEFW_PAD)) 2690 mxge_rx_done_small(ss, length, checksum); 2691 else 2692 mxge_rx_done_big(ss, length, checksum); 2693 rx_done->cnt++; 2694 rx_done->idx = rx_done->cnt & rx_done->mask; 2695 2696 /* limit potential for livelock */ 2697 if (__predict_false(++limit > rx_done->mask / 2)) 2698 break; 2699 } 2700 #ifdef INET 2701 while (!SLIST_EMPTY(&ss->lro_active)) { 2702 struct lro_entry *lro = SLIST_FIRST(&ss->lro_active); 2703 SLIST_REMOVE_HEAD(&ss->lro_active, next); 2704 mxge_lro_flush(ss, lro); 2705 } 2706 #endif 2707 } 2708 2709 2710 static inline void 2711 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2712 { 2713 struct ifnet *ifp; 2714 mxge_tx_ring_t *tx; 2715 struct mbuf *m; 2716 bus_dmamap_t map; 2717 int idx; 2718 int *flags; 2719 2720 tx = &ss->tx; 2721 ifp = ss->sc->ifp; 2722 while (tx->pkt_done != mcp_idx) { 2723 idx = tx->done & tx->mask; 2724 tx->done++; 2725 m = tx->info[idx].m; 2726 /* mbuf and DMA map only attached to the first 2727 segment per-mbuf */ 2728 if (m != NULL) { 2729 ss->obytes += m->m_pkthdr.len; 2730 if (m->m_flags & M_MCAST) 2731 ss->omcasts++; 2732 ss->opackets++; 2733 tx->info[idx].m = NULL; 2734 map = tx->info[idx].map; 2735 bus_dmamap_unload(tx->dmat, map); 2736 m_freem(m); 2737 } 2738 if (tx->info[idx].flag) { 2739 tx->info[idx].flag = 0; 2740 tx->pkt_done++; 2741 } 2742 } 2743 2744 /* If we have space, clear IFF_OACTIVE to tell the stack that 2745 its OK to send packets */ 2746 #ifdef IFNET_BUF_RING 2747 flags = &ss->if_drv_flags; 2748 #else 2749 flags = &ifp->if_drv_flags; 2750 #endif 2751 mtx_lock(&ss->tx.mtx); 2752 if ((*flags) & IFF_DRV_OACTIVE && 2753 tx->req - tx->done < (tx->mask + 1)/4) { 2754 *(flags) &= ~IFF_DRV_OACTIVE; 2755 ss->tx.wake++; 2756 mxge_start_locked(ss); 2757 } 2758 #ifdef IFNET_BUF_RING 2759 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { 2760 /* let the NIC stop polling this queue, since there 2761 * are no more transmits pending */ 2762 if (tx->req == tx->done) { 2763 *tx->send_stop = 1; 2764 tx->queue_active = 0; 2765 tx->deactivate++; 2766 wmb(); 2767 } 2768 } 2769 #endif 2770 mtx_unlock(&ss->tx.mtx); 2771 2772 } 2773 2774 static struct mxge_media_type mxge_xfp_media_types[] = 2775 { 2776 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2777 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2778 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2779 {0, (1 << 5), "10GBASE-ER"}, 2780 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2781 {0, (1 << 3), "10GBASE-SW"}, 2782 {0, (1 << 2), "10GBASE-LW"}, 2783 {0, (1 << 1), "10GBASE-EW"}, 2784 {0, (1 << 0), "Reserved"} 2785 }; 2786 static struct mxge_media_type mxge_sfp_media_types[] = 2787 { 2788 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"}, 2789 {0, (1 << 7), "Reserved"}, 2790 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2791 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2792 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}, 2793 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"} 2794 }; 2795 2796 static void 2797 mxge_media_set(mxge_softc_t *sc, int media_type) 2798 { 2799 2800 2801 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type, 2802 0, NULL); 2803 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type); 2804 sc->current_media = media_type; 2805 sc->media.ifm_media = sc->media.ifm_cur->ifm_media; 2806 } 2807 2808 static void 2809 mxge_media_init(mxge_softc_t *sc) 2810 { 2811 char *ptr; 2812 int i; 2813 2814 ifmedia_removeall(&sc->media); 2815 mxge_media_set(sc, IFM_AUTO); 2816 2817 /* 2818 * parse the product code to deterimine the interface type 2819 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2820 * after the 3rd dash in the driver's cached copy of the 2821 * EEPROM's product code string. 2822 */ 2823 ptr = sc->product_code_string; 2824 if (ptr == NULL) { 2825 device_printf(sc->dev, "Missing product code\n"); 2826 return; 2827 } 2828 2829 for (i = 0; i < 3; i++, ptr++) { 2830 ptr = index(ptr, '-'); 2831 if (ptr == NULL) { 2832 device_printf(sc->dev, 2833 "only %d dashes in PC?!?\n", i); 2834 return; 2835 } 2836 } 2837 if (*ptr == 'C' || *(ptr +1) == 'C') { 2838 /* -C is CX4 */ 2839 sc->connector = MXGE_CX4; 2840 mxge_media_set(sc, IFM_10G_CX4); 2841 } else if (*ptr == 'Q') { 2842 /* -Q is Quad Ribbon Fiber */ 2843 sc->connector = MXGE_QRF; 2844 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2845 /* FreeBSD has no media type for Quad ribbon fiber */ 2846 } else if (*ptr == 'R') { 2847 /* -R is XFP */ 2848 sc->connector = MXGE_XFP; 2849 } else if (*ptr == 'S' || *(ptr +1) == 'S') { 2850 /* -S or -2S is SFP+ */ 2851 sc->connector = MXGE_SFP; 2852 } else { 2853 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2854 } 2855 } 2856 2857 /* 2858 * Determine the media type for a NIC. Some XFPs will identify 2859 * themselves only when their link is up, so this is initiated via a 2860 * link up interrupt. However, this can potentially take up to 2861 * several milliseconds, so it is run via the watchdog routine, rather 2862 * than in the interrupt handler itself. 2863 */ 2864 static void 2865 mxge_media_probe(mxge_softc_t *sc) 2866 { 2867 mxge_cmd_t cmd; 2868 char *cage_type; 2869 2870 struct mxge_media_type *mxge_media_types = NULL; 2871 int i, err, ms, mxge_media_type_entries; 2872 uint32_t byte; 2873 2874 sc->need_media_probe = 0; 2875 2876 if (sc->connector == MXGE_XFP) { 2877 /* -R is XFP */ 2878 mxge_media_types = mxge_xfp_media_types; 2879 mxge_media_type_entries = 2880 sizeof (mxge_xfp_media_types) / 2881 sizeof (mxge_xfp_media_types[0]); 2882 byte = MXGE_XFP_COMPLIANCE_BYTE; 2883 cage_type = "XFP"; 2884 } else if (sc->connector == MXGE_SFP) { 2885 /* -S or -2S is SFP+ */ 2886 mxge_media_types = mxge_sfp_media_types; 2887 mxge_media_type_entries = 2888 sizeof (mxge_sfp_media_types) / 2889 sizeof (mxge_sfp_media_types[0]); 2890 cage_type = "SFP+"; 2891 byte = 3; 2892 } else { 2893 /* nothing to do; media type cannot change */ 2894 return; 2895 } 2896 2897 /* 2898 * At this point we know the NIC has an XFP cage, so now we 2899 * try to determine what is in the cage by using the 2900 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2901 * register. We read just one byte, which may take over 2902 * a millisecond 2903 */ 2904 2905 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2906 cmd.data1 = byte; 2907 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2908 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 2909 device_printf(sc->dev, "failed to read XFP\n"); 2910 } 2911 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 2912 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 2913 } 2914 if (err != MXGEFW_CMD_OK) { 2915 return; 2916 } 2917 2918 /* now we wait for the data to be cached */ 2919 cmd.data0 = byte; 2920 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2921 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 2922 DELAY(1000); 2923 cmd.data0 = byte; 2924 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2925 } 2926 if (err != MXGEFW_CMD_OK) { 2927 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 2928 cage_type, err, ms); 2929 return; 2930 } 2931 2932 if (cmd.data0 == mxge_media_types[0].bitmask) { 2933 if (mxge_verbose) 2934 device_printf(sc->dev, "%s:%s\n", cage_type, 2935 mxge_media_types[0].name); 2936 if (sc->current_media != mxge_media_types[0].flag) { 2937 mxge_media_init(sc); 2938 mxge_media_set(sc, mxge_media_types[0].flag); 2939 } 2940 return; 2941 } 2942 for (i = 1; i < mxge_media_type_entries; i++) { 2943 if (cmd.data0 & mxge_media_types[i].bitmask) { 2944 if (mxge_verbose) 2945 device_printf(sc->dev, "%s:%s\n", 2946 cage_type, 2947 mxge_media_types[i].name); 2948 2949 if (sc->current_media != mxge_media_types[i].flag) { 2950 mxge_media_init(sc); 2951 mxge_media_set(sc, mxge_media_types[i].flag); 2952 } 2953 return; 2954 } 2955 } 2956 if (mxge_verbose) 2957 device_printf(sc->dev, "%s media 0x%x unknown\n", 2958 cage_type, cmd.data0); 2959 2960 return; 2961 } 2962 2963 static void 2964 mxge_intr(void *arg) 2965 { 2966 struct mxge_slice_state *ss = arg; 2967 mxge_softc_t *sc = ss->sc; 2968 mcp_irq_data_t *stats = ss->fw_stats; 2969 mxge_tx_ring_t *tx = &ss->tx; 2970 mxge_rx_done_t *rx_done = &ss->rx_done; 2971 uint32_t send_done_count; 2972 uint8_t valid; 2973 2974 2975 #ifndef IFNET_BUF_RING 2976 /* an interrupt on a non-zero slice is implicitly valid 2977 since MSI-X irqs are not shared */ 2978 if (ss != sc->ss) { 2979 mxge_clean_rx_done(ss); 2980 *ss->irq_claim = be32toh(3); 2981 return; 2982 } 2983 #endif 2984 2985 /* make sure the DMA has finished */ 2986 if (!stats->valid) { 2987 return; 2988 } 2989 valid = stats->valid; 2990 2991 if (sc->legacy_irq) { 2992 /* lower legacy IRQ */ 2993 *sc->irq_deassert = 0; 2994 if (!mxge_deassert_wait) 2995 /* don't wait for conf. that irq is low */ 2996 stats->valid = 0; 2997 } else { 2998 stats->valid = 0; 2999 } 3000 3001 /* loop while waiting for legacy irq deassertion */ 3002 do { 3003 /* check for transmit completes and receives */ 3004 send_done_count = be32toh(stats->send_done_count); 3005 while ((send_done_count != tx->pkt_done) || 3006 (rx_done->entry[rx_done->idx].length != 0)) { 3007 if (send_done_count != tx->pkt_done) 3008 mxge_tx_done(ss, (int)send_done_count); 3009 mxge_clean_rx_done(ss); 3010 send_done_count = be32toh(stats->send_done_count); 3011 } 3012 if (sc->legacy_irq && mxge_deassert_wait) 3013 wmb(); 3014 } while (*((volatile uint8_t *) &stats->valid)); 3015 3016 /* fw link & error stats meaningful only on the first slice */ 3017 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 3018 if (sc->link_state != stats->link_up) { 3019 sc->link_state = stats->link_up; 3020 if (sc->link_state) { 3021 if_link_state_change(sc->ifp, LINK_STATE_UP); 3022 sc->ifp->if_baudrate = IF_Gbps(10UL); 3023 if (mxge_verbose) 3024 device_printf(sc->dev, "link up\n"); 3025 } else { 3026 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3027 sc->ifp->if_baudrate = 0; 3028 if (mxge_verbose) 3029 device_printf(sc->dev, "link down\n"); 3030 } 3031 sc->need_media_probe = 1; 3032 } 3033 if (sc->rdma_tags_available != 3034 be32toh(stats->rdma_tags_available)) { 3035 sc->rdma_tags_available = 3036 be32toh(stats->rdma_tags_available); 3037 device_printf(sc->dev, "RDMA timed out! %d tags " 3038 "left\n", sc->rdma_tags_available); 3039 } 3040 3041 if (stats->link_down) { 3042 sc->down_cnt += stats->link_down; 3043 sc->link_state = 0; 3044 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3045 } 3046 } 3047 3048 /* check to see if we have rx token to pass back */ 3049 if (valid & 0x1) 3050 *ss->irq_claim = be32toh(3); 3051 *(ss->irq_claim + 1) = be32toh(3); 3052 } 3053 3054 static void 3055 mxge_init(void *arg) 3056 { 3057 mxge_softc_t *sc = arg; 3058 struct ifnet *ifp = sc->ifp; 3059 3060 3061 mtx_lock(&sc->driver_mtx); 3062 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 3063 (void) mxge_open(sc); 3064 mtx_unlock(&sc->driver_mtx); 3065 } 3066 3067 3068 3069 static void 3070 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 3071 { 3072 struct lro_entry *lro_entry; 3073 int i; 3074 3075 while (!SLIST_EMPTY(&ss->lro_free)) { 3076 lro_entry = SLIST_FIRST(&ss->lro_free); 3077 SLIST_REMOVE_HEAD(&ss->lro_free, next); 3078 free(lro_entry, M_DEVBUF); 3079 } 3080 3081 for (i = 0; i <= ss->rx_big.mask; i++) { 3082 if (ss->rx_big.info[i].m == NULL) 3083 continue; 3084 bus_dmamap_unload(ss->rx_big.dmat, 3085 ss->rx_big.info[i].map); 3086 m_freem(ss->rx_big.info[i].m); 3087 ss->rx_big.info[i].m = NULL; 3088 } 3089 3090 for (i = 0; i <= ss->rx_small.mask; i++) { 3091 if (ss->rx_small.info[i].m == NULL) 3092 continue; 3093 bus_dmamap_unload(ss->rx_small.dmat, 3094 ss->rx_small.info[i].map); 3095 m_freem(ss->rx_small.info[i].m); 3096 ss->rx_small.info[i].m = NULL; 3097 } 3098 3099 /* transmit ring used only on the first slice */ 3100 if (ss->tx.info == NULL) 3101 return; 3102 3103 for (i = 0; i <= ss->tx.mask; i++) { 3104 ss->tx.info[i].flag = 0; 3105 if (ss->tx.info[i].m == NULL) 3106 continue; 3107 bus_dmamap_unload(ss->tx.dmat, 3108 ss->tx.info[i].map); 3109 m_freem(ss->tx.info[i].m); 3110 ss->tx.info[i].m = NULL; 3111 } 3112 } 3113 3114 static void 3115 mxge_free_mbufs(mxge_softc_t *sc) 3116 { 3117 int slice; 3118 3119 for (slice = 0; slice < sc->num_slices; slice++) 3120 mxge_free_slice_mbufs(&sc->ss[slice]); 3121 } 3122 3123 static void 3124 mxge_free_slice_rings(struct mxge_slice_state *ss) 3125 { 3126 int i; 3127 3128 3129 if (ss->rx_done.entry != NULL) 3130 mxge_dma_free(&ss->rx_done.dma); 3131 ss->rx_done.entry = NULL; 3132 3133 if (ss->tx.req_bytes != NULL) 3134 free(ss->tx.req_bytes, M_DEVBUF); 3135 ss->tx.req_bytes = NULL; 3136 3137 if (ss->tx.seg_list != NULL) 3138 free(ss->tx.seg_list, M_DEVBUF); 3139 ss->tx.seg_list = NULL; 3140 3141 if (ss->rx_small.shadow != NULL) 3142 free(ss->rx_small.shadow, M_DEVBUF); 3143 ss->rx_small.shadow = NULL; 3144 3145 if (ss->rx_big.shadow != NULL) 3146 free(ss->rx_big.shadow, M_DEVBUF); 3147 ss->rx_big.shadow = NULL; 3148 3149 if (ss->tx.info != NULL) { 3150 if (ss->tx.dmat != NULL) { 3151 for (i = 0; i <= ss->tx.mask; i++) { 3152 bus_dmamap_destroy(ss->tx.dmat, 3153 ss->tx.info[i].map); 3154 } 3155 bus_dma_tag_destroy(ss->tx.dmat); 3156 } 3157 free(ss->tx.info, M_DEVBUF); 3158 } 3159 ss->tx.info = NULL; 3160 3161 if (ss->rx_small.info != NULL) { 3162 if (ss->rx_small.dmat != NULL) { 3163 for (i = 0; i <= ss->rx_small.mask; i++) { 3164 bus_dmamap_destroy(ss->rx_small.dmat, 3165 ss->rx_small.info[i].map); 3166 } 3167 bus_dmamap_destroy(ss->rx_small.dmat, 3168 ss->rx_small.extra_map); 3169 bus_dma_tag_destroy(ss->rx_small.dmat); 3170 } 3171 free(ss->rx_small.info, M_DEVBUF); 3172 } 3173 ss->rx_small.info = NULL; 3174 3175 if (ss->rx_big.info != NULL) { 3176 if (ss->rx_big.dmat != NULL) { 3177 for (i = 0; i <= ss->rx_big.mask; i++) { 3178 bus_dmamap_destroy(ss->rx_big.dmat, 3179 ss->rx_big.info[i].map); 3180 } 3181 bus_dmamap_destroy(ss->rx_big.dmat, 3182 ss->rx_big.extra_map); 3183 bus_dma_tag_destroy(ss->rx_big.dmat); 3184 } 3185 free(ss->rx_big.info, M_DEVBUF); 3186 } 3187 ss->rx_big.info = NULL; 3188 } 3189 3190 static void 3191 mxge_free_rings(mxge_softc_t *sc) 3192 { 3193 int slice; 3194 3195 for (slice = 0; slice < sc->num_slices; slice++) 3196 mxge_free_slice_rings(&sc->ss[slice]); 3197 } 3198 3199 static int 3200 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3201 int tx_ring_entries) 3202 { 3203 mxge_softc_t *sc = ss->sc; 3204 size_t bytes; 3205 int err, i; 3206 3207 err = ENOMEM; 3208 3209 /* allocate per-slice receive resources */ 3210 3211 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 3212 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 3213 3214 /* allocate the rx shadow rings */ 3215 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 3216 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3217 if (ss->rx_small.shadow == NULL) 3218 return err; 3219 3220 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 3221 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3222 if (ss->rx_big.shadow == NULL) 3223 return err; 3224 3225 /* allocate the rx host info rings */ 3226 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 3227 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3228 if (ss->rx_small.info == NULL) 3229 return err; 3230 3231 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 3232 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3233 if (ss->rx_big.info == NULL) 3234 return err; 3235 3236 /* allocate the rx busdma resources */ 3237 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3238 1, /* alignment */ 3239 4096, /* boundary */ 3240 BUS_SPACE_MAXADDR, /* low */ 3241 BUS_SPACE_MAXADDR, /* high */ 3242 NULL, NULL, /* filter */ 3243 MHLEN, /* maxsize */ 3244 1, /* num segs */ 3245 MHLEN, /* maxsegsize */ 3246 BUS_DMA_ALLOCNOW, /* flags */ 3247 NULL, NULL, /* lock */ 3248 &ss->rx_small.dmat); /* tag */ 3249 if (err != 0) { 3250 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3251 err); 3252 return err; 3253 } 3254 3255 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3256 1, /* alignment */ 3257 #if MXGE_VIRT_JUMBOS 3258 4096, /* boundary */ 3259 #else 3260 0, /* boundary */ 3261 #endif 3262 BUS_SPACE_MAXADDR, /* low */ 3263 BUS_SPACE_MAXADDR, /* high */ 3264 NULL, NULL, /* filter */ 3265 3*4096, /* maxsize */ 3266 #if MXGE_VIRT_JUMBOS 3267 3, /* num segs */ 3268 4096, /* maxsegsize*/ 3269 #else 3270 1, /* num segs */ 3271 MJUM9BYTES, /* maxsegsize*/ 3272 #endif 3273 BUS_DMA_ALLOCNOW, /* flags */ 3274 NULL, NULL, /* lock */ 3275 &ss->rx_big.dmat); /* tag */ 3276 if (err != 0) { 3277 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3278 err); 3279 return err; 3280 } 3281 for (i = 0; i <= ss->rx_small.mask; i++) { 3282 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3283 &ss->rx_small.info[i].map); 3284 if (err != 0) { 3285 device_printf(sc->dev, "Err %d rx_small dmamap\n", 3286 err); 3287 return err; 3288 } 3289 } 3290 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3291 &ss->rx_small.extra_map); 3292 if (err != 0) { 3293 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 3294 err); 3295 return err; 3296 } 3297 3298 for (i = 0; i <= ss->rx_big.mask; i++) { 3299 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3300 &ss->rx_big.info[i].map); 3301 if (err != 0) { 3302 device_printf(sc->dev, "Err %d rx_big dmamap\n", 3303 err); 3304 return err; 3305 } 3306 } 3307 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3308 &ss->rx_big.extra_map); 3309 if (err != 0) { 3310 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 3311 err); 3312 return err; 3313 } 3314 3315 /* now allocate TX resouces */ 3316 3317 #ifndef IFNET_BUF_RING 3318 /* only use a single TX ring for now */ 3319 if (ss != ss->sc->ss) 3320 return 0; 3321 #endif 3322 3323 ss->tx.mask = tx_ring_entries - 1; 3324 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3325 3326 3327 /* allocate the tx request copy block */ 3328 bytes = 8 + 3329 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3330 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 3331 if (ss->tx.req_bytes == NULL) 3332 return err; 3333 /* ensure req_list entries are aligned to 8 bytes */ 3334 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3335 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL); 3336 3337 /* allocate the tx busdma segment list */ 3338 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3339 ss->tx.seg_list = (bus_dma_segment_t *) 3340 malloc(bytes, M_DEVBUF, M_WAITOK); 3341 if (ss->tx.seg_list == NULL) 3342 return err; 3343 3344 /* allocate the tx host info ring */ 3345 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3346 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3347 if (ss->tx.info == NULL) 3348 return err; 3349 3350 /* allocate the tx busdma resources */ 3351 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3352 1, /* alignment */ 3353 sc->tx_boundary, /* boundary */ 3354 BUS_SPACE_MAXADDR, /* low */ 3355 BUS_SPACE_MAXADDR, /* high */ 3356 NULL, NULL, /* filter */ 3357 65536 + 256, /* maxsize */ 3358 ss->tx.max_desc - 2, /* num segs */ 3359 sc->tx_boundary, /* maxsegsz */ 3360 BUS_DMA_ALLOCNOW, /* flags */ 3361 NULL, NULL, /* lock */ 3362 &ss->tx.dmat); /* tag */ 3363 3364 if (err != 0) { 3365 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3366 err); 3367 return err; 3368 } 3369 3370 /* now use these tags to setup dmamaps for each slot 3371 in the ring */ 3372 for (i = 0; i <= ss->tx.mask; i++) { 3373 err = bus_dmamap_create(ss->tx.dmat, 0, 3374 &ss->tx.info[i].map); 3375 if (err != 0) { 3376 device_printf(sc->dev, "Err %d tx dmamap\n", 3377 err); 3378 return err; 3379 } 3380 } 3381 return 0; 3382 3383 } 3384 3385 static int 3386 mxge_alloc_rings(mxge_softc_t *sc) 3387 { 3388 mxge_cmd_t cmd; 3389 int tx_ring_size; 3390 int tx_ring_entries, rx_ring_entries; 3391 int err, slice; 3392 3393 /* get ring sizes */ 3394 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3395 tx_ring_size = cmd.data0; 3396 if (err != 0) { 3397 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3398 goto abort; 3399 } 3400 3401 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3402 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3403 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 3404 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 3405 IFQ_SET_READY(&sc->ifp->if_snd); 3406 3407 for (slice = 0; slice < sc->num_slices; slice++) { 3408 err = mxge_alloc_slice_rings(&sc->ss[slice], 3409 rx_ring_entries, 3410 tx_ring_entries); 3411 if (err != 0) 3412 goto abort; 3413 } 3414 return 0; 3415 3416 abort: 3417 mxge_free_rings(sc); 3418 return err; 3419 3420 } 3421 3422 3423 static void 3424 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3425 { 3426 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3427 3428 if (bufsize < MCLBYTES) { 3429 /* easy, everything fits in a single buffer */ 3430 *big_buf_size = MCLBYTES; 3431 *cl_size = MCLBYTES; 3432 *nbufs = 1; 3433 return; 3434 } 3435 3436 if (bufsize < MJUMPAGESIZE) { 3437 /* still easy, everything still fits in a single buffer */ 3438 *big_buf_size = MJUMPAGESIZE; 3439 *cl_size = MJUMPAGESIZE; 3440 *nbufs = 1; 3441 return; 3442 } 3443 #if MXGE_VIRT_JUMBOS 3444 /* now we need to use virtually contiguous buffers */ 3445 *cl_size = MJUM9BYTES; 3446 *big_buf_size = 4096; 3447 *nbufs = mtu / 4096 + 1; 3448 /* needs to be a power of two, so round up */ 3449 if (*nbufs == 3) 3450 *nbufs = 4; 3451 #else 3452 *cl_size = MJUM9BYTES; 3453 *big_buf_size = MJUM9BYTES; 3454 *nbufs = 1; 3455 #endif 3456 } 3457 3458 static int 3459 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3460 { 3461 mxge_softc_t *sc; 3462 mxge_cmd_t cmd; 3463 bus_dmamap_t map; 3464 struct lro_entry *lro_entry; 3465 int err, i, slice; 3466 3467 3468 sc = ss->sc; 3469 slice = ss - sc->ss; 3470 3471 SLIST_INIT(&ss->lro_free); 3472 SLIST_INIT(&ss->lro_active); 3473 3474 for (i = 0; i < sc->lro_cnt; i++) { 3475 lro_entry = (struct lro_entry *) 3476 malloc(sizeof (*lro_entry), M_DEVBUF, 3477 M_NOWAIT | M_ZERO); 3478 if (lro_entry == NULL) { 3479 sc->lro_cnt = i; 3480 break; 3481 } 3482 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next); 3483 } 3484 /* get the lanai pointers to the send and receive rings */ 3485 3486 err = 0; 3487 #ifndef IFNET_BUF_RING 3488 /* We currently only send from the first slice */ 3489 if (slice == 0) { 3490 #endif 3491 cmd.data0 = slice; 3492 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3493 ss->tx.lanai = 3494 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3495 ss->tx.send_go = (volatile uint32_t *) 3496 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3497 ss->tx.send_stop = (volatile uint32_t *) 3498 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3499 #ifndef IFNET_BUF_RING 3500 } 3501 #endif 3502 cmd.data0 = slice; 3503 err |= mxge_send_cmd(sc, 3504 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3505 ss->rx_small.lanai = 3506 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3507 cmd.data0 = slice; 3508 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3509 ss->rx_big.lanai = 3510 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3511 3512 if (err != 0) { 3513 device_printf(sc->dev, 3514 "failed to get ring sizes or locations\n"); 3515 return EIO; 3516 } 3517 3518 /* stock receive rings */ 3519 for (i = 0; i <= ss->rx_small.mask; i++) { 3520 map = ss->rx_small.info[i].map; 3521 err = mxge_get_buf_small(ss, map, i); 3522 if (err) { 3523 device_printf(sc->dev, "alloced %d/%d smalls\n", 3524 i, ss->rx_small.mask + 1); 3525 return ENOMEM; 3526 } 3527 } 3528 for (i = 0; i <= ss->rx_big.mask; i++) { 3529 ss->rx_big.shadow[i].addr_low = 0xffffffff; 3530 ss->rx_big.shadow[i].addr_high = 0xffffffff; 3531 } 3532 ss->rx_big.nbufs = nbufs; 3533 ss->rx_big.cl_size = cl_size; 3534 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN + 3535 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3536 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { 3537 map = ss->rx_big.info[i].map; 3538 err = mxge_get_buf_big(ss, map, i); 3539 if (err) { 3540 device_printf(sc->dev, "alloced %d/%d bigs\n", 3541 i, ss->rx_big.mask + 1); 3542 return ENOMEM; 3543 } 3544 } 3545 return 0; 3546 } 3547 3548 static int 3549 mxge_open(mxge_softc_t *sc) 3550 { 3551 mxge_cmd_t cmd; 3552 int err, big_bytes, nbufs, slice, cl_size, i; 3553 bus_addr_t bus; 3554 volatile uint8_t *itable; 3555 struct mxge_slice_state *ss; 3556 3557 /* Copy the MAC address in case it was overridden */ 3558 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 3559 3560 err = mxge_reset(sc, 1); 3561 if (err != 0) { 3562 device_printf(sc->dev, "failed to reset\n"); 3563 return EIO; 3564 } 3565 3566 if (sc->num_slices > 1) { 3567 /* setup the indirection table */ 3568 cmd.data0 = sc->num_slices; 3569 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 3570 &cmd); 3571 3572 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 3573 &cmd); 3574 if (err != 0) { 3575 device_printf(sc->dev, 3576 "failed to setup rss tables\n"); 3577 return err; 3578 } 3579 3580 /* just enable an identity mapping */ 3581 itable = sc->sram + cmd.data0; 3582 for (i = 0; i < sc->num_slices; i++) 3583 itable[i] = (uint8_t)i; 3584 3585 cmd.data0 = 1; 3586 cmd.data1 = mxge_rss_hash_type; 3587 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3588 if (err != 0) { 3589 device_printf(sc->dev, "failed to enable slices\n"); 3590 return err; 3591 } 3592 } 3593 3594 3595 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); 3596 3597 cmd.data0 = nbufs; 3598 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 3599 &cmd); 3600 /* error is only meaningful if we're trying to set 3601 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 3602 if (err && nbufs > 1) { 3603 device_printf(sc->dev, 3604 "Failed to set alway-use-n to %d\n", 3605 nbufs); 3606 return EIO; 3607 } 3608 /* Give the firmware the mtu and the big and small buffer 3609 sizes. The firmware wants the big buf size to be a power 3610 of two. Luckily, FreeBSD's clusters are powers of two */ 3611 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3612 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3613 cmd.data0 = MHLEN - MXGEFW_PAD; 3614 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3615 &cmd); 3616 cmd.data0 = big_bytes; 3617 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3618 3619 if (err != 0) { 3620 device_printf(sc->dev, "failed to setup params\n"); 3621 goto abort; 3622 } 3623 3624 /* Now give him the pointer to the stats block */ 3625 for (slice = 0; 3626 #ifdef IFNET_BUF_RING 3627 slice < sc->num_slices; 3628 #else 3629 slice < 1; 3630 #endif 3631 slice++) { 3632 ss = &sc->ss[slice]; 3633 cmd.data0 = 3634 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3635 cmd.data1 = 3636 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3637 cmd.data2 = sizeof(struct mcp_irq_data); 3638 cmd.data2 |= (slice << 16); 3639 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3640 } 3641 3642 if (err != 0) { 3643 bus = sc->ss->fw_stats_dma.bus_addr; 3644 bus += offsetof(struct mcp_irq_data, send_done_count); 3645 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3646 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3647 err = mxge_send_cmd(sc, 3648 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3649 &cmd); 3650 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3651 sc->fw_multicast_support = 0; 3652 } else { 3653 sc->fw_multicast_support = 1; 3654 } 3655 3656 if (err != 0) { 3657 device_printf(sc->dev, "failed to setup params\n"); 3658 goto abort; 3659 } 3660 3661 for (slice = 0; slice < sc->num_slices; slice++) { 3662 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3663 if (err != 0) { 3664 device_printf(sc->dev, "couldn't open slice %d\n", 3665 slice); 3666 goto abort; 3667 } 3668 } 3669 3670 /* Finally, start the firmware running */ 3671 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3672 if (err) { 3673 device_printf(sc->dev, "Couldn't bring up link\n"); 3674 goto abort; 3675 } 3676 #ifdef IFNET_BUF_RING 3677 for (slice = 0; slice < sc->num_slices; slice++) { 3678 ss = &sc->ss[slice]; 3679 ss->if_drv_flags |= IFF_DRV_RUNNING; 3680 ss->if_drv_flags &= ~IFF_DRV_OACTIVE; 3681 } 3682 #endif 3683 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3684 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3685 3686 return 0; 3687 3688 3689 abort: 3690 mxge_free_mbufs(sc); 3691 3692 return err; 3693 } 3694 3695 static int 3696 mxge_close(mxge_softc_t *sc, int down) 3697 { 3698 mxge_cmd_t cmd; 3699 int err, old_down_cnt; 3700 #ifdef IFNET_BUF_RING 3701 struct mxge_slice_state *ss; 3702 int slice; 3703 #endif 3704 3705 #ifdef IFNET_BUF_RING 3706 for (slice = 0; slice < sc->num_slices; slice++) { 3707 ss = &sc->ss[slice]; 3708 ss->if_drv_flags &= ~IFF_DRV_RUNNING; 3709 } 3710 #endif 3711 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3712 if (!down) { 3713 old_down_cnt = sc->down_cnt; 3714 wmb(); 3715 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3716 if (err) { 3717 device_printf(sc->dev, 3718 "Couldn't bring down link\n"); 3719 } 3720 if (old_down_cnt == sc->down_cnt) { 3721 /* wait for down irq */ 3722 DELAY(10 * sc->intr_coal_delay); 3723 } 3724 wmb(); 3725 if (old_down_cnt == sc->down_cnt) { 3726 device_printf(sc->dev, "never got down irq\n"); 3727 } 3728 } 3729 mxge_free_mbufs(sc); 3730 3731 return 0; 3732 } 3733 3734 static void 3735 mxge_setup_cfg_space(mxge_softc_t *sc) 3736 { 3737 device_t dev = sc->dev; 3738 int reg; 3739 uint16_t cmd, lnk, pectl; 3740 3741 /* find the PCIe link width and set max read request to 4KB*/ 3742 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { 3743 lnk = pci_read_config(dev, reg + 0x12, 2); 3744 sc->link_width = (lnk >> 4) & 0x3f; 3745 3746 if (sc->pectl == 0) { 3747 pectl = pci_read_config(dev, reg + 0x8, 2); 3748 pectl = (pectl & ~0x7000) | (5 << 12); 3749 pci_write_config(dev, reg + 0x8, pectl, 2); 3750 sc->pectl = pectl; 3751 } else { 3752 /* restore saved pectl after watchdog reset */ 3753 pci_write_config(dev, reg + 0x8, sc->pectl, 2); 3754 } 3755 } 3756 3757 /* Enable DMA and Memory space access */ 3758 pci_enable_busmaster(dev); 3759 cmd = pci_read_config(dev, PCIR_COMMAND, 2); 3760 cmd |= PCIM_CMD_MEMEN; 3761 pci_write_config(dev, PCIR_COMMAND, cmd, 2); 3762 } 3763 3764 static uint32_t 3765 mxge_read_reboot(mxge_softc_t *sc) 3766 { 3767 device_t dev = sc->dev; 3768 uint32_t vs; 3769 3770 /* find the vendor specific offset */ 3771 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) { 3772 device_printf(sc->dev, 3773 "could not find vendor specific offset\n"); 3774 return (uint32_t)-1; 3775 } 3776 /* enable read32 mode */ 3777 pci_write_config(dev, vs + 0x10, 0x3, 1); 3778 /* tell NIC which register to read */ 3779 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3780 return (pci_read_config(dev, vs + 0x14, 4)); 3781 } 3782 3783 static void 3784 mxge_watchdog_reset(mxge_softc_t *sc) 3785 { 3786 struct pci_devinfo *dinfo; 3787 struct mxge_slice_state *ss; 3788 int err, running, s, num_tx_slices = 1; 3789 uint32_t reboot; 3790 uint16_t cmd; 3791 3792 err = ENXIO; 3793 3794 device_printf(sc->dev, "Watchdog reset!\n"); 3795 3796 /* 3797 * check to see if the NIC rebooted. If it did, then all of 3798 * PCI config space has been reset, and things like the 3799 * busmaster bit will be zero. If this is the case, then we 3800 * must restore PCI config space before the NIC can be used 3801 * again 3802 */ 3803 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3804 if (cmd == 0xffff) { 3805 /* 3806 * maybe the watchdog caught the NIC rebooting; wait 3807 * up to 100ms for it to finish. If it does not come 3808 * back, then give up 3809 */ 3810 DELAY(1000*100); 3811 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3812 if (cmd == 0xffff) { 3813 device_printf(sc->dev, "NIC disappeared!\n"); 3814 } 3815 } 3816 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3817 /* print the reboot status */ 3818 reboot = mxge_read_reboot(sc); 3819 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3820 reboot); 3821 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 3822 if (running) { 3823 3824 /* 3825 * quiesce NIC so that TX routines will not try to 3826 * xmit after restoration of BAR 3827 */ 3828 3829 /* Mark the link as down */ 3830 if (sc->link_state) { 3831 sc->link_state = 0; 3832 if_link_state_change(sc->ifp, 3833 LINK_STATE_DOWN); 3834 } 3835 #ifdef IFNET_BUF_RING 3836 num_tx_slices = sc->num_slices; 3837 #endif 3838 /* grab all TX locks to ensure no tx */ 3839 for (s = 0; s < num_tx_slices; s++) { 3840 ss = &sc->ss[s]; 3841 mtx_lock(&ss->tx.mtx); 3842 } 3843 mxge_close(sc, 1); 3844 } 3845 /* restore PCI configuration space */ 3846 dinfo = device_get_ivars(sc->dev); 3847 pci_cfg_restore(sc->dev, dinfo); 3848 3849 /* and redo any changes we made to our config space */ 3850 mxge_setup_cfg_space(sc); 3851 3852 /* reload f/w */ 3853 err = mxge_load_firmware(sc, 0); 3854 if (err) { 3855 device_printf(sc->dev, 3856 "Unable to re-load f/w\n"); 3857 } 3858 if (running) { 3859 if (!err) 3860 err = mxge_open(sc); 3861 /* release all TX locks */ 3862 for (s = 0; s < num_tx_slices; s++) { 3863 ss = &sc->ss[s]; 3864 #ifdef IFNET_BUF_RING 3865 mxge_start_locked(ss); 3866 #endif 3867 mtx_unlock(&ss->tx.mtx); 3868 } 3869 } 3870 sc->watchdog_resets++; 3871 } else { 3872 device_printf(sc->dev, 3873 "NIC did not reboot, not resetting\n"); 3874 err = 0; 3875 } 3876 if (err) { 3877 device_printf(sc->dev, "watchdog reset failed\n"); 3878 } else { 3879 if (sc->dying == 2) 3880 sc->dying = 0; 3881 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3882 } 3883 } 3884 3885 static void 3886 mxge_watchdog_task(void *arg, int pending) 3887 { 3888 mxge_softc_t *sc = arg; 3889 3890 3891 mtx_lock(&sc->driver_mtx); 3892 mxge_watchdog_reset(sc); 3893 mtx_unlock(&sc->driver_mtx); 3894 } 3895 3896 static void 3897 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice) 3898 { 3899 tx = &sc->ss[slice].tx; 3900 device_printf(sc->dev, "slice %d struck? ring state:\n", slice); 3901 device_printf(sc->dev, 3902 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3903 tx->req, tx->done, tx->queue_active); 3904 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3905 tx->activate, tx->deactivate); 3906 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3907 tx->pkt_done, 3908 be32toh(sc->ss->fw_stats->send_done_count)); 3909 } 3910 3911 static int 3912 mxge_watchdog(mxge_softc_t *sc) 3913 { 3914 mxge_tx_ring_t *tx; 3915 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3916 int i, err = 0; 3917 3918 /* see if we have outstanding transmits, which 3919 have been pending for more than mxge_ticks */ 3920 for (i = 0; 3921 #ifdef IFNET_BUF_RING 3922 (i < sc->num_slices) && (err == 0); 3923 #else 3924 (i < 1) && (err == 0); 3925 #endif 3926 i++) { 3927 tx = &sc->ss[i].tx; 3928 if (tx->req != tx->done && 3929 tx->watchdog_req != tx->watchdog_done && 3930 tx->done == tx->watchdog_done) { 3931 /* check for pause blocking before resetting */ 3932 if (tx->watchdog_rx_pause == rx_pause) { 3933 mxge_warn_stuck(sc, tx, i); 3934 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 3935 return (ENXIO); 3936 } 3937 else 3938 device_printf(sc->dev, "Flow control blocking " 3939 "xmits, check link partner\n"); 3940 } 3941 3942 tx->watchdog_req = tx->req; 3943 tx->watchdog_done = tx->done; 3944 tx->watchdog_rx_pause = rx_pause; 3945 } 3946 3947 if (sc->need_media_probe) 3948 mxge_media_probe(sc); 3949 return (err); 3950 } 3951 3952 static u_long 3953 mxge_update_stats(mxge_softc_t *sc) 3954 { 3955 struct mxge_slice_state *ss; 3956 u_long pkts = 0; 3957 u_long ipackets = 0; 3958 u_long opackets = 0; 3959 #ifdef IFNET_BUF_RING 3960 u_long obytes = 0; 3961 u_long omcasts = 0; 3962 u_long odrops = 0; 3963 #endif 3964 u_long oerrors = 0; 3965 int slice; 3966 3967 for (slice = 0; slice < sc->num_slices; slice++) { 3968 ss = &sc->ss[slice]; 3969 ipackets += ss->ipackets; 3970 opackets += ss->opackets; 3971 #ifdef IFNET_BUF_RING 3972 obytes += ss->obytes; 3973 omcasts += ss->omcasts; 3974 odrops += ss->tx.br->br_drops; 3975 #endif 3976 oerrors += ss->oerrors; 3977 } 3978 pkts = (ipackets - sc->ifp->if_ipackets); 3979 pkts += (opackets - sc->ifp->if_opackets); 3980 sc->ifp->if_ipackets = ipackets; 3981 sc->ifp->if_opackets = opackets; 3982 #ifdef IFNET_BUF_RING 3983 sc->ifp->if_obytes = obytes; 3984 sc->ifp->if_omcasts = omcasts; 3985 sc->ifp->if_snd.ifq_drops = odrops; 3986 #endif 3987 sc->ifp->if_oerrors = oerrors; 3988 return pkts; 3989 } 3990 3991 static void 3992 mxge_tick(void *arg) 3993 { 3994 mxge_softc_t *sc = arg; 3995 u_long pkts = 0; 3996 int err = 0; 3997 int running, ticks; 3998 uint16_t cmd; 3999 4000 ticks = mxge_ticks; 4001 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 4002 if (running) { 4003 /* aggregate stats from different slices */ 4004 pkts = mxge_update_stats(sc); 4005 if (!sc->watchdog_countdown) { 4006 err = mxge_watchdog(sc); 4007 sc->watchdog_countdown = 4; 4008 } 4009 sc->watchdog_countdown--; 4010 } 4011 if (pkts == 0) { 4012 /* ensure NIC did not suffer h/w fault while idle */ 4013 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 4014 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 4015 sc->dying = 2; 4016 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 4017 err = ENXIO; 4018 } 4019 /* look less often if NIC is idle */ 4020 ticks *= 4; 4021 } 4022 4023 if (err == 0) 4024 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 4025 4026 } 4027 4028 static int 4029 mxge_media_change(struct ifnet *ifp) 4030 { 4031 return EINVAL; 4032 } 4033 4034 static int 4035 mxge_change_mtu(mxge_softc_t *sc, int mtu) 4036 { 4037 struct ifnet *ifp = sc->ifp; 4038 int real_mtu, old_mtu; 4039 int err = 0; 4040 4041 4042 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 4043 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 4044 return EINVAL; 4045 mtx_lock(&sc->driver_mtx); 4046 old_mtu = ifp->if_mtu; 4047 ifp->if_mtu = mtu; 4048 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4049 mxge_close(sc, 0); 4050 err = mxge_open(sc); 4051 if (err != 0) { 4052 ifp->if_mtu = old_mtu; 4053 mxge_close(sc, 0); 4054 (void) mxge_open(sc); 4055 } 4056 } 4057 mtx_unlock(&sc->driver_mtx); 4058 return err; 4059 } 4060 4061 static void 4062 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 4063 { 4064 mxge_softc_t *sc = ifp->if_softc; 4065 4066 4067 if (sc == NULL) 4068 return; 4069 ifmr->ifm_status = IFM_AVALID; 4070 ifmr->ifm_active = IFM_ETHER | IFM_FDX; 4071 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 4072 ifmr->ifm_active |= sc->current_media; 4073 } 4074 4075 static int 4076 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 4077 { 4078 mxge_softc_t *sc = ifp->if_softc; 4079 struct ifreq *ifr = (struct ifreq *)data; 4080 int err, mask; 4081 4082 err = 0; 4083 switch (command) { 4084 case SIOCSIFADDR: 4085 case SIOCGIFADDR: 4086 err = ether_ioctl(ifp, command, data); 4087 break; 4088 4089 case SIOCSIFMTU: 4090 err = mxge_change_mtu(sc, ifr->ifr_mtu); 4091 break; 4092 4093 case SIOCSIFFLAGS: 4094 mtx_lock(&sc->driver_mtx); 4095 if (sc->dying) { 4096 mtx_unlock(&sc->driver_mtx); 4097 return EINVAL; 4098 } 4099 if (ifp->if_flags & IFF_UP) { 4100 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 4101 err = mxge_open(sc); 4102 } else { 4103 /* take care of promis can allmulti 4104 flag chages */ 4105 mxge_change_promisc(sc, 4106 ifp->if_flags & IFF_PROMISC); 4107 mxge_set_multicast_list(sc); 4108 } 4109 } else { 4110 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4111 mxge_close(sc, 0); 4112 } 4113 } 4114 mtx_unlock(&sc->driver_mtx); 4115 break; 4116 4117 case SIOCADDMULTI: 4118 case SIOCDELMULTI: 4119 mtx_lock(&sc->driver_mtx); 4120 mxge_set_multicast_list(sc); 4121 mtx_unlock(&sc->driver_mtx); 4122 break; 4123 4124 case SIOCSIFCAP: 4125 mtx_lock(&sc->driver_mtx); 4126 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 4127 if (mask & IFCAP_TXCSUM) { 4128 if (IFCAP_TXCSUM & ifp->if_capenable) { 4129 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 4130 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP 4131 | CSUM_TSO); 4132 } else { 4133 ifp->if_capenable |= IFCAP_TXCSUM; 4134 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 4135 } 4136 } else if (mask & IFCAP_RXCSUM) { 4137 if (IFCAP_RXCSUM & ifp->if_capenable) { 4138 ifp->if_capenable &= ~IFCAP_RXCSUM; 4139 sc->csum_flag = 0; 4140 } else { 4141 ifp->if_capenable |= IFCAP_RXCSUM; 4142 sc->csum_flag = 1; 4143 } 4144 } 4145 if (mask & IFCAP_TSO4) { 4146 if (IFCAP_TSO4 & ifp->if_capenable) { 4147 ifp->if_capenable &= ~IFCAP_TSO4; 4148 ifp->if_hwassist &= ~CSUM_TSO; 4149 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 4150 ifp->if_capenable |= IFCAP_TSO4; 4151 ifp->if_hwassist |= CSUM_TSO; 4152 } else { 4153 printf("mxge requires tx checksum offload" 4154 " be enabled to use TSO\n"); 4155 err = EINVAL; 4156 } 4157 } 4158 if (mask & IFCAP_LRO) { 4159 if (IFCAP_LRO & ifp->if_capenable) 4160 err = mxge_change_lro_locked(sc, 0); 4161 else 4162 err = mxge_change_lro_locked(sc, mxge_lro_cnt); 4163 } 4164 if (mask & IFCAP_VLAN_HWTAGGING) 4165 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 4166 if (mask & IFCAP_VLAN_HWTSO) 4167 ifp->if_capenable ^= IFCAP_VLAN_HWTSO; 4168 4169 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) || 4170 !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING)) 4171 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO; 4172 4173 mtx_unlock(&sc->driver_mtx); 4174 VLAN_CAPABILITIES(ifp); 4175 4176 break; 4177 4178 case SIOCGIFMEDIA: 4179 mtx_lock(&sc->driver_mtx); 4180 mxge_media_probe(sc); 4181 mtx_unlock(&sc->driver_mtx); 4182 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 4183 &sc->media, command); 4184 break; 4185 4186 default: 4187 err = ENOTTY; 4188 } 4189 return err; 4190 } 4191 4192 static void 4193 mxge_fetch_tunables(mxge_softc_t *sc) 4194 { 4195 4196 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); 4197 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 4198 &mxge_flow_control); 4199 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 4200 &mxge_intr_coal_delay); 4201 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 4202 &mxge_nvidia_ecrc_enable); 4203 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 4204 &mxge_force_firmware); 4205 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 4206 &mxge_deassert_wait); 4207 TUNABLE_INT_FETCH("hw.mxge.verbose", 4208 &mxge_verbose); 4209 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 4210 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt); 4211 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); 4212 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); 4213 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type); 4214 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu); 4215 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle); 4216 if (sc->lro_cnt != 0) 4217 mxge_lro_cnt = sc->lro_cnt; 4218 4219 if (bootverbose) 4220 mxge_verbose = 1; 4221 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 4222 mxge_intr_coal_delay = 30; 4223 if (mxge_ticks == 0) 4224 mxge_ticks = hz / 2; 4225 sc->pause = mxge_flow_control; 4226 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 4227 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) { 4228 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 4229 } 4230 if (mxge_initial_mtu > ETHERMTU_JUMBO || 4231 mxge_initial_mtu < ETHER_MIN_LEN) 4232 mxge_initial_mtu = ETHERMTU_JUMBO; 4233 4234 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE) 4235 mxge_throttle = MXGE_MAX_THROTTLE; 4236 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE) 4237 mxge_throttle = MXGE_MIN_THROTTLE; 4238 sc->throttle = mxge_throttle; 4239 } 4240 4241 4242 static void 4243 mxge_free_slices(mxge_softc_t *sc) 4244 { 4245 struct mxge_slice_state *ss; 4246 int i; 4247 4248 4249 if (sc->ss == NULL) 4250 return; 4251 4252 for (i = 0; i < sc->num_slices; i++) { 4253 ss = &sc->ss[i]; 4254 if (ss->fw_stats != NULL) { 4255 mxge_dma_free(&ss->fw_stats_dma); 4256 ss->fw_stats = NULL; 4257 #ifdef IFNET_BUF_RING 4258 if (ss->tx.br != NULL) { 4259 drbr_free(ss->tx.br, M_DEVBUF); 4260 ss->tx.br = NULL; 4261 } 4262 #endif 4263 mtx_destroy(&ss->tx.mtx); 4264 } 4265 if (ss->rx_done.entry != NULL) { 4266 mxge_dma_free(&ss->rx_done.dma); 4267 ss->rx_done.entry = NULL; 4268 } 4269 } 4270 free(sc->ss, M_DEVBUF); 4271 sc->ss = NULL; 4272 } 4273 4274 static int 4275 mxge_alloc_slices(mxge_softc_t *sc) 4276 { 4277 mxge_cmd_t cmd; 4278 struct mxge_slice_state *ss; 4279 size_t bytes; 4280 int err, i, max_intr_slots; 4281 4282 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4283 if (err != 0) { 4284 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4285 return err; 4286 } 4287 sc->rx_ring_size = cmd.data0; 4288 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); 4289 4290 bytes = sizeof (*sc->ss) * sc->num_slices; 4291 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO); 4292 if (sc->ss == NULL) 4293 return (ENOMEM); 4294 for (i = 0; i < sc->num_slices; i++) { 4295 ss = &sc->ss[i]; 4296 4297 ss->sc = sc; 4298 4299 /* allocate per-slice rx interrupt queues */ 4300 4301 bytes = max_intr_slots * sizeof (*ss->rx_done.entry); 4302 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096); 4303 if (err != 0) 4304 goto abort; 4305 ss->rx_done.entry = ss->rx_done.dma.addr; 4306 bzero(ss->rx_done.entry, bytes); 4307 4308 /* 4309 * allocate the per-slice firmware stats; stats 4310 * (including tx) are used used only on the first 4311 * slice for now 4312 */ 4313 #ifndef IFNET_BUF_RING 4314 if (i > 0) 4315 continue; 4316 #endif 4317 4318 bytes = sizeof (*ss->fw_stats); 4319 err = mxge_dma_alloc(sc, &ss->fw_stats_dma, 4320 sizeof (*ss->fw_stats), 64); 4321 if (err != 0) 4322 goto abort; 4323 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr; 4324 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name), 4325 "%s:tx(%d)", device_get_nameunit(sc->dev), i); 4326 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF); 4327 #ifdef IFNET_BUF_RING 4328 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK, 4329 &ss->tx.mtx); 4330 #endif 4331 } 4332 4333 return (0); 4334 4335 abort: 4336 mxge_free_slices(sc); 4337 return (ENOMEM); 4338 } 4339 4340 static void 4341 mxge_slice_probe(mxge_softc_t *sc) 4342 { 4343 mxge_cmd_t cmd; 4344 char *old_fw; 4345 int msix_cnt, status, max_intr_slots; 4346 4347 sc->num_slices = 1; 4348 /* 4349 * don't enable multiple slices if they are not enabled, 4350 * or if this is not an SMP system 4351 */ 4352 4353 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2) 4354 return; 4355 4356 /* see how many MSI-X interrupts are available */ 4357 msix_cnt = pci_msix_count(sc->dev); 4358 if (msix_cnt < 2) 4359 return; 4360 4361 /* now load the slice aware firmware see what it supports */ 4362 old_fw = sc->fw_name; 4363 if (old_fw == mxge_fw_aligned) 4364 sc->fw_name = mxge_fw_rss_aligned; 4365 else 4366 sc->fw_name = mxge_fw_rss_unaligned; 4367 status = mxge_load_firmware(sc, 0); 4368 if (status != 0) { 4369 device_printf(sc->dev, "Falling back to a single slice\n"); 4370 return; 4371 } 4372 4373 /* try to send a reset command to the card to see if it 4374 is alive */ 4375 memset(&cmd, 0, sizeof (cmd)); 4376 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 4377 if (status != 0) { 4378 device_printf(sc->dev, "failed reset\n"); 4379 goto abort_with_fw; 4380 } 4381 4382 /* get rx ring size */ 4383 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4384 if (status != 0) { 4385 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4386 goto abort_with_fw; 4387 } 4388 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t)); 4389 4390 /* tell it the size of the interrupt queues */ 4391 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot); 4392 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 4393 if (status != 0) { 4394 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 4395 goto abort_with_fw; 4396 } 4397 4398 /* ask the maximum number of slices it supports */ 4399 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 4400 if (status != 0) { 4401 device_printf(sc->dev, 4402 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); 4403 goto abort_with_fw; 4404 } 4405 sc->num_slices = cmd.data0; 4406 if (sc->num_slices > msix_cnt) 4407 sc->num_slices = msix_cnt; 4408 4409 if (mxge_max_slices == -1) { 4410 /* cap to number of CPUs in system */ 4411 if (sc->num_slices > mp_ncpus) 4412 sc->num_slices = mp_ncpus; 4413 } else { 4414 if (sc->num_slices > mxge_max_slices) 4415 sc->num_slices = mxge_max_slices; 4416 } 4417 /* make sure it is a power of two */ 4418 while (sc->num_slices & (sc->num_slices - 1)) 4419 sc->num_slices--; 4420 4421 if (mxge_verbose) 4422 device_printf(sc->dev, "using %d slices\n", 4423 sc->num_slices); 4424 4425 return; 4426 4427 abort_with_fw: 4428 sc->fw_name = old_fw; 4429 (void) mxge_load_firmware(sc, 0); 4430 } 4431 4432 static int 4433 mxge_add_msix_irqs(mxge_softc_t *sc) 4434 { 4435 size_t bytes; 4436 int count, err, i, rid; 4437 4438 rid = PCIR_BAR(2); 4439 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 4440 &rid, RF_ACTIVE); 4441 4442 if (sc->msix_table_res == NULL) { 4443 device_printf(sc->dev, "couldn't alloc MSIX table res\n"); 4444 return ENXIO; 4445 } 4446 4447 count = sc->num_slices; 4448 err = pci_alloc_msix(sc->dev, &count); 4449 if (err != 0) { 4450 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d" 4451 "err = %d \n", sc->num_slices, err); 4452 goto abort_with_msix_table; 4453 } 4454 if (count < sc->num_slices) { 4455 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n", 4456 count, sc->num_slices); 4457 device_printf(sc->dev, 4458 "Try setting hw.mxge.max_slices to %d\n", 4459 count); 4460 err = ENOSPC; 4461 goto abort_with_msix; 4462 } 4463 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices; 4464 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4465 if (sc->msix_irq_res == NULL) { 4466 err = ENOMEM; 4467 goto abort_with_msix; 4468 } 4469 4470 for (i = 0; i < sc->num_slices; i++) { 4471 rid = i + 1; 4472 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev, 4473 SYS_RES_IRQ, 4474 &rid, RF_ACTIVE); 4475 if (sc->msix_irq_res[i] == NULL) { 4476 device_printf(sc->dev, "couldn't allocate IRQ res" 4477 " for message %d\n", i); 4478 err = ENXIO; 4479 goto abort_with_res; 4480 } 4481 } 4482 4483 bytes = sizeof (*sc->msix_ih) * sc->num_slices; 4484 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4485 4486 for (i = 0; i < sc->num_slices; i++) { 4487 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i], 4488 INTR_TYPE_NET | INTR_MPSAFE, 4489 #if __FreeBSD_version > 700030 4490 NULL, 4491 #endif 4492 mxge_intr, &sc->ss[i], &sc->msix_ih[i]); 4493 if (err != 0) { 4494 device_printf(sc->dev, "couldn't setup intr for " 4495 "message %d\n", i); 4496 goto abort_with_intr; 4497 } 4498 bus_describe_intr(sc->dev, sc->msix_irq_res[i], 4499 sc->msix_ih[i], "s%d", i); 4500 } 4501 4502 if (mxge_verbose) { 4503 device_printf(sc->dev, "using %d msix IRQs:", 4504 sc->num_slices); 4505 for (i = 0; i < sc->num_slices; i++) 4506 printf(" %ld", rman_get_start(sc->msix_irq_res[i])); 4507 printf("\n"); 4508 } 4509 return (0); 4510 4511 abort_with_intr: 4512 for (i = 0; i < sc->num_slices; i++) { 4513 if (sc->msix_ih[i] != NULL) { 4514 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4515 sc->msix_ih[i]); 4516 sc->msix_ih[i] = NULL; 4517 } 4518 } 4519 free(sc->msix_ih, M_DEVBUF); 4520 4521 4522 abort_with_res: 4523 for (i = 0; i < sc->num_slices; i++) { 4524 rid = i + 1; 4525 if (sc->msix_irq_res[i] != NULL) 4526 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4527 sc->msix_irq_res[i]); 4528 sc->msix_irq_res[i] = NULL; 4529 } 4530 free(sc->msix_irq_res, M_DEVBUF); 4531 4532 4533 abort_with_msix: 4534 pci_release_msi(sc->dev); 4535 4536 abort_with_msix_table: 4537 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4538 sc->msix_table_res); 4539 4540 return err; 4541 } 4542 4543 static int 4544 mxge_add_single_irq(mxge_softc_t *sc) 4545 { 4546 int count, err, rid; 4547 4548 count = pci_msi_count(sc->dev); 4549 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) { 4550 rid = 1; 4551 } else { 4552 rid = 0; 4553 sc->legacy_irq = 1; 4554 } 4555 sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0, 4556 1, RF_SHAREABLE | RF_ACTIVE); 4557 if (sc->irq_res == NULL) { 4558 device_printf(sc->dev, "could not alloc interrupt\n"); 4559 return ENXIO; 4560 } 4561 if (mxge_verbose) 4562 device_printf(sc->dev, "using %s irq %ld\n", 4563 sc->legacy_irq ? "INTx" : "MSI", 4564 rman_get_start(sc->irq_res)); 4565 err = bus_setup_intr(sc->dev, sc->irq_res, 4566 INTR_TYPE_NET | INTR_MPSAFE, 4567 #if __FreeBSD_version > 700030 4568 NULL, 4569 #endif 4570 mxge_intr, &sc->ss[0], &sc->ih); 4571 if (err != 0) { 4572 bus_release_resource(sc->dev, SYS_RES_IRQ, 4573 sc->legacy_irq ? 0 : 1, sc->irq_res); 4574 if (!sc->legacy_irq) 4575 pci_release_msi(sc->dev); 4576 } 4577 return err; 4578 } 4579 4580 static void 4581 mxge_rem_msix_irqs(mxge_softc_t *sc) 4582 { 4583 int i, rid; 4584 4585 for (i = 0; i < sc->num_slices; i++) { 4586 if (sc->msix_ih[i] != NULL) { 4587 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4588 sc->msix_ih[i]); 4589 sc->msix_ih[i] = NULL; 4590 } 4591 } 4592 free(sc->msix_ih, M_DEVBUF); 4593 4594 for (i = 0; i < sc->num_slices; i++) { 4595 rid = i + 1; 4596 if (sc->msix_irq_res[i] != NULL) 4597 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4598 sc->msix_irq_res[i]); 4599 sc->msix_irq_res[i] = NULL; 4600 } 4601 free(sc->msix_irq_res, M_DEVBUF); 4602 4603 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4604 sc->msix_table_res); 4605 4606 pci_release_msi(sc->dev); 4607 return; 4608 } 4609 4610 static void 4611 mxge_rem_single_irq(mxge_softc_t *sc) 4612 { 4613 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 4614 bus_release_resource(sc->dev, SYS_RES_IRQ, 4615 sc->legacy_irq ? 0 : 1, sc->irq_res); 4616 if (!sc->legacy_irq) 4617 pci_release_msi(sc->dev); 4618 } 4619 4620 static void 4621 mxge_rem_irq(mxge_softc_t *sc) 4622 { 4623 if (sc->num_slices > 1) 4624 mxge_rem_msix_irqs(sc); 4625 else 4626 mxge_rem_single_irq(sc); 4627 } 4628 4629 static int 4630 mxge_add_irq(mxge_softc_t *sc) 4631 { 4632 int err; 4633 4634 if (sc->num_slices > 1) 4635 err = mxge_add_msix_irqs(sc); 4636 else 4637 err = mxge_add_single_irq(sc); 4638 4639 if (0 && err == 0 && sc->num_slices > 1) { 4640 mxge_rem_msix_irqs(sc); 4641 err = mxge_add_msix_irqs(sc); 4642 } 4643 return err; 4644 } 4645 4646 4647 static int 4648 mxge_attach(device_t dev) 4649 { 4650 mxge_softc_t *sc = device_get_softc(dev); 4651 struct ifnet *ifp; 4652 int err, rid; 4653 4654 sc->dev = dev; 4655 mxge_fetch_tunables(sc); 4656 4657 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc); 4658 sc->tq = taskqueue_create("mxge_taskq", M_WAITOK, 4659 taskqueue_thread_enqueue, &sc->tq); 4660 if (sc->tq == NULL) { 4661 err = ENOMEM; 4662 goto abort_with_nothing; 4663 } 4664 4665 err = bus_dma_tag_create(NULL, /* parent */ 4666 1, /* alignment */ 4667 0, /* boundary */ 4668 BUS_SPACE_MAXADDR, /* low */ 4669 BUS_SPACE_MAXADDR, /* high */ 4670 NULL, NULL, /* filter */ 4671 65536 + 256, /* maxsize */ 4672 MXGE_MAX_SEND_DESC, /* num segs */ 4673 65536, /* maxsegsize */ 4674 0, /* flags */ 4675 NULL, NULL, /* lock */ 4676 &sc->parent_dmat); /* tag */ 4677 4678 if (err != 0) { 4679 device_printf(sc->dev, "Err %d allocating parent dmat\n", 4680 err); 4681 goto abort_with_tq; 4682 } 4683 4684 ifp = sc->ifp = if_alloc(IFT_ETHER); 4685 if (ifp == NULL) { 4686 device_printf(dev, "can not if_alloc()\n"); 4687 err = ENOSPC; 4688 goto abort_with_parent_dmat; 4689 } 4690 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4691 4692 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", 4693 device_get_nameunit(dev)); 4694 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF); 4695 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), 4696 "%s:drv", device_get_nameunit(dev)); 4697 mtx_init(&sc->driver_mtx, sc->driver_mtx_name, 4698 MTX_NETWORK_LOCK, MTX_DEF); 4699 4700 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); 4701 4702 mxge_setup_cfg_space(sc); 4703 4704 /* Map the board into the kernel */ 4705 rid = PCIR_BARS; 4706 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, 4707 ~0, 1, RF_ACTIVE); 4708 if (sc->mem_res == NULL) { 4709 device_printf(dev, "could not map memory\n"); 4710 err = ENXIO; 4711 goto abort_with_lock; 4712 } 4713 sc->sram = rman_get_virtual(sc->mem_res); 4714 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4715 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4716 device_printf(dev, "impossible memory region size %ld\n", 4717 rman_get_size(sc->mem_res)); 4718 err = ENXIO; 4719 goto abort_with_mem_res; 4720 } 4721 4722 /* make NULL terminated copy of the EEPROM strings section of 4723 lanai SRAM */ 4724 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4725 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4726 rman_get_bushandle(sc->mem_res), 4727 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4728 sc->eeprom_strings, 4729 MXGE_EEPROM_STRINGS_SIZE - 2); 4730 err = mxge_parse_strings(sc); 4731 if (err != 0) 4732 goto abort_with_mem_res; 4733 4734 /* Enable write combining for efficient use of PCIe bus */ 4735 mxge_enable_wc(sc); 4736 4737 /* Allocate the out of band dma memory */ 4738 err = mxge_dma_alloc(sc, &sc->cmd_dma, 4739 sizeof (mxge_cmd_t), 64); 4740 if (err != 0) 4741 goto abort_with_mem_res; 4742 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 4743 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4744 if (err != 0) 4745 goto abort_with_cmd_dma; 4746 4747 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4748 if (err != 0) 4749 goto abort_with_zeropad_dma; 4750 4751 /* select & load the firmware */ 4752 err = mxge_select_firmware(sc); 4753 if (err != 0) 4754 goto abort_with_dmabench; 4755 sc->intr_coal_delay = mxge_intr_coal_delay; 4756 4757 mxge_slice_probe(sc); 4758 err = mxge_alloc_slices(sc); 4759 if (err != 0) 4760 goto abort_with_dmabench; 4761 4762 err = mxge_reset(sc, 0); 4763 if (err != 0) 4764 goto abort_with_slices; 4765 4766 err = mxge_alloc_rings(sc); 4767 if (err != 0) { 4768 device_printf(sc->dev, "failed to allocate rings\n"); 4769 goto abort_with_slices; 4770 } 4771 4772 err = mxge_add_irq(sc); 4773 if (err != 0) { 4774 device_printf(sc->dev, "failed to add irq\n"); 4775 goto abort_with_rings; 4776 } 4777 4778 ifp->if_baudrate = IF_Gbps(10UL); 4779 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 4780 IFCAP_VLAN_MTU | IFCAP_LINKSTATE; 4781 #ifdef INET 4782 ifp->if_capabilities |= IFCAP_LRO; 4783 #endif 4784 4785 #ifdef MXGE_NEW_VLAN_API 4786 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; 4787 4788 /* Only FW 1.4.32 and newer can do TSO over vlans */ 4789 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 4790 sc->fw_ver_tiny >= 32) 4791 ifp->if_capabilities |= IFCAP_VLAN_HWTSO; 4792 #endif 4793 4794 sc->max_mtu = mxge_max_mtu(sc); 4795 if (sc->max_mtu >= 9000) 4796 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 4797 else 4798 device_printf(dev, "MTU limited to %d. Install " 4799 "latest firmware for 9000 byte jumbo support\n", 4800 sc->max_mtu - ETHER_HDR_LEN); 4801 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4802 ifp->if_capenable = ifp->if_capabilities; 4803 if (sc->lro_cnt == 0) 4804 ifp->if_capenable &= ~IFCAP_LRO; 4805 sc->csum_flag = 1; 4806 ifp->if_init = mxge_init; 4807 ifp->if_softc = sc; 4808 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4809 ifp->if_ioctl = mxge_ioctl; 4810 ifp->if_start = mxge_start; 4811 /* Initialise the ifmedia structure */ 4812 ifmedia_init(&sc->media, 0, mxge_media_change, 4813 mxge_media_status); 4814 mxge_media_init(sc); 4815 mxge_media_probe(sc); 4816 sc->dying = 0; 4817 ether_ifattach(ifp, sc->mac_addr); 4818 /* ether_ifattach sets mtu to ETHERMTU */ 4819 if (mxge_initial_mtu != ETHERMTU) 4820 mxge_change_mtu(sc, mxge_initial_mtu); 4821 4822 mxge_add_sysctls(sc); 4823 #ifdef IFNET_BUF_RING 4824 ifp->if_transmit = mxge_transmit; 4825 ifp->if_qflush = mxge_qflush; 4826 #endif 4827 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq", 4828 device_get_nameunit(sc->dev)); 4829 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 4830 return 0; 4831 4832 abort_with_rings: 4833 mxge_free_rings(sc); 4834 abort_with_slices: 4835 mxge_free_slices(sc); 4836 abort_with_dmabench: 4837 mxge_dma_free(&sc->dmabench_dma); 4838 abort_with_zeropad_dma: 4839 mxge_dma_free(&sc->zeropad_dma); 4840 abort_with_cmd_dma: 4841 mxge_dma_free(&sc->cmd_dma); 4842 abort_with_mem_res: 4843 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4844 abort_with_lock: 4845 pci_disable_busmaster(dev); 4846 mtx_destroy(&sc->cmd_mtx); 4847 mtx_destroy(&sc->driver_mtx); 4848 if_free(ifp); 4849 abort_with_parent_dmat: 4850 bus_dma_tag_destroy(sc->parent_dmat); 4851 abort_with_tq: 4852 if (sc->tq != NULL) { 4853 taskqueue_drain(sc->tq, &sc->watchdog_task); 4854 taskqueue_free(sc->tq); 4855 sc->tq = NULL; 4856 } 4857 abort_with_nothing: 4858 return err; 4859 } 4860 4861 static int 4862 mxge_detach(device_t dev) 4863 { 4864 mxge_softc_t *sc = device_get_softc(dev); 4865 4866 if (mxge_vlans_active(sc)) { 4867 device_printf(sc->dev, 4868 "Detach vlans before removing module\n"); 4869 return EBUSY; 4870 } 4871 mtx_lock(&sc->driver_mtx); 4872 sc->dying = 1; 4873 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 4874 mxge_close(sc, 0); 4875 mtx_unlock(&sc->driver_mtx); 4876 ether_ifdetach(sc->ifp); 4877 if (sc->tq != NULL) { 4878 taskqueue_drain(sc->tq, &sc->watchdog_task); 4879 taskqueue_free(sc->tq); 4880 sc->tq = NULL; 4881 } 4882 callout_drain(&sc->co_hdl); 4883 ifmedia_removeall(&sc->media); 4884 mxge_dummy_rdma(sc, 0); 4885 mxge_rem_sysctls(sc); 4886 mxge_rem_irq(sc); 4887 mxge_free_rings(sc); 4888 mxge_free_slices(sc); 4889 mxge_dma_free(&sc->dmabench_dma); 4890 mxge_dma_free(&sc->zeropad_dma); 4891 mxge_dma_free(&sc->cmd_dma); 4892 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4893 pci_disable_busmaster(dev); 4894 mtx_destroy(&sc->cmd_mtx); 4895 mtx_destroy(&sc->driver_mtx); 4896 if_free(sc->ifp); 4897 bus_dma_tag_destroy(sc->parent_dmat); 4898 return 0; 4899 } 4900 4901 static int 4902 mxge_shutdown(device_t dev) 4903 { 4904 return 0; 4905 } 4906 4907 /* 4908 This file uses Myri10GE driver indentation. 4909 4910 Local Variables: 4911 c-file-style:"linux" 4912 tab-width:8 4913 End: 4914 */ 4915