/******************************************************************************

Copyright (c) 2006, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 3. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/memrange.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/sx.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <net/zlib.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <machine/bus.h>
#include <machine/in_cksum.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include <vm/vm.h>		/* for pmap_mapdev() */
#include <vm/pmap.h>

#include <dev/mxge/mxge_mcp.h>
#include <dev/mxge/mcp_gen_header.h>
#include <dev/mxge/if_mxge_var.h>

/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_ticks;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";

static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);
static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),
	{0, 0}
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus. */
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);

static int
mxge_probe(device_t dev)
{
	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E)) {
		device_set_desc(dev, "Myri10G-PCIE-8A");
		return 0;
	}
	return ENXIO;
}

static void
mxge_enable_wc(mxge_softc_t *sc)
{
	struct mem_range_desc mrdesc;
	vm_paddr_t pa;
	vm_offset_t len;
	int err, action;

	pa = rman_get_start(sc->mem_res);
	len = rman_get_size(sc->mem_res);
	mrdesc.mr_base = pa;
	mrdesc.mr_len = len;
	mrdesc.mr_flags = MDF_WRITECOMBINE;
	action = MEMRANGE_SET_UPDATE;
	strcpy((char *)&mrdesc.mr_owner, "mxge");
	err = mem_range_attr_set(&mrdesc, &action);
	if (err != 0) {
		device_printf(sc->dev,
			      "w/c failed for pa 0x%lx, len 0x%lx, err = %d\n",
			      (unsigned long)pa, (unsigned long)len, err);
	} else {
		sc->wc = 1;
	}
}

/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	if (error == 0) {
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}

static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 4096,			/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}

static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}
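/*
 * Typical use of the two helpers above (a hedged sketch, not a call
 * sequence taken from this driver): allocate a coherent DMA buffer,
 * use the CPU pointer and bus address it hands back, then tear it
 * down with the matching free routine.
 *
 *	mxge_dma_t dma;
 *	if (mxge_dma_alloc(sc, &dma, 4096, 64) == 0) {
 *		// dma.addr is the kernel virtual address;
 *		// dma.bus_addr was filled in by mxge_dmamap_callback()
 *		mxge_dma_free(&dma);
 *	}
 */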
/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */

static int
mxge_parse_strings(mxge_softc_t *sc)
{
#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)

	char *ptr, *limit;
	int i, found_mac;

	ptr = sc->eeprom_strings;
	limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
	found_mac = 0;
	while (ptr < limit && *ptr != '\0') {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			ptr += 1;
			sc->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				ptr += 3;
				if ((ptr + 2) > limit)
					goto abort;
				sc->mac_addr[i] = strtoul(ptr, NULL, 16);
				found_mac = 1;
			}
		} else if (memcmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strncpy(sc->product_code_string, ptr,
				sizeof (sc->product_code_string) - 1);
		} else if (memcmp(ptr, "SN=", 3) == 0) {
			ptr += 3;
			strncpy(sc->serial_number_string, ptr,
				sizeof (sc->serial_number_string) - 1);
		}
		MXGE_NEXT_STRING(ptr);
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}

#if #cpu(i386) || defined __i386 || defined i386 || defined __i386__ || #cpu(x86_64) || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function.  Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access.
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves.  This is because on
	 * opteron/nvidia class machines the 0xe0000000 mapping is
	 * handled by the nvidia chipset, which means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */
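	/*
	 * A note on the offset arithmetic used below (standard PCIe
	 * extended config layout, stated here for clarity): each bus
	 * gets a 1MB window (0x00100000) and each function gets a 4KB
	 * config page (0x00001000), indexed by slot * 8 + func.  For
	 * example, bus 2, slot 3, function 0 maps to
	 * off = base + 2 * 0x100000 + (0 + 8 * 3) * 0x1000
	 *     = base + 0x218000.
	 */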
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	off = base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);

	if (va == NULL) {
		device_printf(sc->dev, "pmap_mapdev() failed\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif

static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";

	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */
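	/*
	 * Worked example of the decoding below (illustrative numbers,
	 * not measured data): if the firmware returns
	 * cmd.data0 = 0x01000200, then 0x0100 = 256 transfers
	 * completed in 0x0200 = 512 ticks of 0.5us each.  With
	 * len = 2048 bytes per transfer that works out to
	 * (256 * 2048 * 2) / 512 = 2048 bytes/us, i.e. sc->read_dma
	 * would record roughly 2GB/s as 2048 MB/s.
	 */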
	len = sc->tx.boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}

/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary.  Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx.boundary to 2KB.  If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx.boundary to 4KB.
 */

static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx.boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
				      pectl);
			sc->tx.boundary = 2048;
		}
	}

	/*
	 * load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc);
	if (status != 0) {
		return status;
	}

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen.
	 */
	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS)
		device_printf(dev, "Falling back to ethp! "
			      "Please install up to date fw\n");
	return status;
}
static int
mxge_select_firmware(mxge_softc_t *sc)
{
	int aligned = 0;

	if (mxge_force_firmware != 0) {
		if (mxge_force_firmware == 1)
			aligned = 1;
		else
			aligned = 0;
		if (mxge_verbose)
			device_printf(sc->dev,
				      "Assuming %s completions (forced)\n",
				      aligned ? "aligned" : "unaligned");
		goto abort;
	}

	/* if the PCIe link width is 4 or less, we can use the aligned
	   firmware and skip any checks */
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev,
			      "PCIe x%d Link, expect reduced performance\n",
			      sc->link_width);
		aligned = 1;
		goto abort;
	}

	if (0 == mxge_firmware_probe(sc))
		return 0;

abort:
	if (aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx.boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx.boundary = 2048;
	}
	return (mxge_load_firmware(sc));
}

union qualhack
{
	const char *ro_char;
	char *rw_char;
};

static int
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{

	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
			      be32toh(hdr->mcp_type));
		return EIO;
	}

	/* save firmware version for sysctl */
	strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
	if (mxge_verbose)
		device_printf(sc->dev, "firmware id: %s\n", hdr->version);

	sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
	       &sc->fw_ver_minor, &sc->fw_ver_tiny);

	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
	      && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		device_printf(sc->dev, "Found firmware version %s\n",
			      sc->fw_version);
		device_printf(sc->dev, "Driver needs %d.%d\n",
			      MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
		return EINVAL;
	}
	return 0;
}

static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
	const struct firmware *fw;
	const mcp_gen_header_t *hdr;
	unsigned hdr_offset;
	const char *fw_data;
	union qualhack hack;
	int status;
	unsigned int i;
	char dummy;

	fw = firmware_get(sc->fw_name);

	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n",
			      sc->fw_name);
		return ENOENT;
	}
	if (fw->datasize > *limit ||
	    fw->datasize < MCP_HEADER_PTR_OFFSET + 4) {
		device_printf(sc->dev, "Firmware image %s too large (%d/%d)\n",
			      sc->fw_name, (int)fw->datasize, (int) *limit);
		status = ENOSPC;
		goto abort_with_fw;
	}
	*limit = fw->datasize;

	/* check id */
	fw_data = (const char *)fw->data;
	hdr_offset = htobe32(*(const uint32_t *)
			     (fw_data + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw->datasize) {
		device_printf(sc->dev, "Bad firmware file\n");
		status = EIO;
		goto abort_with_fw;
	}
	hdr = (const void*)(fw_data + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_fw;

	hack.ro_char = fw_data;
	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < *limit; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
			      hack.rw_char + i,
			      min(256U, (unsigned)(*limit - i)));
		mb();
		dummy = *sc->sram;
		mb();
	}

	status = 0;
abort_with_fw:
	firmware_put(fw, FIRMWARE_UNLOAD);
	return status;
}

/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	mb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high);	/* dummy addr MSW */
	buf[4] = htobe32(dma_low);	/* dummy addr LSW */
	buf[5] = htobe32(enable);	/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	mb();
	DELAY(1000);
	mb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)\n",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}
static int
mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof(*buf) + 8];
	volatile mcp_cmd_response_t *response = sc->cmd;
	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
	uint32_t dma_low, dma_high;
	int err, sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	buf->data0 = htobe32(data->data0);
	buf->data1 = htobe32(data->data1);
	buf->data2 = htobe32(data->data2);
	buf->cmd = htobe32(cmd);
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf->response_addr.low = htobe32(dma_low);
	buf->response_addr.high = htobe32(dma_high);
	mtx_lock(&sc->cmd_mtx);
	response->result = 0xffffffff;
	mb();
	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	err = EAGAIN;
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
		mb();
		switch (be32toh(response->result)) {
		case 0:
			data->data0 = be32toh(response->data);
			err = 0;
			break;
		case 0xffffffff:
			DELAY(1000);
			break;
		case MXGEFW_CMD_UNKNOWN:
			err = ENOSYS;
			break;
		case MXGEFW_CMD_ERROR_UNALIGNED:
			err = E2BIG;
			break;
		default:
			device_printf(sc->dev,
				      "mxge: command %d "
				      "failed, result = %d\n",
				      cmd, be32toh(response->result));
			err = ENXIO;
			break;
		}
		if (err != EAGAIN)
			break;
	}
	if (err == EAGAIN)
		device_printf(sc->dev, "mxge: command %d timed out, "
			      "result = %d\n",
			      cmd, be32toh(response->result));
	mtx_unlock(&sc->cmd_mtx);
	return err;
}

static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not malloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	free(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}

	return status;
}

static int
mxge_load_firmware(mxge_softc_t *sc)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx.boundary == 4096) {
			device_printf(sc->dev,
				      "Using firmware currently running on NIC"
				      ".  For optimal\n");
			device_printf(sc->dev,
				      "performance consider loading optimized "
				      "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx.boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	mb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
	/* where the code starts */
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8);	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	mb();
	DELAY(1000);
	mb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "handoff failed (%p = 0x%x)\n",
			      confirm, *confirm);
		return ENXIO;
	}
	return 0;
}

static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}

static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set promisc mode\n");
	}
}
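/*
 * The multicast update below deliberately "fails open": it enables
 * ALLMULTI first, flushes the firmware's filter list, joins each
 * link-layer address, and only then turns ALLMULTI back off.  Any
 * error along the way leaves the NIC in ALLMULTI mode, which is
 * slower but never drops wanted frames.
 */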
static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists */
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */
	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */
	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		      &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		      &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "Failed "
				      "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
				      " %d\n", err);
			/* abort, leaving multicast filtering off */
			IF_ADDR_UNLOCK(ifp);
			return;
		}
	}
	IF_ADDR_UNLOCK(ifp);
	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}

static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}
static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	mxge_cmd_t cmd;
	size_t bytes;
	int status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	if (interrupts_setup) {
		/* Now exchange information about interrupts */
		bytes = (sc->rx_done.mask + 1) * sizeof (*sc->rx_done.entry);
		memset(sc->rx_done.entry, 0, bytes);
		cmd.data0 = (uint32_t)bytes;
		status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
		cmd.data0 = MXGE_LOWPART_TO_U32(sc->rx_done.dma.bus_addr);
		cmd.data1 = MXGE_HIGHPART_TO_U32(sc->rx_done.dma.bus_addr);
		status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA, &cmd);
	}

	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	sc->irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed to set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	/* reset mcp/driver shared state back to 0 */
	sc->rx_done.idx = 0;
	sc->rx_done.cnt = 0;
	sc->tx.req = 0;
	sc->tx.done = 0;
	sc->tx.pkt_done = 0;
	sc->tx.wake = 0;
	sc->tx_defrag = 0;
	sc->tx.stall = 0;
	sc->rx_big.cnt = 0;
	sc->rx_small.cnt = 0;
	sc->rdma_tags_available = 15;
	sc->fw_stats->valid = 0;
	sc->fw_stats->send_done_count = 0;
	sc->lro_bad_csum = 0;
	sc->lro_queued = 0;
	sc->lro_flushed = 0;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, 0);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	return status;
}

static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0) {
		return err;
	}
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0) {
		return err;
	}
	if (enabled == sc->pause)
		return 0;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_pause(sc, enabled);
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}
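/*
 * mxge_handle_be32() above lets read-only sysctls export the
 * firmware's big-endian counters in place: SYSCTL_ADD_PROC() passes a
 * pointer to the counter as arg1, the handler byte-swaps it into arg2
 * and then gives sysctl_handle_int() a NULL arg1 so only the swapped
 * value is reported.  See the "link_up" and "dropped_*" nodes below.
 */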
static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
	fw = sc->fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "firmware_version",
			  CTLFLAG_RD, &sc->fw_version,
			  0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "serial_number",
			  CTLFLAG_RD, &sc->serial_number_string,
			  0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "product_code",
			  CTLFLAG_RD, &sc->product_code_string,
			  0, "product_code");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "pcie_link_width",
		       CTLFLAG_RD, &sc->link_width,
		       0, "PCIe link width");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_boundary",
		       CTLFLAG_RD, &sc->tx.boundary,
		       0, "tx_boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_combine",
		       CTLFLAG_RD, &sc->wc,
		       0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_dma_MBs",
		       CTLFLAG_RD, &sc->read_dma,
		       0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_dma_MBs",
		       CTLFLAG_RD, &sc->write_dma,
		       0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_write_dma_MBs",
		       CTLFLAG_RD, &sc->read_write_dma,
		       0, "DMA concurrent Read/Write speed in MB/s");

	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"intr_coal_delay",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_intr_coal,
			"I", "interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"flow_control_enabled",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_flow_control,
			"I", "enable flow control for this interface");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "deassert_wait",
		       CTLFLAG_RW, &mxge_deassert_wait,
		       0, "Wait for IRQ line to go low in ihandler");

	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"link_up",
			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
			0, mxge_handle_be32,
			"I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"rdma_tags_available",
			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
			0, mxge_handle_be32,
			"I", "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_crc32",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_crc32,
			0, mxge_handle_be32,
			"I", "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_phy",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_phy,
			0, mxge_handle_be32,
			"I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_error_or_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_link_error_or_filtered,
			0, mxge_handle_be32,
			"I", "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_overflow",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
			0, mxge_handle_be32,
			"I", "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_multicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_multicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_big_buffer",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_small_buffer",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_no_small_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_overrun",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
			0, mxge_handle_be32,
			"I", "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_pause",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_pause,
			0, mxge_handle_be32,
			"I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_runt",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
			0, mxge_handle_be32,
			"I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_unicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_unicast_filtered");

	/* host counters exported for debugging */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "rx_small_cnt",
		       CTLFLAG_RD, &sc->rx_small.cnt,
		       0, "rx_small_cnt");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "rx_big_cnt",
		       CTLFLAG_RD, &sc->rx_big.cnt,
		       0, "rx_big_cnt");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_req",
		       CTLFLAG_RD, &sc->tx.req,
		       0, "tx_req");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_done",
		       CTLFLAG_RD, &sc->tx.done,
		       0, "tx_done");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_pkt_done",
		       CTLFLAG_RD, &sc->tx.pkt_done,
		       0, "tx_pkt_done");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_stall",
		       CTLFLAG_RD, &sc->tx.stall,
		       0, "tx_stall");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_wake",
		       CTLFLAG_RD, &sc->tx.wake,
		       0, "tx_wake");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_defrag",
		       CTLFLAG_RD, &sc->tx_defrag,
		       0, "tx_defrag");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "verbose",
		       CTLFLAG_RW, &mxge_verbose,
		       0, "verbose printing");

	/* lro */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "lro_cnt", CTLFLAG_RD, &sc->lro_cnt,
		       0, "number of lro merge queues");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "lro_flushed", CTLFLAG_RD, &sc->lro_flushed,
		       0, "number of lro merge queues flushed");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "lro_queued", CTLFLAG_RD, &sc->lro_queued,
		       0, "number of frames appended to lro merge queues");
}

/* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
   backwards one at a time and handle ring wraps */

static inline void
mxge_submit_req_backwards(mxge_tx_buf_t *tx,
			  mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
		mb();
	}
}
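/*
 * Why copy backwards: a hedged note, inferred from the valid-flag
 * protocol in mxge_submit_req() below.  The first descriptor's flags
 * are what tell the NIC that a chain is ready, so every later slot
 * must be visible in the ring before the first one is validated.
 * When a chain wraps the ring, copying from the last descriptor back
 * toward the first (with a barrier after each) preserves that
 * ordering.
 */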
/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */

static inline void
mxge_submit_req(mxge_tx_buf_t *tx, mcp_kreq_ether_send_t *src,
		int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	mb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			mb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		mb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	mb();
}
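/*
 * Worked TSO example (illustrative numbers): for a packet with a
 * 14-byte Ethernet header, a 20-byte IP header and a 20-byte TCP
 * header, cum_len starts at -54.  It remains negative while header
 * bytes are consumed, reaches zero exactly at the start of the
 * payload, and from then on tracks the payload offset modulo mss:
 * with mss = 1448, a TSO_CHOP is emitted each time cum_len_next
 * exceeds 1448, marking a segmentation cut.
 */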
static void
mxge_encap_tso(mxge_softc_t *sc, struct mbuf *m, int busdma_seg_cnt,
	       int ip_off)
{
	mxge_tx_buf_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct ip *ip;
	struct tcphdr *tcp;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	/* ensure we have the ethernet, IP and TCP
	   header together in the first mbuf, copy
	   it to a scratch buffer if not */
	if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
		m_copydata(m, 0, ip_off + sizeof (*ip),
			   sc->scratch);
		ip = (struct ip *)(sc->scratch + ip_off);
	} else {
		ip = (struct ip *)(mtod(m, char *) + ip_off);
	}
	if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2)
			    + sizeof (*tcp))) {
		m_copydata(m, 0, ip_off + (ip->ip_hl << 2)
			   + sizeof (*tcp), sc->scratch);
		ip = (struct ip *)(sc->scratch + ip_off);
	}

	tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
	cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2));

	/* TSO implies checksum offload on this hardware */
	cksum_offset = ip_off + (ip->ip_hl << 2);
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	tx = &sc->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */

	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces */
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (__predict_false(cksum_offset > seglen))
				cksum_offset -= seglen;
			else
				cksum_offset = 0;
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req-rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	sc->ifp->if_oerrors++;
	if (!once) {
		printf("tx->max_desc exceeded via TSO!\n");
		printf("mss = %d, %ld, %d!\n", mss,
		       (long)seg - (long)tx->seg_list, tx->max_desc);
		once = 1;
	}
	return;
}
/*
 * We reproduce the software vlan tag insertion from
 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
 * vlan tag insertion.  We need to advertise this in order to have the
 * vlan interface respect our csum offload flags.
 */
static struct mbuf *
mxge_vlan_tag_insert(struct mbuf *m)
{
	struct ether_vlan_header *evl;

	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT);
	if (__predict_false(m == NULL))
		return NULL;
	if (m->m_len < sizeof(*evl)) {
		m = m_pullup(m, sizeof(*evl));
		if (__predict_false(m == NULL))
			return NULL;
	}
	/*
	 * Transform the Ethernet header into an Ethernet header
	 * with 802.1Q encapsulation.
	 */
	evl = mtod(m, struct ether_vlan_header *);
	bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
	      (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
	evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
	evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
	m->m_flags &= ~M_VLANTAG;
	return m;
}

static void
mxge_encap(mxge_softc_t *sc, struct mbuf *m)
{
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct mbuf *m_tmp;
	struct ifnet *ifp;
	mxge_tx_buf_t *tx;
	struct ip *ip;
	int cnt, cum_len, err, i, idx, odd_flag, ip_off;
	uint16_t pseudo_hdr_offset;
	uint8_t flags, cksum_offset;

	ifp = sc->ifp;
	tx = &sc->tx;

	ip_off = sizeof (struct ether_header);
	if (m->m_flags & M_VLANTAG) {
		m = mxge_vlan_tag_insert(m);
		if (__predict_false(m == NULL))
			goto drop;
		ip_off += ETHER_VLAN_ENCAP_LEN;
	}

	/* (try to) map the frame for DMA */
	idx = tx->req & tx->mask;
	err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
				      m, tx->seg_list, &cnt,
				      BUS_DMA_NOWAIT);
	if (__predict_false(err == EFBIG)) {
		/* Too many segments in the chain.  Try
		   to defrag */
		m_tmp = m_defrag(m, M_NOWAIT);
		if (m_tmp == NULL) {
			goto drop;
		}
		sc->tx_defrag++;
		m = m_tmp;
		err = bus_dmamap_load_mbuf_sg(tx->dmat,
					      tx->info[idx].map,
					      m, tx->seg_list, &cnt,
					      BUS_DMA_NOWAIT);
	}
	if (__predict_false(err != 0)) {
		device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
			      " packet len = %d\n", err, m->m_pkthdr.len);
		goto drop;
	}
	bus_dmamap_sync(tx->dmat, tx->info[idx].map,
			BUS_DMASYNC_PREWRITE);
	tx->info[idx].m = m;

	/* TSO is different enough, we handle it in another routine */
	if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
		mxge_encap_tso(sc, m, cnt, ip_off);
		return;
	}

	req = tx->req_list;
	cksum_offset = 0;
	pseudo_hdr_offset = 0;
	flags = MXGEFW_FLAGS_NO_TSO;

	/* checksum offloading? */
	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
		/* ensure ip header is in first mbuf, copy
		   it to a scratch buffer if not */
		if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
			m_copydata(m, 0, ip_off + sizeof (*ip),
				   sc->scratch);
			ip = (struct ip *)(sc->scratch + ip_off);
		} else {
			ip = (struct ip *)(mtod(m, char *) + ip_off);
		}
		cksum_offset = ip_off + (ip->ip_hl << 2);
		pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
		pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
		req->cksum_offset = cksum_offset;
		flags |= MXGEFW_FLAGS_CKSUM;
		odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
	} else {
		odd_flag = 0;
	}
	if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
		flags |= MXGEFW_FLAGS_SMALL;

	/* convert segments into a request list */
	cum_len = 0;
	seg = tx->seg_list;
	req->flags = MXGEFW_FLAGS_FIRST;
	for (i = 0; i < cnt; i++) {
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		req->length = htobe16(seg->ds_len);
		req->cksum_offset = cksum_offset;
		if (cksum_offset > seg->ds_len)
			cksum_offset -= seg->ds_len;
		else
			cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cum_len += seg->ds_len;
		seg++;
		req++;
		req->flags = 0;
	}
	req--;
	/* pad runts to 60 bytes */
	if (cum_len < 60) {
		req++;
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->length = htobe16(60 - cum_len);
		req->cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cnt++;
	}

	tx->req_list[0].rdma_count = cnt;
#if 0
	/* print what the firmware will see */
	for (i = 0; i < cnt; i++) {
		printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
		       "cso:%d, flags:0x%x, rdma:%d\n",
		       i, (int)ntohl(tx->req_list[i].addr_high),
		       (int)ntohl(tx->req_list[i].addr_low),
		       (int)ntohs(tx->req_list[i].length),
		       (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
		       tx->req_list[i].cksum_offset, tx->req_list[i].flags,
		       tx->req_list[i].rdma_count);
	}
	printf("--------------\n");
#endif
	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
	return;

drop:
	m_freem(m);
	ifp->if_oerrors++;
	return;
}

static inline void
mxge_start_locked(mxge_softc_t *sc)
{
	struct mbuf *m;
	struct ifnet *ifp;
	mxge_tx_buf_t *tx;

	ifp = sc->ifp;
	tx = &sc->tx;
	while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL) {
			return;
		}
		/* let BPF see it */
		BPF_MTAP(ifp, m);

		/* give it to the nic */
		mxge_encap(sc, m);
	}
	/* ran out of transmit slots */
	if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
		sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
		tx->stall++;
	}
}

static void
mxge_start(struct ifnet *ifp)
{
	mxge_softc_t *sc = ifp->if_softc;
	mtx_lock(&sc->tx_mtx);
	mxge_start_locked(sc);
	mtx_unlock(&sc->tx_mtx);
}

/*
 * copy an array of mcp_kreq_ether_recv_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's low
 * DMA address to mark it valid only after we write the entire chunk
 * in a burst
 */
static inline void
mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
		mcp_kreq_ether_recv_t *src)
{
	uint32_t low;

	low = src->addr_low;
	src->addr_low = 0xffffffff;
	mxge_pio_copy(dst, src, 4 * sizeof (*src));
	mb();
	mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
	mb();
	src->addr_low = low;
	dst->addr_low = low;
	mb();
}
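/*
 * The receive rings are refilled in batches of 8: mxge_get_buf_small()
 * and mxge_get_buf_big() below only touch the NIC when an index closes
 * a group of 8 slots ((idx & 7) == 7), at which point mxge_submit_8rx()
 * pushes the group in two 4-descriptor bursts.  Note the recycling in
 * the callers: even when allocation fails, the submit at the "done"
 * label still runs, re-posting the old buffer instead of leaving a
 * hole in the ring.
 */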
/*
 * Myri10GE hardware checksums are not valid if the sender
 * padded the frame with non-zero padding.  This is because
 * the firmware just does a simple 16-bit 1s complement
 * checksum across the entire frame, excluding the first 14
 * bytes.  It is best to simply check the checksum and
 * tell the stack about it only if the checksum is good
 */

static inline uint16_t
mxge_rx_csum(struct mbuf *m, int csum)
{
	struct ether_header *eh;
	struct ip *ip;
	uint16_t c;

	eh = mtod(m, struct ether_header *);

	/* only deal with IPv4 TCP & UDP for now */
	if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
		return 1;
	ip = (struct ip *)(eh + 1);
	if (__predict_false(ip->ip_p != IPPROTO_TCP &&
			    ip->ip_p != IPPROTO_UDP))
		return 1;

	c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
		      htonl(ntohs(csum) + ntohs(ip->ip_len) -
			    (ip->ip_hl << 2) + ip->ip_p));
	c ^= 0xffff;
	return (c);
}
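/*
 * In outline: the firmware's 16-bit 1s complement sum covers
 * everything past the Ethernet header, and a valid IP header folds
 * to 0xffff, which is the identity in 1s complement arithmetic.
 * Re-adding the pseudo-header fields with in_pseudo() above (the
 * addresses, the protocol, and the TCP/UDP length, i.e. ip_len minus
 * the IP header length) therefore leaves 0xffff exactly when the
 * TCP/UDP checksum is good, so the final xor returns zero on success.
 */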
static void
mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
{
	struct ether_vlan_header *evl;
	struct ether_header *eh;
	uint32_t partial;

	evl = mtod(m, struct ether_vlan_header *);
	eh = mtod(m, struct ether_header *);

	/*
	 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
	 * after what the firmware thought was the end of the ethernet
	 * header.
	 */

	/* put checksum into host byte order */
	*csum = ntohs(*csum);
	partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
	(*csum) += ~partial;
	(*csum) += ((*csum) < ~partial);
	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);

	/* restore checksum to network byte order;
	   later consumers expect this */
	*csum = htons(*csum);

	/* save the tag */
	m->m_flags |= M_VLANTAG;
	m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);

	/*
	 * Remove the 802.1q header by copying the Ethernet
	 * addresses over it and adjusting the beginning of
	 * the data in the mbuf.  The encapsulated Ethernet
	 * type field is already in place.
	 */
	bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
	      ETHER_HDR_LEN - ETHER_TYPE_LEN);
	m_adj(m, ETHER_VLAN_ENCAP_LEN);
}

static inline void
mxge_rx_done_big(mxge_softc_t *sc, uint32_t len, uint32_t csum)
{
	struct ifnet *ifp;
	struct mbuf *m;
	struct ether_header *eh;
	mxge_rx_buf_t *rx;
	bus_dmamap_t old_map;
	int idx;
	uint16_t tcpudp_csum;

	ifp = sc->ifp;
	rx = &sc->rx_big;
	idx = rx->cnt & rx->mask;
	rx->cnt += rx->nbufs;
	/* save a pointer to the received mbuf */
	m = rx->info[idx].m;
	/* try to replace the received mbuf */
	if (mxge_get_buf_big(sc, rx->extra_map, idx)) {
		/* drop the frame -- the old mbuf is re-cycled */
		ifp->if_ierrors++;
		return;
	}

	/* unmap the received buffer */
	old_map = rx->info[idx].map;
	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(rx->dmat, old_map);

	/* swap the bus_dmamap_t's */
	rx->info[idx].map = rx->extra_map;
	rx->extra_map = old_map;

	/* mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned */
	m->m_data += MXGEFW_PAD;

	m->m_pkthdr.rcvif = ifp;
	m->m_len = m->m_pkthdr.len = len;
	ifp->if_ipackets++;
	eh = mtod(m, struct ether_header *);
	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
		mxge_vlan_tag_remove(m, &csum);
	}
	/* if the checksum is valid, mark it in the mbuf header */
	if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
		if (sc->lro_cnt && (0 == mxge_lro_rx(sc, m, csum)))
			return;
		/* otherwise, it was a UDP frame, or a TCP frame which
		   we could not do LRO on.  Tell the stack that the
		   checksum is good */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
	}
	/* pass the frame up the stack */
	(*ifp->if_input)(ifp, m);
}
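/*
 * Both receive paths use the same buffer-replacement trick: the ring
 * slot is refilled through a spare "extra_map" before the old buffer
 * is unloaded, and the two maps are then swapped.  On allocation
 * failure the old mbuf simply stays in the ring and the frame is
 * counted as an input error, so the ring never loses a slot.
 */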
static inline void
mxge_rx_done_small(mxge_softc_t *sc, uint32_t len, uint32_t csum)
{
	struct ifnet *ifp;
	struct ether_header *eh;
	struct mbuf *m;
	mxge_rx_buf_t *rx;
	bus_dmamap_t old_map;
	int idx;
	uint16_t tcpudp_csum;

	ifp = sc->ifp;
	rx = &sc->rx_small;
	idx = rx->cnt & rx->mask;
	rx->cnt++;
	/* save a pointer to the received mbuf */
	m = rx->info[idx].m;
	/* try to replace the received mbuf */
	if (mxge_get_buf_small(sc, rx->extra_map, idx)) {
		/* drop the frame -- the old mbuf is re-cycled */
		ifp->if_ierrors++;
		return;
	}

	/* unmap the received buffer */
	old_map = rx->info[idx].map;
	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(rx->dmat, old_map);

	/* swap the bus_dmamap_t's */
	rx->info[idx].map = rx->extra_map;
	rx->extra_map = old_map;

	/* mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned */
	m->m_data += MXGEFW_PAD;

	m->m_pkthdr.rcvif = ifp;
	m->m_len = m->m_pkthdr.len = len;
	ifp->if_ipackets++;
	eh = mtod(m, struct ether_header *);
	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
		mxge_vlan_tag_remove(m, &csum);
	}
	/* if the checksum is valid, mark it in the mbuf header */
	if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
		if (sc->lro_cnt && (0 == mxge_lro_rx(sc, m, csum)))
			return;
		/* otherwise, it was a UDP frame, or a TCP frame which
		   we could not do LRO on.  Tell the stack that the
		   checksum is good */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
	}

	/* pass the frame up the stack */
	(*ifp->if_input)(ifp, m);
}

static inline void
mxge_clean_rx_done(mxge_softc_t *sc)
{
	mxge_rx_done_t *rx_done = &sc->rx_done;
	struct lro_entry *lro;
	int limit = 0;
	uint16_t length;
	uint16_t checksum;

	while (rx_done->entry[rx_done->idx].length != 0) {
		length = ntohs(rx_done->entry[rx_done->idx].length);
		rx_done->entry[rx_done->idx].length = 0;
		checksum = rx_done->entry[rx_done->idx].checksum;
		if (length <= (MHLEN - MXGEFW_PAD))
			mxge_rx_done_small(sc, length, checksum);
		else
			mxge_rx_done_big(sc, length, checksum);
		rx_done->cnt++;
		rx_done->idx = rx_done->cnt & rx_done->mask;

		/* limit potential for livelock */
		if (__predict_false(++limit > 2 * rx_done->mask))
			break;
	}
	while (!SLIST_EMPTY(&sc->lro_active)) {
		lro = SLIST_FIRST(&sc->lro_active);
		SLIST_REMOVE_HEAD(&sc->lro_active, next);
		mxge_lro_flush(sc, lro);
	}
}

static inline void
mxge_tx_done(mxge_softc_t *sc, uint32_t mcp_idx)
{
	struct ifnet *ifp;
	mxge_tx_buf_t *tx;
	struct mbuf *m;
	bus_dmamap_t map;
	int idx, limit;

	limit = 0;
	tx = &sc->tx;
	ifp = sc->ifp;
	while (tx->pkt_done != mcp_idx) {
		idx = tx->done & tx->mask;
		tx->done++;
		m = tx->info[idx].m;
		/* mbuf and DMA map only attached to the first
		   segment per-mbuf */
		if (m != NULL) {
			ifp->if_opackets++;
			tx->info[idx].m = NULL;
			map = tx->info[idx].map;
			bus_dmamap_unload(tx->dmat, map);
			m_freem(m);
		}
		if (tx->info[idx].flag) {
			tx->info[idx].flag = 0;
			tx->pkt_done++;
		}
		/* limit potential for livelock by only handling
		   2 full tx rings per call */
		if (__predict_false(++limit > 2 * tx->mask))
			break;
	}

	/* If we have space, clear IFF_OACTIVE to tell the stack that
	   it's OK to send packets */
	if (ifp->if_drv_flags & IFF_DRV_OACTIVE &&
	    tx->req - tx->done < (tx->mask + 1)/4) {
		mtx_lock(&sc->tx_mtx);
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
		sc->tx.wake++;
		mxge_start_locked(sc);
		mtx_unlock(&sc->tx_mtx);
	}
}
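/*
 * Transmit is restarted above only once the ring has drained below a
 * quarter of its capacity, rather than on every completion, which
 * presumably keeps the stack from bouncing in and out of the OACTIVE
 * state under load.
 */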
static void
mxge_intr(void *arg)
{
	mxge_softc_t *sc = arg;
	mcp_irq_data_t *stats = sc->fw_stats;
	mxge_tx_buf_t *tx = &sc->tx;
	mxge_rx_done_t *rx_done = &sc->rx_done;
	uint32_t send_done_count;
	uint8_t valid;

	/* make sure the DMA has finished */
	if (!stats->valid) {
		return;
	}
	valid = stats->valid;

	if (!sc->msi_enabled) {
		/* lower legacy IRQ */
		*sc->irq_deassert = 0;
		if (!mxge_deassert_wait)
			/* don't wait for confirmation that irq is low */
			stats->valid = 0;
	} else {
		stats->valid = 0;
	}

	/* loop while waiting for legacy irq deassertion */
	do {
		/* check for transmit completes and receives */
		send_done_count = be32toh(stats->send_done_count);
		while ((send_done_count != tx->pkt_done) ||
		       (rx_done->entry[rx_done->idx].length != 0)) {
			mxge_tx_done(sc, (int)send_done_count);
			mxge_clean_rx_done(sc);
			send_done_count = be32toh(stats->send_done_count);
		}
	} while (*((volatile uint8_t *) &stats->valid));

	if (__predict_false(stats->stats_updated)) {
		if (sc->link_state != stats->link_up) {
			sc->link_state = stats->link_up;
			if (sc->link_state) {
				if_link_state_change(sc->ifp, LINK_STATE_UP);
				if (mxge_verbose)
					device_printf(sc->dev, "link up\n");
			} else {
				if_link_state_change(sc->ifp, LINK_STATE_DOWN);
				if (mxge_verbose)
					device_printf(sc->dev, "link down\n");
			}
		}
		if (sc->rdma_tags_available !=
		    be32toh(sc->fw_stats->rdma_tags_available)) {
			sc->rdma_tags_available =
				be32toh(sc->fw_stats->rdma_tags_available);
			device_printf(sc->dev, "RDMA timed out! %d tags "
				      "left\n", sc->rdma_tags_available);
		}
		sc->down_cnt += stats->link_down;
	}

	/* check to see if we have rx token to pass back */
	if (valid & 0x1)
		*sc->irq_claim = be32toh(3);
	*(sc->irq_claim + 1) = be32toh(3);
}
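/*
 * if_init entry point.  A no-op here: the interface is actually
 * brought up from the SIOCSIFFLAGS ioctl path via mxge_open(), so
 * there is nothing useful to do in this callback.
 */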
static void
mxge_init(void *arg)
{
}

static void
mxge_free_mbufs(mxge_softc_t *sc)
{
	int i;

	for (i = 0; i <= sc->rx_big.mask; i++) {
		if (sc->rx_big.info[i].m == NULL)
			continue;
		bus_dmamap_unload(sc->rx_big.dmat,
				  sc->rx_big.info[i].map);
		m_freem(sc->rx_big.info[i].m);
		sc->rx_big.info[i].m = NULL;
	}

	for (i = 0; i <= sc->rx_small.mask; i++) {
		if (sc->rx_small.info[i].m == NULL)
			continue;
		bus_dmamap_unload(sc->rx_small.dmat,
				  sc->rx_small.info[i].map);
		m_freem(sc->rx_small.info[i].m);
		sc->rx_small.info[i].m = NULL;
	}

	for (i = 0; i <= sc->tx.mask; i++) {
		sc->tx.info[i].flag = 0;
		if (sc->tx.info[i].m == NULL)
			continue;
		bus_dmamap_unload(sc->tx.dmat,
				  sc->tx.info[i].map);
		m_freem(sc->tx.info[i].m);
		sc->tx.info[i].m = NULL;
	}
}

static void
mxge_free_rings(mxge_softc_t *sc)
{
	int i;

	if (sc->rx_done.entry != NULL)
		mxge_dma_free(&sc->rx_done.dma);
	sc->rx_done.entry = NULL;
	if (sc->tx.req_bytes != NULL)
		free(sc->tx.req_bytes, M_DEVBUF);
	if (sc->tx.seg_list != NULL)
		free(sc->tx.seg_list, M_DEVBUF);
	if (sc->rx_small.shadow != NULL)
		free(sc->rx_small.shadow, M_DEVBUF);
	if (sc->rx_big.shadow != NULL)
		free(sc->rx_big.shadow, M_DEVBUF);
	if (sc->tx.info != NULL) {
		if (sc->tx.dmat != NULL) {
			for (i = 0; i <= sc->tx.mask; i++) {
				bus_dmamap_destroy(sc->tx.dmat,
						   sc->tx.info[i].map);
			}
			bus_dma_tag_destroy(sc->tx.dmat);
		}
		free(sc->tx.info, M_DEVBUF);
	}
	if (sc->rx_small.info != NULL) {
		if (sc->rx_small.dmat != NULL) {
			for (i = 0; i <= sc->rx_small.mask; i++) {
				bus_dmamap_destroy(sc->rx_small.dmat,
						   sc->rx_small.info[i].map);
			}
			bus_dmamap_destroy(sc->rx_small.dmat,
					   sc->rx_small.extra_map);
			bus_dma_tag_destroy(sc->rx_small.dmat);
		}
		free(sc->rx_small.info, M_DEVBUF);
	}
	if (sc->rx_big.info != NULL) {
		if (sc->rx_big.dmat != NULL) {
			for (i = 0; i <= sc->rx_big.mask; i++) {
				bus_dmamap_destroy(sc->rx_big.dmat,
						   sc->rx_big.info[i].map);
			}
			bus_dmamap_destroy(sc->rx_big.dmat,
					   sc->rx_big.extra_map);
			bus_dma_tag_destroy(sc->rx_big.dmat);
		}
		free(sc->rx_big.info, M_DEVBUF);
	}
}
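/*
 * Ring sizes come back from the firmware in bytes; dividing by the
 * descriptor size gives the entry counts, which are powers of two,
 * so ring indices can be reduced with a simple mask (entries - 1).
 * The interrupt (rx_done) ring is sized at twice the receive ring,
 * presumably so it can absorb completions from both the small and
 * the big rings.
 */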
static int
mxge_alloc_rings(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int tx_ring_size, rx_ring_size;
	int tx_ring_entries, rx_ring_entries;
	int i, err;
	unsigned long bytes;

	/* get ring sizes */
	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
	tx_ring_size = cmd.data0;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine ring sizes\n");
		goto abort_with_nothing;
	}

	rx_ring_size = cmd.data0;

	tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
	rx_ring_entries = rx_ring_size / sizeof (mcp_dma_addr_t);
	IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
	sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
	IFQ_SET_READY(&sc->ifp->if_snd);

	sc->tx.mask = tx_ring_entries - 1;
	sc->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
	sc->rx_small.mask = sc->rx_big.mask = rx_ring_entries - 1;
	sc->rx_done.mask = (2 * rx_ring_entries) - 1;

	err = ENOMEM;

	/* allocate interrupt queues */
	bytes = (sc->rx_done.mask + 1) * sizeof (*sc->rx_done.entry);
	err = mxge_dma_alloc(sc, &sc->rx_done.dma, bytes, 4096);
	if (err != 0)
		goto abort_with_nothing;
	sc->rx_done.entry = sc->rx_done.dma.addr;
	bzero(sc->rx_done.entry, bytes);

	/* allocate the tx request copy block */
	bytes = 8 +
		sizeof (*sc->tx.req_list) * (sc->tx.max_desc + 4);
	sc->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
	if (sc->tx.req_bytes == NULL)
		goto abort_with_alloc;
	/* ensure req_list entries are aligned to 8 bytes */
	sc->tx.req_list = (mcp_kreq_ether_send_t *)
		((unsigned long)(sc->tx.req_bytes + 7) & ~7UL);

	/* allocate the tx busdma segment list */
	bytes = sizeof (*sc->tx.seg_list) * sc->tx.max_desc;
	sc->tx.seg_list = (bus_dma_segment_t *)
		malloc(bytes, M_DEVBUF, M_WAITOK);
	if (sc->tx.seg_list == NULL)
		goto abort_with_alloc;

	/* allocate the rx shadow rings */
	bytes = rx_ring_entries * sizeof (*sc->rx_small.shadow);
	sc->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_small.shadow == NULL)
		goto abort_with_alloc;

	bytes = rx_ring_entries * sizeof (*sc->rx_big.shadow);
	sc->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_big.shadow == NULL)
		goto abort_with_alloc;

	/* allocate the host info rings */
	bytes = tx_ring_entries * sizeof (*sc->tx.info);
	sc->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->tx.info == NULL)
		goto abort_with_alloc;

	bytes = rx_ring_entries * sizeof (*sc->rx_small.info);
	sc->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_small.info == NULL)
		goto abort_with_alloc;

	bytes = rx_ring_entries * sizeof (*sc->rx_big.info);
	sc->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_big.info == NULL)
		goto abort_with_alloc;

	/* allocate the busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 sc->tx.boundary,	/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 sc->tx.max_desc - 2,	/* num segs */
				 sc->tx.boundary,	/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &sc->tx.dmat);		/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating tx dmat\n",
			      err);
		goto abort_with_alloc;
	}

	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 MHLEN,			/* maxsize */
				 1,			/* num segs */
				 MHLEN,			/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &sc->rx_small.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
			      err);
		goto abort_with_alloc;
	}

	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 3*4096,		/* maxsize */
				 3,			/* num segs */
				 4096,			/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &sc->rx_big.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
			      err);
		goto abort_with_alloc;
	}

	/* now use these tags to setup dmamaps for each slot
	   in each ring */
	for (i = 0; i <= sc->tx.mask; i++) {
		err = bus_dmamap_create(sc->tx.dmat, 0,
					&sc->tx.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d tx dmamap\n",
				      err);
			goto abort_with_alloc;
		}
	}
	for (i = 0; i <= sc->rx_small.mask; i++) {
		err = bus_dmamap_create(sc->rx_small.dmat, 0,
					&sc->rx_small.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d rx_small dmamap\n",
				      err);
			goto abort_with_alloc;
		}
	}
	err = bus_dmamap_create(sc->rx_small.dmat, 0,
				&sc->rx_small.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
			      err);
		goto abort_with_alloc;
	}

	for (i = 0; i <= sc->rx_big.mask; i++) {
		err = bus_dmamap_create(sc->rx_big.dmat, 0,
					&sc->rx_big.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d rx_big dmamap\n",
				      err);
			goto abort_with_alloc;
		}
	}
	err = bus_dmamap_create(sc->rx_big.dmat, 0,
				&sc->rx_big.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
			      err);
		goto abort_with_alloc;
	}
	return 0;

abort_with_alloc:
	mxge_free_rings(sc);

abort_with_nothing:
	return err;
}
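/*
 * Worked example of the buffer-size selection below, assuming 4KB
 * pages (MJUMPAGESIZE == 4096): a 9000-byte MTU needs 9000 + 14
 * (header) + 4 (VLAN) + 2 (MXGEFW_PAD) = 9020 bytes, so no single
 * 2KB or page-sized cluster will do.  We fall through to 9KB jumbo
 * clusters carved into 4096-byte pieces, and nbufs = 9000/4096 + 1
 * = 3, rounded up to 4 to stay a power of two.
 */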
static void
mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
{
	int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;

	if (bufsize < MCLBYTES) {
		/* easy, everything fits in a single buffer */
		*big_buf_size = MCLBYTES;
		*cl_size = MCLBYTES;
		*nbufs = 1;
		return;
	}

	if (bufsize < MJUMPAGESIZE) {
		/* still easy, everything still fits in a single buffer */
		*big_buf_size = MJUMPAGESIZE;
		*cl_size = MJUMPAGESIZE;
		*nbufs = 1;
		return;
	}
	/* now we need to use virtually contiguous buffers */
	*cl_size = MJUM9BYTES;
	*big_buf_size = 4096;
	*nbufs = mtu / 4096 + 1;
	/* needs to be a power of two, so round up */
	if (*nbufs == 3)
		*nbufs = 4;
}

static int
mxge_open(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int i, err, big_bytes;
	bus_dmamap_t map;
	bus_addr_t bus;
	struct lro_entry *lro_entry;

	SLIST_INIT(&sc->lro_free);
	SLIST_INIT(&sc->lro_active);

	for (i = 0; i < sc->lro_cnt; i++) {
		lro_entry = (struct lro_entry *)
			malloc(sizeof (*lro_entry), M_DEVBUF,
			       M_NOWAIT | M_ZERO);
		if (lro_entry == NULL) {
			sc->lro_cnt = i;
			break;
		}
		SLIST_INSERT_HEAD(&sc->lro_free, lro_entry, next);
	}

	/* Copy the MAC address in case it was overridden */
	bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);

	err = mxge_reset(sc, 1);
	if (err != 0) {
		device_printf(sc->dev, "failed to reset\n");
		return EIO;
	}

	mxge_choose_params(sc->ifp->if_mtu, &big_bytes,
			   &sc->rx_big.cl_size, &sc->rx_big.nbufs);

	cmd.data0 = sc->rx_big.nbufs;
	err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			    &cmd);
	/* error is only meaningful if we're trying to set
	   MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
	if (err && sc->rx_big.nbufs > 1) {
		device_printf(sc->dev,
			      "Failed to set always-use-n to %d\n",
			      sc->rx_big.nbufs);
		return EIO;
	}
	/* get the lanai pointers to the send and receive rings */

	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
	sc->tx.lanai =
		(volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
	err |= mxge_send_cmd(sc,
			     MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
	sc->rx_small.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
	err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
	sc->rx_big.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);

	if (err != 0) {
		device_printf(sc->dev,
			      "failed to get ring sizes or locations\n");
		return EIO;
	}

	/* stock receive rings */
	for (i = 0; i <= sc->rx_small.mask; i++) {
		map = sc->rx_small.info[i].map;
		err = mxge_get_buf_small(sc, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d smalls\n",
				      i, sc->rx_small.mask + 1);
			goto abort;
		}
	}
	for (i = 0; i <= sc->rx_big.mask; i++) {
		sc->rx_big.shadow[i].addr_low = 0xffffffff;
		sc->rx_big.shadow[i].addr_high = 0xffffffff;
	}
	for (i = 0; i <= sc->rx_big.mask; i += sc->rx_big.nbufs) {
		map = sc->rx_big.info[i].map;
		err = mxge_get_buf_big(sc, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d bigs\n",
				      i, sc->rx_big.mask + 1);
			goto abort;
		}
	}
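	/*
	 * Note that the big shadow ring was poisoned with all-ones
	 * addresses before stocking.  When nbufs > 1 only every
	 * nbufs-th slot gets a fresh address, so the poison presumably
	 * keeps any slots the firmware should not use from ever
	 * looking like valid DMA addresses.
	 */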
	/* Give the firmware the mtu and the big and small buffer
	   sizes.  The firmware wants the big buf size to be a power
	   of two.  Luckily, FreeBSD's clusters are powers of two */
	cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
	cmd.data0 = MHLEN - MXGEFW_PAD;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
			     &cmd);
	cmd.data0 = big_bytes;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	/* Now give him the pointer to the stats block */
	cmd.data0 = MXGE_LOWPART_TO_U32(sc->fw_stats_dma.bus_addr);
	cmd.data1 = MXGE_HIGHPART_TO_U32(sc->fw_stats_dma.bus_addr);
	cmd.data2 = sizeof(struct mcp_irq_data);
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);

	if (err != 0) {
		bus = sc->fw_stats_dma.bus_addr;
		bus += offsetof(struct mcp_irq_data, send_done_count);
		cmd.data0 = MXGE_LOWPART_TO_U32(bus);
		cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
		err = mxge_send_cmd(sc,
				    MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
				    &cmd);
		/* Firmware cannot support multicast without STATS_DMA_V2 */
		sc->fw_multicast_support = 0;
	} else {
		sc->fw_multicast_support = 1;
	}

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	/* Finally, start the firmware running */
	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
	if (err) {
		device_printf(sc->dev, "Couldn't bring up link\n");
		goto abort;
	}
	sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
	sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	return 0;

abort:
	mxge_free_mbufs(sc);

	return err;
}

static int
mxge_close(mxge_softc_t *sc)
{
	struct lro_entry *lro_entry;
	mxge_cmd_t cmd;
	int err, old_down_cnt;

	sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	old_down_cnt = sc->down_cnt;
	mb();
	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
	if (err) {
		device_printf(sc->dev, "Couldn't bring down link\n");
	}
	if (old_down_cnt == sc->down_cnt) {
		/* wait for down irq */
		DELAY(10 * sc->intr_coal_delay);
	}
	if (old_down_cnt == sc->down_cnt) {
		device_printf(sc->dev, "never got down irq\n");
	}

	mxge_free_mbufs(sc);

	/* free the lro entries allocated in mxge_open() */
	while (!SLIST_EMPTY(&sc->lro_free)) {
		lro_entry = SLIST_FIRST(&sc->lro_free);
		SLIST_REMOVE_HEAD(&sc->lro_free, next);
		free(lro_entry, M_DEVBUF);
	}
	return 0;
}

static void
mxge_setup_cfg_space(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg;
	uint16_t cmd, lnk, pectl;

	/* find the PCIe link width and set max read request to 4KB */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		lnk = pci_read_config(dev, reg + 0x12, 2);
		sc->link_width = (lnk >> 4) & 0x3f;

		/* max read request size field is bits 14:12 of the
		   device control register; 5 encodes 4096 bytes */
		pectl = pci_read_config(dev, reg + 0x8, 2);
		pectl = (pectl & ~0x7000) | (5 << 12);
		pci_write_config(dev, reg + 0x8, pectl, 2);
	}

	/* Enable DMA and Memory space access */
	pci_enable_busmaster(dev);
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	cmd |= PCIM_CMD_MEMEN;
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
}
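/*
 * Reading the reboot status goes through the vendor-specific PCI
 * capability: the byte at +0x10 selects 32-bit read mode, the
 * NIC-internal address to read (0xfffffff0, evidently the reboot
 * status word) is written at +0x18, and the value is read back
 * from +0x14.
 */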
static uint32_t
mxge_read_reboot(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	uint32_t vs;

	/* find the vendor specific offset */
	if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
		device_printf(sc->dev,
			      "could not find vendor specific offset\n");
		return (uint32_t)-1;
	}
	/* enable read32 mode */
	pci_write_config(dev, vs + 0x10, 0x3, 1);
	/* tell NIC which register to read */
	pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
	return (pci_read_config(dev, vs + 0x14, 4));
}

static void
mxge_watchdog_reset(mxge_softc_t *sc)
{
	int err;
	uint32_t reboot;
	uint16_t cmd;

	err = ENXIO;

	device_printf(sc->dev, "Watchdog reset!\n");

	/*
	 * check to see if the NIC rebooted.  If it did, then all of
	 * PCI config space has been reset, and things like the
	 * busmaster bit will be zero.  If this is the case, then we
	 * must restore PCI config space before the NIC can be used
	 * again
	 */
	cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
	if (cmd == 0xffff) {
		/*
		 * maybe the watchdog caught the NIC rebooting; wait
		 * up to 100ms for it to finish.  If it does not come
		 * back, then give up
		 */
		DELAY(1000*100);
		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
		if (cmd == 0xffff) {
			device_printf(sc->dev, "NIC disappeared!\n");
			goto abort;
		}
	}
	if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
		/* print the reboot status */
		reboot = mxge_read_reboot(sc);
		device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
			      reboot);
		/* restore PCI configuration space */

		/* XXXX waiting for pci_cfg_restore() to be exported */
		goto abort; /* just abort for now */

		/* and redo any changes we made to our config space */
		mxge_setup_cfg_space(sc);
	} else {
		device_printf(sc->dev, "NIC did not reboot, ring state:\n");
		device_printf(sc->dev, "tx.req=%d tx.done=%d\n",
			      sc->tx.req, sc->tx.done);
		device_printf(sc->dev, "pkt_done=%d fw=%d\n",
			      sc->tx.pkt_done,
			      be32toh(sc->fw_stats->send_done_count));
	}

	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) {
		mxge_close(sc);
		err = mxge_open(sc);
	}

abort:
	/*
	 * stop the watchdog if the nic is dead, to avoid spamming the
	 * console
	 */
	if (err != 0) {
		callout_stop(&sc->co_hdl);
	}
}

static void
mxge_watchdog(mxge_softc_t *sc)
{
	mxge_tx_buf_t *tx = &sc->tx;

	/* see if we have outstanding transmits, which
	   have been pending for more than mxge_ticks */
	if (tx->req != tx->done &&
	    tx->watchdog_req != tx->watchdog_done &&
	    tx->done == tx->watchdog_done)
		mxge_watchdog_reset(sc);

	tx->watchdog_req = tx->req;
	tx->watchdog_done = tx->done;
}
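/*
 * The three-way test above fires only when descriptors are currently
 * outstanding, work was already outstanding at the previous tick,
 * and the completion count has not moved since that tick -- i.e. the
 * transmit side has made no progress for a full watchdog period.
 */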
static void
mxge_tick(void *arg)
{
	mxge_softc_t *sc = arg;

	/* Synchronize with possible callout reset/stop. */
	if (callout_pending(&sc->co_hdl) ||
	    !callout_active(&sc->co_hdl)) {
		mtx_unlock(&sc->driver_mtx);
		return;
	}

	callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	mxge_watchdog(sc);
}

static int
mxge_media_change(struct ifnet *ifp)
{
	return EINVAL;
}

static int
mxge_change_mtu(mxge_softc_t *sc, int mtu)
{
	struct ifnet *ifp = sc->ifp;
	int real_mtu, old_mtu;
	int err = 0;

	real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	if ((real_mtu > sc->max_mtu) || real_mtu < 60)
		return EINVAL;
	mtx_lock(&sc->driver_mtx);
	old_mtu = ifp->if_mtu;
	ifp->if_mtu = mtu;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		callout_stop(&sc->co_hdl);
		mxge_close(sc);
		err = mxge_open(sc);
		if (err != 0) {
			/* restore the old mtu and try to recover */
			ifp->if_mtu = old_mtu;
			mxge_close(sc);
			(void) mxge_open(sc);
		}
		callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	}
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static void
mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	mxge_softc_t *sc = ifp->if_softc;

	if (sc == NULL)
		return;
	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_status |= sc->fw_stats->link_up ? IFM_ACTIVE : 0;
	ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
	ifmr->ifm_active |= sc->fw_stats->link_up ? IFM_FDX : 0;
}
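/*
 * Note on the SIOCSIFCAP handling below: TSO4 depends on transmit
 * checksum offload, so disabling IFCAP_TXCSUM also clears IFCAP_TSO4,
 * and enabling IFCAP_TSO4 is refused unless IFCAP_TXCSUM is already
 * enabled.
 */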
static int
mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int err, mask;

	err = 0;
	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		err = ether_ioctl(ifp, command, data);
		break;

	case SIOCSIFMTU:
		err = mxge_change_mtu(sc, ifr->ifr_mtu);
		break;

	case SIOCSIFFLAGS:
		mtx_lock(&sc->driver_mtx);
		if (ifp->if_flags & IFF_UP) {
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				err = mxge_open(sc);
				callout_reset(&sc->co_hdl, mxge_ticks,
					      mxge_tick, sc);
			} else {
				/* take care of promisc and allmulti
				   flag changes */
				mxge_change_promisc(sc,
						    ifp->if_flags & IFF_PROMISC);
				mxge_set_multicast_list(sc);
			}
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				mxge_close(sc);
				callout_stop(&sc->co_hdl);
			}
		}
		mtx_unlock(&sc->driver_mtx);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		mtx_lock(&sc->driver_mtx);
		mxge_set_multicast_list(sc);
		mtx_unlock(&sc->driver_mtx);
		break;

	case SIOCSIFCAP:
		mtx_lock(&sc->driver_mtx);
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
						      | CSUM_TSO);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM;
				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
			}
		} else if (mask & IFCAP_RXCSUM) {
			if (IFCAP_RXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_RXCSUM;
				sc->csum_flag = 0;
			} else {
				ifp->if_capenable |= IFCAP_RXCSUM;
				sc->csum_flag = 1;
			}
		}
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				ifp->if_hwassist &= ~CSUM_TSO;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				printf("mxge requires tx checksum offload"
				       " be enabled to use TSO\n");
				err = EINVAL;
			}
		}

		if (mask & IFCAP_VLAN_HWTAGGING)
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
		mtx_unlock(&sc->driver_mtx);
		VLAN_CAPABILITIES(ifp);

		break;

	case SIOCGIFMEDIA:
		err = ifmedia_ioctl(ifp, (struct ifreq *)data,
				    &sc->media, command);
		break;

	default:
		err = ENOTTY;
	}
	return err;
}

static void
mxge_fetch_tunables(mxge_softc_t *sc)
{

	TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
			  &mxge_flow_control);
	TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
			  &mxge_intr_coal_delay);
	TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
			  &mxge_nvidia_ecrc_enable);
	TUNABLE_INT_FETCH("hw.mxge.force_firmware",
			  &mxge_force_firmware);
	TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
			  &mxge_deassert_wait);
	TUNABLE_INT_FETCH("hw.mxge.verbose",
			  &mxge_verbose);
	TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
	TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);

	if (bootverbose)
		mxge_verbose = 1;
	if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
		mxge_intr_coal_delay = 30;
	if (mxge_ticks == 0)
		mxge_ticks = hz;
	sc->pause = mxge_flow_control;
}
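/*
 * Attach, in outline: fetch tunables, create the parent DMA tag,
 * allocate the ifnet and locks, map the board's SRAM, parse the
 * EEPROM strings, set up the command/zeropad/stats/dmabench DMA
 * blocks, allocate the interrupt (MSI if available), select and load
 * the firmware, allocate the rings, hook up the interrupt handler,
 * and finally attach to the network stack.
 */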
static int
mxge_attach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp;
	int count, rid, err;

	sc->dev = dev;
	mxge_fetch_tunables(sc);

	err = bus_dma_tag_create(NULL,			/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 MXGE_MAX_SEND_DESC,	/* num segs */
				 4096,			/* maxsegsize */
				 0,			/* flags */
				 NULL, NULL,		/* lock */
				 &sc->parent_dmat);	/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating parent dmat\n",
			      err);
		goto abort_with_nothing;
	}

	ifp = sc->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "can not if_alloc()\n");
		err = ENOSPC;
		goto abort_with_parent_dmat;
	}
	snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
		 device_get_nameunit(dev));
	mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
	snprintf(sc->tx_mtx_name, sizeof(sc->tx_mtx_name), "%s:tx",
		 device_get_nameunit(dev));
	mtx_init(&sc->tx_mtx, sc->tx_mtx_name, NULL, MTX_DEF);
	snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
		 "%s:drv", device_get_nameunit(dev));
	mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
		 MTX_NETWORK_LOCK, MTX_DEF);

	callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);

	mxge_setup_cfg_space(sc);

	/* Map the board into the kernel */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
					 ~0, 1, RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto abort_with_lock;
	}
	sc->sram = rman_get_virtual(sc->mem_res);
	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
	if (sc->sram_size > rman_get_size(sc->mem_res)) {
		device_printf(dev, "impossible memory region size %ld\n",
			      rman_get_size(sc->mem_res));
		err = ENXIO;
		goto abort_with_mem_res;
	}

	/* make NULL terminated copy of the EEPROM strings section of
	   lanai SRAM */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
				sc->eeprom_strings,
				MXGE_EEPROM_STRINGS_SIZE - 2);
	err = mxge_parse_strings(sc);
	if (err != 0)
		goto abort_with_mem_res;

	/* Enable write combining for efficient use of PCIe bus */
	mxge_enable_wc(sc);

	/* Allocate the out of band dma memory */
	err = mxge_dma_alloc(sc, &sc->cmd_dma,
			     sizeof (mxge_cmd_t), 64);
	if (err != 0)
		goto abort_with_mem_res;
	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0)
		goto abort_with_cmd_dma;

	err = mxge_dma_alloc(sc, &sc->fw_stats_dma,
			     sizeof (*sc->fw_stats), 64);
	if (err != 0)
		goto abort_with_zeropad_dma;
	sc->fw_stats = (mcp_irq_data_t *)sc->fw_stats_dma.addr;

	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0)
		goto abort_with_fw_stats;

	/* Add our ithread */
	count = pci_msi_count(dev);
	if (count == 1 && pci_alloc_msi(dev, &count) == 0) {
		rid = 1;
		sc->msi_enabled = 1;
	} else {
		rid = 0;
	}
	sc->irq_res = bus_alloc_resource(dev, SYS_RES_IRQ, &rid, 0, ~0,
					 1, RF_SHAREABLE | RF_ACTIVE);
	if (sc->irq_res == NULL) {
		device_printf(dev, "could not alloc interrupt\n");
		goto abort_with_dmabench;
	}
	if (mxge_verbose)
		device_printf(dev, "using %s irq %ld\n",
			      sc->msi_enabled ? "MSI" : "INTx",
			      rman_get_start(sc->irq_res));
	/* select & load the firmware */
	err = mxge_select_firmware(sc);
	if (err != 0)
		goto abort_with_irq_res;
	sc->intr_coal_delay = mxge_intr_coal_delay;
	err = mxge_reset(sc, 0);
	if (err != 0)
		goto abort_with_irq_res;

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to allocate rings\n");
		goto abort_with_irq_res;
	}

	err = bus_setup_intr(sc->dev, sc->irq_res,
			     INTR_TYPE_NET | INTR_MPSAFE,
			     NULL, mxge_intr, sc, &sc->ih);
	if (err != 0) {
		goto abort_with_rings;
	}
	/* hook into the network stack */
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_baudrate = 100000000;
	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
		IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;

	sc->max_mtu = mxge_max_mtu(sc);
	if (sc->max_mtu >= 9000)
		ifp->if_capabilities |= IFCAP_JUMBO_MTU;
	else
		device_printf(dev, "MTU limited to %d.  Install "
			      "latest firmware for 9000 byte jumbo support\n",
			      sc->max_mtu - ETHER_HDR_LEN);
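	/* enable all offloads by default; mxge_ioctl() lets the
	   administrator toggle them at run time */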
Install " 3312 "latest firmware for 9000 byte jumbo support\n", 3313 sc->max_mtu - ETHER_HDR_LEN); 3314 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 3315 ifp->if_capenable = ifp->if_capabilities; 3316 sc->csum_flag = 1; 3317 ifp->if_init = mxge_init; 3318 ifp->if_softc = sc; 3319 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 3320 ifp->if_ioctl = mxge_ioctl; 3321 ifp->if_start = mxge_start; 3322 ether_ifattach(ifp, sc->mac_addr); 3323 /* ether_ifattach sets mtu to 1500 */ 3324 if (ifp->if_capabilities & IFCAP_JUMBO_MTU) 3325 ifp->if_mtu = 9000; 3326 3327 /* Initialise the ifmedia structure */ 3328 ifmedia_init(&sc->media, 0, mxge_media_change, 3329 mxge_media_status); 3330 ifmedia_add(&sc->media, IFM_ETHER|IFM_AUTO, 0, NULL); 3331 mxge_add_sysctls(sc); 3332 return 0; 3333 3334 abort_with_rings: 3335 mxge_free_rings(sc); 3336 abort_with_irq_res: 3337 bus_release_resource(dev, SYS_RES_IRQ, 3338 sc->msi_enabled ? 1 : 0, sc->irq_res); 3339 if (sc->msi_enabled) 3340 pci_release_msi(dev); 3341 abort_with_dmabench: 3342 mxge_dma_free(&sc->dmabench_dma); 3343 abort_with_fw_stats: 3344 mxge_dma_free(&sc->fw_stats_dma); 3345 abort_with_zeropad_dma: 3346 mxge_dma_free(&sc->zeropad_dma); 3347 abort_with_cmd_dma: 3348 mxge_dma_free(&sc->cmd_dma); 3349 abort_with_mem_res: 3350 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 3351 abort_with_lock: 3352 pci_disable_busmaster(dev); 3353 mtx_destroy(&sc->cmd_mtx); 3354 mtx_destroy(&sc->tx_mtx); 3355 mtx_destroy(&sc->driver_mtx); 3356 if_free(ifp); 3357 abort_with_parent_dmat: 3358 bus_dma_tag_destroy(sc->parent_dmat); 3359 3360 abort_with_nothing: 3361 return err; 3362 } 3363 3364 static int 3365 mxge_detach(device_t dev) 3366 { 3367 mxge_softc_t *sc = device_get_softc(dev); 3368 3369 if (sc->ifp->if_vlantrunk != NULL) { 3370 device_printf(sc->dev, 3371 "Detach vlans before removing module\n"); 3372 return EBUSY; 3373 } 3374 mtx_lock(&sc->driver_mtx); 3375 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 3376 mxge_close(sc); 3377 callout_stop(&sc->co_hdl); 3378 mtx_unlock(&sc->driver_mtx); 3379 ether_ifdetach(sc->ifp); 3380 ifmedia_removeall(&sc->media); 3381 mxge_dummy_rdma(sc, 0); 3382 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 3383 mxge_free_rings(sc); 3384 bus_release_resource(dev, SYS_RES_IRQ, 3385 sc->msi_enabled ? 1 : 0, sc->irq_res); 3386 if (sc->msi_enabled) 3387 pci_release_msi(dev); 3388 3389 sc->rx_done.entry = NULL; 3390 mxge_dma_free(&sc->rx_done.dma); 3391 mxge_dma_free(&sc->fw_stats_dma); 3392 mxge_dma_free(&sc->dmabench_dma); 3393 mxge_dma_free(&sc->zeropad_dma); 3394 mxge_dma_free(&sc->cmd_dma); 3395 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 3396 pci_disable_busmaster(dev); 3397 mtx_destroy(&sc->cmd_mtx); 3398 mtx_destroy(&sc->tx_mtx); 3399 mtx_destroy(&sc->driver_mtx); 3400 if_free(sc->ifp); 3401 bus_dma_tag_destroy(sc->parent_dmat); 3402 return 0; 3403 } 3404 3405 static int 3406 mxge_shutdown(device_t dev) 3407 { 3408 return 0; 3409 } 3410 3411 /* 3412 This file uses Myri10GE driver indentation. 3413 3414 Local Variables: 3415 c-file-style:"linux" 3416 tab-width:8 3417 End: 3418 */ 3419