/******************************************************************************

Copyright (c) 2006-2008, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/sx.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <net/zlib.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <machine/bus.h>
#include <machine/in_cksum.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>
#include <sys/smp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */

#include <vm/vm.h>		/* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif

#include <dev/mxge/mxge_mcp.h>
#include <dev/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/mxge/if_mxge_var.h>

/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_lro_cnt = 8;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
static int mxge_always_promisc = 0;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
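/*
 * Four firmware images are named above: eth_z8e vs. ethp_z8e handle
 * aligned vs. unaligned PCIe completions (see the comment above
 * mxge_firmware_probe() below), and the rss_* names are their
 * multi-slice (RSS) counterparts, selected when more than one slice
 * is in use.
 */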
static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),
	{0, 0}
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);

static int
mxge_probe(device_t dev)
{
	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		device_set_desc(dev, "Myri10G-PCIE-8A");
		return 0;
	}
	return ENXIO;
}

static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386) || defined(__amd64)
	vm_offset_t len;
	int err;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram,
			       len, PAT_WRITE_COMBINING);
	if (err != 0) {
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
			      err);
		sc->wc = 0;
	}
#endif
}

/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	if (error == 0) {
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}

static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}

static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */

static int
mxge_parse_strings(mxge_softc_t *sc)
{
#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)

	char *ptr, *limit;
	int i, found_mac;

	ptr = sc->eeprom_strings;
	limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
	found_mac = 0;
	while (ptr < limit && *ptr != '\0') {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			ptr += 1;
			sc->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				ptr += 3;
				if ((ptr + 2) > limit)
					goto abort;
				sc->mac_addr[i] = strtoul(ptr, NULL, 16);
				found_mac = 1;
			}
		} else if (memcmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strncpy(sc->product_code_string, ptr,
				sizeof (sc->product_code_string) - 1);
		} else if (memcmp(ptr, "SN=", 3) == 0) {
			ptr += 3;
			strncpy(sc->serial_number_string, ptr,
				sizeof (sc->serial_number_string) - 1);
		}
		MXGE_NEXT_STRING(ptr);
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}
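/*
 * For illustration, a (hypothetical, made-up) string section might
 * contain:
 *
 *	SN=123456\0MAC=00:60:dd:47:87:2f\0PC=SAMPLE\0\0
 *
 * The MAC parser above relies on the fixed x:x:x:x:x:x layout,
 * stepping 3 characters at a time and letting strtoul() decode each
 * colon-separated hex octet.
 */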
#if #cpu(i386) || defined __i386 || defined i386 || defined __i386__ || #cpu(x86_64) || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function.  Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access.
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves.  This is because on
	 * opteron/nvidia class machines the 0xe0000000 mapping is
	 * handled by the nvidia chipset; that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	off = base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);

	if (va == NULL) {
		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif

static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";

	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */
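	/*
	 * Worked example with made-up numbers: if cmd.data0 came back
	 * as 0x03e80800, the NIC completed 0x03e8 = 1000 transfers in
	 * 0x0800 = 2048 ticks (1024us).  At len == 2048 bytes per
	 * transfer, the read test below computes
	 * (1000 * 2048 * 2) / 2048 == 2000, i.e. 2000 MB/s; the
	 * factor of 2 converts 0.5us ticks to microseconds, and
	 * bytes per microsecond is numerically MB/s.
	 */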
	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}

/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary.  Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB.  If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */

static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
				      pectl);
			sc->tx_boundary = 2048;
		}
	}

	/*
	 * load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0) {
		return status;
	}

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen.
	 */

	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS)
		device_printf(dev, "Falling back to ethp! "
			      "Please install up to date fw\n");
	return status;
}

static int
mxge_select_firmware(mxge_softc_t *sc)
{
	int aligned = 0;

	if (mxge_force_firmware != 0) {
		if (mxge_force_firmware == 1)
			aligned = 1;
		else
			aligned = 0;
		if (mxge_verbose)
			device_printf(sc->dev,
				      "Assuming %s completions (forced)\n",
				      aligned ? "aligned" : "unaligned");
		goto abort;
	}

	/* if the PCIe link width is 4 or less, we can use the aligned
	   firmware and skip any checks */
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev,
			      "PCIe x%d Link, expect reduced performance\n",
			      sc->link_width);
		aligned = 1;
		goto abort;
	}

	if (0 == mxge_firmware_probe(sc))
		return 0;

abort:
	if (aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx_boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
	}
	return (mxge_load_firmware(sc, 0));
}

union qualhack
{
	const char *ro_char;
	char *rw_char;
};

static int
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{

	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
			      be32toh(hdr->mcp_type));
		return EIO;
	}

	/* save firmware version for sysctl */
	strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
	if (mxge_verbose)
		device_printf(sc->dev, "firmware id: %s\n", hdr->version);

	sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
	       &sc->fw_ver_minor, &sc->fw_ver_tiny);

	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
	      && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		device_printf(sc->dev, "Found firmware version %s\n",
			      sc->fw_version);
		device_printf(sc->dev, "Driver needs %d.%d\n",
			      MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
		return EINVAL;
	}
	return 0;

}

static void *
z_alloc(void *nil, u_int items, u_int size)
{
	void *ptr;

	ptr = malloc(items * size, M_TEMP, M_NOWAIT);
	return ptr;
}

static void
z_free(void *nil, void *ptr)
{
	free(ptr, M_TEMP);
}
static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
	z_stream zs;
	char *inflate_buffer;
	const struct firmware *fw;
	const mcp_gen_header_t *hdr;
	unsigned hdr_offset;
	int status;
	unsigned int i;
	char dummy;
	size_t fw_len;

	fw = firmware_get(sc->fw_name);
	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n",
			      sc->fw_name);
		return ENOENT;
	}

	/* setup zlib and decompress f/w */
	bzero(&zs, sizeof (zs));
	zs.zalloc = z_alloc;
	zs.zfree = z_free;
	status = inflateInit(&zs);
	if (status != Z_OK) {
		status = EIO;
		goto abort_with_fw;
	}

	/* the uncompressed size is stored as the firmware version,
	   which would otherwise go unused */
	fw_len = (size_t) fw->version;
	inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
	if (inflate_buffer == NULL) {
		status = ENOMEM;
		goto abort_with_zs;
	}
	zs.avail_in = fw->datasize;
	zs.next_in = __DECONST(char *, fw->data);
	zs.avail_out = fw_len;
	zs.next_out = inflate_buffer;
	status = inflate(&zs, Z_FINISH);
	if (status != Z_STREAM_END) {
		device_printf(sc->dev, "zlib %d\n", status);
		status = EIO;
		goto abort_with_buffer;
	}

	/* check id */
	hdr_offset = htobe32(*(const uint32_t *)
			     (inflate_buffer + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
		device_printf(sc->dev, "Bad firmware file\n");
		status = EIO;
		goto abort_with_buffer;
	}
	hdr = (const void*)(inflate_buffer + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_buffer;

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < fw_len; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
			      inflate_buffer + i,
			      min(256U, (unsigned)(fw_len - i)));
		wmb();
		dummy = *sc->sram;
		wmb();
	}

	*limit = fw_len;
	status = 0;
abort_with_buffer:
	free(inflate_buffer, M_TEMP);
abort_with_zs:
	inflateEnd(&zs);
abort_with_fw:
	firmware_put(fw, FIRMWARE_UNLOAD);
	return status;
}

/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high);	/* dummy addr MSW */
	buf[4] = htobe32(dma_low);	/* dummy addr LSW */
	buf[5] = htobe32(enable);	/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)\n",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}

static int
mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof(*buf) + 8];
	volatile mcp_cmd_response_t *response = sc->cmd;
	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
	uint32_t dma_low, dma_high;
	int err, sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	buf->data0 = htobe32(data->data0);
	buf->data1 = htobe32(data->data1);
	buf->data2 = htobe32(data->data2);
	buf->cmd = htobe32(cmd);
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf->response_addr.low = htobe32(dma_low);
	buf->response_addr.high = htobe32(dma_high);
	mtx_lock(&sc->cmd_mtx);
	response->result = 0xffffffff;
	wmb();
	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	err = EAGAIN;
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
		wmb();
		switch (be32toh(response->result)) {
		case 0:
			data->data0 = be32toh(response->data);
			err = 0;
			break;
		case 0xffffffff:
			DELAY(1000);
			break;
		case MXGEFW_CMD_UNKNOWN:
			err = ENOSYS;
			break;
		case MXGEFW_CMD_ERROR_UNALIGNED:
			err = E2BIG;
			break;
		case MXGEFW_CMD_ERROR_BUSY:
			err = EBUSY;
			break;
		default:
			device_printf(sc->dev,
				      "mxge: command %d "
				      "failed, result = %d\n",
				      cmd, be32toh(response->result));
			err = ENXIO;
			break;
		}
		if (err != EAGAIN)
			break;
	}
	if (err == EAGAIN)
		device_printf(sc->dev, "mxge: command %d timed out, "
			      "result = %d\n",
			      cmd, be32toh(response->result));
	mtx_unlock(&sc->cmd_mtx);
	return err;
}

static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not malloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	free(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}

	return status;
}
static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
				      "Using firmware currently running on NIC"
				      ".  For optimal\n");
			device_printf(sc->dev,
				      "performance consider loading optimized "
				      "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
	/* where the code starts*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8);	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "handoff failed (%p = 0x%x)\n",
			      confirm, *confirm);

		return ENXIO;
	}
	return 0;
}
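/*
 * A note on the machinery above: the firmware modules are
 * zlib-deflated images whose uncompressed size rides in the
 * firmware(9) version field, with a 32-bit pointer at
 * MCP_HEADER_PTR_OFFSET locating the mcp_gen_header_t that
 * mxge_validate_firmware() checks.  Commands and the boot handoff
 * share one confirmation idiom: the driver PIOs a request into a
 * well-known SRAM window (MXGEFW_ETH_CMD or MXGEFW_BOOT_HANDOFF) and
 * then polls a host-memory word that the firmware DMAs back -- a
 * result code for commands, or -1 ("alive and well") for the handoff.
 */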
static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}

static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set promisc mode\n");
	}
}

static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */

	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */

	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		      &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		      &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "Failed "
				      "MXGEFW_JOIN_MULTICAST_GROUP, error "
				      "status: %d\n", err);
			/* abort, leaving multicast filtering off */
			IF_ADDR_UNLOCK(ifp);
			return;
		}
	}
	IF_ADDR_UNLOCK(ifp);
	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}

static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}
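/*
 * In other words (assuming the usual firmware behavior): we prefer
 * the firmware's MXGEFW_MAX_MTU (less the MXGEFW_PAD alignment pad)
 * whenever one page-sized cluster can already hold it, or when the
 * firmware accepts MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS and can
 * therefore assemble a frame from several page-sized buffers; only
 * firmware lacking that command limits us to a single MJUMPAGESIZE
 * cluster.
 */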
static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts  */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);

	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed to set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lro_bad_csum = 0;
		ss->lro_queued = 0;
		ss->lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			ss->fw_stats->valid = 0;
			ss->fw_stats->send_done_count = 0;
		}
	}
	sc->rdma_tags_available = 15;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, 0);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	return status;
}
static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0) {
		return err;
	}
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0) {
		return err;
	}
	if (enabled == sc->pause)
		return 0;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_pause(sc, enabled);
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
{
	struct ifnet *ifp;
	int err = 0;

	ifp = sc->ifp;
	if (lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	else
		ifp->if_capenable |= IFCAP_LRO;
	sc->lro_cnt = lro_cnt;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		mxge_close(sc);
		err = mxge_open(sc);
	}
	return err;
}

static int
mxge_change_lro(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int lro_cnt;
	int err;

	sc = arg1;
	lro_cnt = sc->lro_cnt;
	err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
	if (err != 0)
		return err;

	if (lro_cnt == sc->lro_cnt)
		return 0;

	if (lro_cnt > 128)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_lro_locked(sc, lro_cnt);
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}

static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int slice;

	if (sc->slice_sysctl_tree == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		if (ss == NULL || ss->sysctl_tree == NULL)
			continue;
		sysctl_ctx_free(&ss->sysctl_ctx);
		ss->sysctl_tree = NULL;
	}
	sysctl_ctx_free(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = NULL;
}

static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
	fw = sc->ss[0].fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "firmware_version",
			  CTLFLAG_RD, &sc->fw_version,
			  0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "serial_number",
			  CTLFLAG_RD, &sc->serial_number_string,
			  0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "product_code",
			  CTLFLAG_RD, &sc->product_code_string,
			  0, "product_code");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "pcie_link_width",
		       CTLFLAG_RD, &sc->link_width,
		       0, "PCIe link width");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_boundary",
		       CTLFLAG_RD, &sc->tx_boundary,
		       0, "tx_boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_combine",
		       CTLFLAG_RD, &sc->wc,
		       0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_dma_MBs",
		       CTLFLAG_RD, &sc->read_dma,
		       0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_dma_MBs",
		       CTLFLAG_RD, &sc->write_dma,
		       0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_write_dma_MBs",
		       CTLFLAG_RD, &sc->read_write_dma,
		       0, "DMA concurrent Read/Write speed in MB/s");

	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"intr_coal_delay",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_intr_coal,
			"I", "interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"flow_control_enabled",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_flow_control,
			"I", "enable flow control");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "deassert_wait",
		       CTLFLAG_RW, &mxge_deassert_wait,
		       0, "Wait for IRQ line to go low in ihandler");

	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"link_up",
			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
			0, mxge_handle_be32,
			"I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"rdma_tags_available",
			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
			0, mxge_handle_be32,
			"I", "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_crc32",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_crc32,
			0, mxge_handle_be32,
			"I", "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_phy",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_phy,
			0, mxge_handle_be32,
			"I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_error_or_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_link_error_or_filtered,
			0, mxge_handle_be32,
			"I", "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_overflow",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
			0, mxge_handle_be32,
			"I", "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_multicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_multicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_big_buffer",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_small_buffer",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_no_small_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_overrun",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
			0, mxge_handle_be32,
			"I", "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_pause",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_pause,
			0, mxge_handle_be32,
			"I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_runt",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
			0, mxge_handle_be32,
			"I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_unicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_unicast_filtered");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "verbose",
		       CTLFLAG_RW, &mxge_verbose,
		       0, "verbose printing");

	/* lro */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"lro_cnt",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_lro,
			"I", "number of lro merge queues");

	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree =
		SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
				"slice", CTLFLAG_RD, 0, "");

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		sprintf(slice_num, "%d", slice);
		ss->sysctl_tree =
			SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
					CTLFLAG_RD, 0, "");
		children = SYSCTL_CHILDREN(ss->sysctl_tree);
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_small_cnt",
			       CTLFLAG_RD, &ss->rx_small.cnt,
			       0, "rx_small_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_big_cnt",
			       CTLFLAG_RD, &ss->rx_big.cnt,
			       0, "rx_big_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_req",
			       CTLFLAG_RD, &ss->tx.req,
			       0, "tx_req");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
			       0, "number of lro merge queues flushed");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_queued", CTLFLAG_RD, &ss->lro_queued,
			       0, "number of frames appended to lro merge "
			       "queues");

		/* only transmit from slice 0 for now */
		if (slice > 0)
			continue;

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_done",
			       CTLFLAG_RD, &ss->tx.done,
			       0, "tx_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_pkt_done",
			       CTLFLAG_RD, &ss->tx.pkt_done,
			       0, "tx_pkt_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_stall",
			       CTLFLAG_RD, &ss->tx.stall,
			       0, "tx_stall");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_wake",
			       CTLFLAG_RD, &ss->tx.wake,
			       0, "tx_wake");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_defrag",
			       CTLFLAG_RD, &ss->tx.defrag,
			       0, "tx_defrag");
	}
}
/* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
   backwards one at a time and handle ring wraps */

static inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
			  mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
		wmb();
	}
}
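/*
 * Worked example (hypothetical ring): with tx->mask == 7 (an 8-entry
 * ring), tx->req == 6 and cnt == 4, the chain occupies slots 6, 7, 0
 * and 1.  The loop above stores src[3], src[2] and src[1] into slots
 * 1, 0 and 7, in that order, and leaves src[0]/slot 6 to the caller,
 * which writes it last and only then marks its flags valid -- so the
 * firmware never sees a partially written chain across the wrap.
 */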
/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */

static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
		int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}

#if IFCAP_TSO4

static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, int ip_off)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct ip *ip;
	struct tcphdr *tcp;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	/* ensure we have the ethernet, IP and TCP
	   header together in the first mbuf, copy
	   it to a scratch buffer if not */
	if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
		m_copydata(m, 0, ip_off + sizeof (*ip),
			   ss->scratch);
		ip = (struct ip *)(ss->scratch + ip_off);
	} else {
		ip = (struct ip *)(mtod(m, char *) + ip_off);
	}
	if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2)
			    + sizeof (*tcp))) {
		m_copydata(m, 0, ip_off + (ip->ip_hl << 2)
			   + sizeof (*tcp), ss->scratch);
		ip = (struct ip *)(ss->scratch + ip_off);
	}

	tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
	cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2));

	/* TSO implies checksum offload on this hardware */
	cksum_offset = ip_off + (ip->ip_hl << 2);
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */

	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces*/
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (__predict_false(cksum_offset > seglen))
				cksum_offset -= seglen;
			else
				cksum_offset = 0;
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req-rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	ss->sc->ifp->if_oerrors++;
	if (!once) {
		printf("tx->max_desc exceeded via TSO!\n");
		printf("mss = %d, %ld, %d!\n", mss,
		       (long)seg - (long)tx->seg_list, tx->max_desc);
		once = 1;
	}
	return;

}

#endif /* IFCAP_TSO4 */

#ifdef MXGE_NEW_VLAN_API
/*
 * We reproduce the software vlan tag insertion from
 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
 * vlan tag insertion.  We need to advertise this in order to have the
 * vlan interface respect our csum offload flags.
 */
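/*
 * The transformation below, sketched on the wire format:
 *
 *   before: | dst MAC (6) | src MAC (6) | ethertype (2) | payload ...
 *   after:  | dst MAC (6) | src MAC (6) | 0x8100 (2) | tag (2) |
 *             ethertype (2) | payload ...
 *
 * M_PREPEND makes room for the 4-byte 802.1Q encapsulation, the MAC
 * addresses are slid into the new space, and the TPID/tag pair is
 * written where the ethertype used to begin.
 */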
1884 */ 1885 static struct mbuf * 1886 mxge_vlan_tag_insert(struct mbuf *m) 1887 { 1888 struct ether_vlan_header *evl; 1889 1890 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT); 1891 if (__predict_false(m == NULL)) 1892 return NULL; 1893 if (m->m_len < sizeof(*evl)) { 1894 m = m_pullup(m, sizeof(*evl)); 1895 if (__predict_false(m == NULL)) 1896 return NULL; 1897 } 1898 /* 1899 * Transform the Ethernet header into an Ethernet header 1900 * with 802.1Q encapsulation. 1901 */ 1902 evl = mtod(m, struct ether_vlan_header *); 1903 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 1904 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 1905 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 1906 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 1907 m->m_flags &= ~M_VLANTAG; 1908 return m; 1909 } 1910 #endif /* MXGE_NEW_VLAN_API */ 1911 1912 static void 1913 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 1914 { 1915 mxge_softc_t *sc; 1916 mcp_kreq_ether_send_t *req; 1917 bus_dma_segment_t *seg; 1918 struct mbuf *m_tmp; 1919 struct ifnet *ifp; 1920 mxge_tx_ring_t *tx; 1921 struct ip *ip; 1922 int cnt, cum_len, err, i, idx, odd_flag, ip_off; 1923 uint16_t pseudo_hdr_offset; 1924 uint8_t flags, cksum_offset; 1925 1926 1927 sc = ss->sc; 1928 ifp = sc->ifp; 1929 tx = &ss->tx; 1930 1931 ip_off = sizeof (struct ether_header); 1932 #ifdef MXGE_NEW_VLAN_API 1933 if (m->m_flags & M_VLANTAG) { 1934 m = mxge_vlan_tag_insert(m); 1935 if (__predict_false(m == NULL)) 1936 goto drop; 1937 ip_off += ETHER_VLAN_ENCAP_LEN; 1938 } 1939 #endif 1940 /* (try to) map the frame for DMA */ 1941 idx = tx->req & tx->mask; 1942 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 1943 m, tx->seg_list, &cnt, 1944 BUS_DMA_NOWAIT); 1945 if (__predict_false(err == EFBIG)) { 1946 /* Too many segments in the chain. Try 1947 to defrag */ 1948 m_tmp = m_defrag(m, M_NOWAIT); 1949 if (m_tmp == NULL) { 1950 goto drop; 1951 } 1952 ss->tx.defrag++; 1953 m = m_tmp; 1954 err = bus_dmamap_load_mbuf_sg(tx->dmat, 1955 tx->info[idx].map, 1956 m, tx->seg_list, &cnt, 1957 BUS_DMA_NOWAIT); 1958 } 1959 if (__predict_false(err != 0)) { 1960 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 1961 " packet len = %d\n", err, m->m_pkthdr.len); 1962 goto drop; 1963 } 1964 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 1965 BUS_DMASYNC_PREWRITE); 1966 tx->info[idx].m = m; 1967 1968 #if IFCAP_TSO4 1969 /* TSO is different enough, we handle it in another routine */ 1970 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 1971 mxge_encap_tso(ss, m, cnt, ip_off); 1972 return; 1973 } 1974 #endif 1975 1976 req = tx->req_list; 1977 cksum_offset = 0; 1978 pseudo_hdr_offset = 0; 1979 flags = MXGEFW_FLAGS_NO_TSO; 1980 1981 /* checksum offloading? 
	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
		/* ensure ip header is in first mbuf, copy
		   it to a scratch buffer if not */
		if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
			m_copydata(m, 0, ip_off + sizeof (*ip),
				   ss->scratch);
			ip = (struct ip *)(ss->scratch + ip_off);
		} else {
			ip = (struct ip *)(mtod(m, char *) + ip_off);
		}
		cksum_offset = ip_off + (ip->ip_hl << 2);
		pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
		pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
		req->cksum_offset = cksum_offset;
		flags |= MXGEFW_FLAGS_CKSUM;
		odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
	} else {
		odd_flag = 0;
	}
	if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
		flags |= MXGEFW_FLAGS_SMALL;

	/* convert segments into a request list */
	cum_len = 0;
	seg = tx->seg_list;
	req->flags = MXGEFW_FLAGS_FIRST;
	for (i = 0; i < cnt; i++) {
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		req->length = htobe16(seg->ds_len);
		req->cksum_offset = cksum_offset;
		if (cksum_offset > seg->ds_len)
			cksum_offset -= seg->ds_len;
		else
			cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cum_len += seg->ds_len;
		seg++;
		req++;
		req->flags = 0;
	}
	req--;
	/* pad runts to 60 bytes */
	if (cum_len < 60) {
		req++;
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->length = htobe16(60 - cum_len);
		req->cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cnt++;
	}

	tx->req_list[0].rdma_count = cnt;
#if 0
	/* print what the firmware will see */
	for (i = 0; i < cnt; i++) {
		printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
		       "cso:%d, flags:0x%x, rdma:%d\n",
		       i, (int)ntohl(tx->req_list[i].addr_high),
		       (int)ntohl(tx->req_list[i].addr_low),
		       (int)ntohs(tx->req_list[i].length),
		       (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
		       tx->req_list[i].cksum_offset, tx->req_list[i].flags,
		       tx->req_list[i].rdma_count);
	}
	printf("--------------\n");
#endif
	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
	return;

drop:
	m_freem(m);
	ifp->if_oerrors++;
	return;
}

static inline void
mxge_start_locked(struct mxge_slice_state *ss)
{
	mxge_softc_t *sc;
	struct mbuf *m;
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;

	sc = ss->sc;
	ifp = sc->ifp;
	tx = &ss->tx;
	while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL) {
			return;
		}
		/* let BPF see it */
		BPF_MTAP(ifp, m);

		/* give it to the nic */
		mxge_encap(ss, m);
	}
	/* ran out of transmit slots */
	if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
		sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
		tx->stall++;
	}
}

static void
mxge_start(struct ifnet *ifp)
*ifp) 2104 { 2105 mxge_softc_t *sc = ifp->if_softc; 2106 struct mxge_slice_state *ss; 2107 2108 /* only use the first slice for now */ 2109 ss = &sc->ss[0]; 2110 mtx_lock(&ss->tx.mtx); 2111 mxge_start_locked(ss); 2112 mtx_unlock(&ss->tx.mtx); 2113 } 2114 2115 /* 2116 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2117 * at most 32 bytes at a time, so as to avoid involving the software 2118 * pio handler in the nic. We re-write the first segment's low 2119 * DMA address to mark it valid only after we write the entire chunk 2120 * in a burst 2121 */ 2122 static inline void 2123 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2124 mcp_kreq_ether_recv_t *src) 2125 { 2126 uint32_t low; 2127 2128 low = src->addr_low; 2129 src->addr_low = 0xffffffff; 2130 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2131 wmb(); 2132 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2133 wmb(); 2134 src->addr_low = low; 2135 dst->addr_low = low; 2136 wmb(); 2137 } 2138 2139 static int 2140 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2141 { 2142 bus_dma_segment_t seg; 2143 struct mbuf *m; 2144 mxge_rx_ring_t *rx = &ss->rx_small; 2145 int cnt, err; 2146 2147 m = m_gethdr(M_DONTWAIT, MT_DATA); 2148 if (m == NULL) { 2149 rx->alloc_fail++; 2150 err = ENOBUFS; 2151 goto done; 2152 } 2153 m->m_len = MHLEN; 2154 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2155 &seg, &cnt, BUS_DMA_NOWAIT); 2156 if (err != 0) { 2157 m_free(m); 2158 goto done; 2159 } 2160 rx->info[idx].m = m; 2161 rx->shadow[idx].addr_low = 2162 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2163 rx->shadow[idx].addr_high = 2164 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2165 2166 done: 2167 if ((idx & 7) == 7) 2168 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2169 return err; 2170 } 2171 2172 static int 2173 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2174 { 2175 bus_dma_segment_t seg[3]; 2176 struct mbuf *m; 2177 mxge_rx_ring_t *rx = &ss->rx_big; 2178 int cnt, err, i; 2179 2180 if (rx->cl_size == MCLBYTES) 2181 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); 2182 else 2183 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2184 if (m == NULL) { 2185 rx->alloc_fail++; 2186 err = ENOBUFS; 2187 goto done; 2188 } 2189 m->m_len = rx->cl_size; 2190 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2191 seg, &cnt, BUS_DMA_NOWAIT); 2192 if (err != 0) { 2193 m_free(m); 2194 goto done; 2195 } 2196 rx->info[idx].m = m; 2197 rx->shadow[idx].addr_low = 2198 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2199 rx->shadow[idx].addr_high = 2200 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2201 2202 #if MXGE_VIRT_JUMBOS 2203 for (i = 1; i < cnt; i++) { 2204 rx->shadow[idx + i].addr_low = 2205 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2206 rx->shadow[idx + i].addr_high = 2207 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2208 } 2209 #endif 2210 2211 done: 2212 for (i = 0; i < rx->nbufs; i++) { 2213 if ((idx & 7) == 7) { 2214 mxge_submit_8rx(&rx->lanai[idx - 7], 2215 &rx->shadow[idx - 7]); 2216 } 2217 idx++; 2218 } 2219 return err; 2220 } 2221 2222 /* 2223 * Myri10GE hardware checksums are not valid if the sender 2224 * padded the frame with non-zero padding. This is because 2225 * the firmware just does a simple 16-bit 1s complement 2226 * checksum across the entire frame, excluding the first 14 2227 * bytes. 
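(the verification arithmetic is sketched below)
 */
/*
 * Illustrative sketch only (disabled): the firmware reports the
 * unfolded one's complement sum of everything past the 14-byte
 * Ethernet header.  Folding in the IP pseudo-header, as mxge_rx_csum()
 * does below, must produce 0xffff for an intact TCP or UDP frame, so
 * xor-ing with 0xffff is zero exactly when the checksum is good.
 */
#if 0
	c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
		      htonl(ntohs(csum) + ntohs(ip->ip_len) -
			    (ip->ip_hl << 2) + ip->ip_p));
	c ^= 0xffff;	/* 0 == valid */
#endif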
/*
 * It is best to simply check the checksum and
2228 * tell the stack about it only if the checksum is good
2229 */
2230
2231 static inline uint16_t
2232 mxge_rx_csum(struct mbuf *m, int csum)
2233 {
2234 	struct ether_header *eh;
2235 	struct ip *ip;
2236 	uint16_t c;
2237
2238 	eh = mtod(m, struct ether_header *);
2239
2240 	/* only deal with IPv4 TCP & UDP for now */
2241 	if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
2242 		return 1;
2243 	ip = (struct ip *)(eh + 1);
2244 	if (__predict_false(ip->ip_p != IPPROTO_TCP &&
2245 			    ip->ip_p != IPPROTO_UDP))
2246 		return 1;
2247
2248 	c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2249 		      htonl(ntohs(csum) + ntohs(ip->ip_len)
2250 			    - (ip->ip_hl << 2) + ip->ip_p));
2251 	c ^= 0xffff;
2252 	return (c);
2253 }
2254
2255 static void
2256 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2257 {
2258 	struct ether_vlan_header *evl;
2259 	struct ether_header *eh;
2260 	uint32_t partial;
2261
2262 	evl = mtod(m, struct ether_vlan_header *);
2263 	eh = mtod(m, struct ether_header *);
2264
2265 	/*
2266 	 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
2267 	 * after what the firmware thought was the end of the ethernet
2268 	 * header.
2269 	 */
2270
2271 	/* put checksum into host byte order */
2272 	*csum = ntohs(*csum);
2273 	partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2274 	(*csum) += ~partial;
2275 	(*csum) += ((*csum) < ~partial);
2276 	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2277 	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2278
2279 	/* restore checksum to network byte order;
2280 	   later consumers expect this */
2281 	*csum = htons(*csum);
2282
2283 	/* save the tag */
2284 #ifdef MXGE_NEW_VLAN_API
2285 	m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
2286 #else
2287 	{
2288 	struct m_tag *mtag;
2289 	mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2290 			   M_NOWAIT);
2291 	if (mtag == NULL)
2292 		return;
2293 	VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2294 	m_tag_prepend(m, mtag);
2295 	}
2296
2297 #endif
2298 	m->m_flags |= M_VLANTAG;
2299
2300 	/*
2301 	 * Remove the 802.1q header by copying the Ethernet
2302 	 * addresses over it and adjusting the beginning of
2303 	 * the data in the mbuf. The encapsulated Ethernet
2304 	 * type field is already in place.
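	 * A sketch of the layout (lengths in bytes):
	 *
	 *   before:  dst(6) src(6) 0x8100(2) tag(2) type(2) payload...
	 *   after:          dst(6) src(6)           type(2) payload...
	 *
	 * bcopy() slides the 12 address bytes forward by
	 * ETHER_VLAN_ENCAP_LEN (4) so they overwrite the TPID/tag pair,
	 * then m_adj() trims the 4 stale bytes from the front of the
	 * mbuf.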
2305 */ 2306 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2307 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2308 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2309 } 2310 2311 2312 static inline void 2313 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2314 { 2315 mxge_softc_t *sc; 2316 struct ifnet *ifp; 2317 struct mbuf *m; 2318 struct ether_header *eh; 2319 mxge_rx_ring_t *rx; 2320 bus_dmamap_t old_map; 2321 int idx; 2322 uint16_t tcpudp_csum; 2323 2324 sc = ss->sc; 2325 ifp = sc->ifp; 2326 rx = &ss->rx_big; 2327 idx = rx->cnt & rx->mask; 2328 rx->cnt += rx->nbufs; 2329 /* save a pointer to the received mbuf */ 2330 m = rx->info[idx].m; 2331 /* try to replace the received mbuf */ 2332 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2333 /* drop the frame -- the old mbuf is re-cycled */ 2334 ifp->if_ierrors++; 2335 return; 2336 } 2337 2338 /* unmap the received buffer */ 2339 old_map = rx->info[idx].map; 2340 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2341 bus_dmamap_unload(rx->dmat, old_map); 2342 2343 /* swap the bus_dmamap_t's */ 2344 rx->info[idx].map = rx->extra_map; 2345 rx->extra_map = old_map; 2346 2347 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2348 * aligned */ 2349 m->m_data += MXGEFW_PAD; 2350 2351 m->m_pkthdr.rcvif = ifp; 2352 m->m_len = m->m_pkthdr.len = len; 2353 ss->ipackets++; 2354 eh = mtod(m, struct ether_header *); 2355 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2356 mxge_vlan_tag_remove(m, &csum); 2357 } 2358 /* if the checksum is valid, mark it in the mbuf header */ 2359 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2360 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2361 return; 2362 /* otherwise, it was a UDP frame, or a TCP frame which 2363 we could not do LRO on. 
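(the LRO hand-off is sketched below)
		 */
#if 0
		/*
		 * Illustrative note, disabled: mxge_lro_rx() appears to
		 * return 0 when it merged the frame into a pending LRO
		 * session and consumed the mbuf (hence the bare return
		 * above); any other value means the frame is passed up
		 * individually with its checksum pre-verified.
		 */
		if (mxge_lro_rx(ss, m, csum) == 0)
			return;	/* consumed by LRO */
#endif
		/*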
Tell the stack that the 2364 checksum is good */ 2365 m->m_pkthdr.csum_data = 0xffff; 2366 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2367 } 2368 /* pass the frame up the stack */ 2369 (*ifp->if_input)(ifp, m); 2370 } 2371 2372 static inline void 2373 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2374 { 2375 mxge_softc_t *sc; 2376 struct ifnet *ifp; 2377 struct ether_header *eh; 2378 struct mbuf *m; 2379 mxge_rx_ring_t *rx; 2380 bus_dmamap_t old_map; 2381 int idx; 2382 uint16_t tcpudp_csum; 2383 2384 sc = ss->sc; 2385 ifp = sc->ifp; 2386 rx = &ss->rx_small; 2387 idx = rx->cnt & rx->mask; 2388 rx->cnt++; 2389 /* save a pointer to the received mbuf */ 2390 m = rx->info[idx].m; 2391 /* try to replace the received mbuf */ 2392 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2393 /* drop the frame -- the old mbuf is re-cycled */ 2394 ifp->if_ierrors++; 2395 return; 2396 } 2397 2398 /* unmap the received buffer */ 2399 old_map = rx->info[idx].map; 2400 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2401 bus_dmamap_unload(rx->dmat, old_map); 2402 2403 /* swap the bus_dmamap_t's */ 2404 rx->info[idx].map = rx->extra_map; 2405 rx->extra_map = old_map; 2406 2407 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2408 * aligned */ 2409 m->m_data += MXGEFW_PAD; 2410 2411 m->m_pkthdr.rcvif = ifp; 2412 m->m_len = m->m_pkthdr.len = len; 2413 ss->ipackets++; 2414 eh = mtod(m, struct ether_header *); 2415 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2416 mxge_vlan_tag_remove(m, &csum); 2417 } 2418 /* if the checksum is valid, mark it in the mbuf header */ 2419 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2420 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2421 return; 2422 /* otherwise, it was a UDP frame, or a TCP frame which 2423 we could not do LRO on. 
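(see the note below)
		 */
#if 0
		/*
		 * Illustrative note, disabled: csum_data of 0xffff with
		 * CSUM_PSEUDO_HDR is the "fully verified" sentinel; the
		 * TCP/UDP input paths xor csum_data with 0xffff and
		 * accept the packet when the result is 0, skipping the
		 * software checksum pass entirely.
		 */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
#endif
		/*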
Tell the stack that the 2424 checksum is good */ 2425 m->m_pkthdr.csum_data = 0xffff; 2426 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2427 } 2428 /* pass the frame up the stack */ 2429 (*ifp->if_input)(ifp, m); 2430 } 2431 2432 static inline void 2433 mxge_clean_rx_done(struct mxge_slice_state *ss) 2434 { 2435 mxge_rx_done_t *rx_done = &ss->rx_done; 2436 struct lro_entry *lro; 2437 int limit = 0; 2438 uint16_t length; 2439 uint16_t checksum; 2440 2441 2442 while (rx_done->entry[rx_done->idx].length != 0) { 2443 length = ntohs(rx_done->entry[rx_done->idx].length); 2444 rx_done->entry[rx_done->idx].length = 0; 2445 checksum = rx_done->entry[rx_done->idx].checksum; 2446 if (length <= (MHLEN - MXGEFW_PAD)) 2447 mxge_rx_done_small(ss, length, checksum); 2448 else 2449 mxge_rx_done_big(ss, length, checksum); 2450 rx_done->cnt++; 2451 rx_done->idx = rx_done->cnt & rx_done->mask; 2452 2453 /* limit potential for livelock */ 2454 if (__predict_false(++limit > rx_done->mask / 2)) 2455 break; 2456 } 2457 while (!SLIST_EMPTY(&ss->lro_active)) { 2458 lro = SLIST_FIRST(&ss->lro_active); 2459 SLIST_REMOVE_HEAD(&ss->lro_active, next); 2460 mxge_lro_flush(ss, lro); 2461 } 2462 } 2463 2464 2465 static inline void 2466 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2467 { 2468 struct ifnet *ifp; 2469 mxge_tx_ring_t *tx; 2470 struct mbuf *m; 2471 bus_dmamap_t map; 2472 int idx; 2473 2474 tx = &ss->tx; 2475 ifp = ss->sc->ifp; 2476 while (tx->pkt_done != mcp_idx) { 2477 idx = tx->done & tx->mask; 2478 tx->done++; 2479 m = tx->info[idx].m; 2480 /* mbuf and DMA map only attached to the first 2481 segment per-mbuf */ 2482 if (m != NULL) { 2483 ifp->if_opackets++; 2484 tx->info[idx].m = NULL; 2485 map = tx->info[idx].map; 2486 bus_dmamap_unload(tx->dmat, map); 2487 m_freem(m); 2488 } 2489 if (tx->info[idx].flag) { 2490 tx->info[idx].flag = 0; 2491 tx->pkt_done++; 2492 } 2493 } 2494 2495 /* If we have space, clear IFF_OACTIVE to tell the stack that 2496 its OK to send packets */ 2497 2498 if (ifp->if_drv_flags & IFF_DRV_OACTIVE && 2499 tx->req - tx->done < (tx->mask + 1)/4) { 2500 mtx_lock(&ss->tx.mtx); 2501 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 2502 ss->tx.wake++; 2503 mxge_start_locked(ss); 2504 mtx_unlock(&ss->tx.mtx); 2505 } 2506 } 2507 2508 static struct mxge_media_type mxge_media_types[] = 2509 { 2510 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2511 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2512 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2513 {0, (1 << 5), "10GBASE-ER"}, 2514 {0, (1 << 4), "10GBASE-LRM"}, 2515 {0, (1 << 3), "10GBASE-SW"}, 2516 {0, (1 << 2), "10GBASE-LW"}, 2517 {0, (1 << 1), "10GBASE-EW"}, 2518 {0, (1 << 0), "Reserved"} 2519 }; 2520 2521 static void 2522 mxge_set_media(mxge_softc_t *sc, int type) 2523 { 2524 sc->media_flags |= type; 2525 ifmedia_add(&sc->media, sc->media_flags, 0, NULL); 2526 ifmedia_set(&sc->media, sc->media_flags); 2527 } 2528 2529 2530 /* 2531 * Determine the media type for a NIC. Some XFPs will identify 2532 * themselves only when their link is up, so this is initiated via a 2533 * link up interrupt. However, this can potentially take up to 2534 * several milliseconds, so it is run via the watchdog routine, rather 2535 * than in the interrupt handler itself. This need only be done 2536 * once, not each time the link is up. 
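A sketch of the decode follows.
 */
/*
 * Illustrative sketch only (disabled): the XFP compliance byte read
 * below is a bitmask matched against mxge_media_types[] above; a cage
 * reporting bit 7 (0x80) is 10GBASE-SR and bit 6 (0x40) is 10GBASE-LR,
 * while CX4 copper is special-cased as the full-byte 0x7f pattern that
 * is checked first.
 */
#if 0
	if (cmd.data0 & (1 << 7))
		mxge_set_media(sc, IFM_10G_SR);
#endif
/* media probe follows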
2537 */
2538 static void
2539 mxge_media_probe(mxge_softc_t *sc)
2540 {
2541 	mxge_cmd_t cmd;
2542 	char *ptr;
2543 	int i, err, ms;
2544
2545 	sc->need_media_probe = 0;
2546
2547 	/* if we've already set a media type, we're done */
2548 	if (sc->media_flags != (IFM_ETHER | IFM_AUTO))
2549 		return;
2550
2551 	/*
2552 	 * parse the product code to determine the interface type
2553 	 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2554 	 * after the 3rd dash in the driver's cached copy of the
2555 	 * EEPROM's product code string.
2556 	 */
2557 	ptr = sc->product_code_string;
2558 	if (ptr == NULL) {
2559 		device_printf(sc->dev, "Missing product code\n");
		return;	/* nothing to parse; avoid dereferencing NULL below */
2560 	}
2561
2562 	for (i = 0; i < 3; i++, ptr++) {
2563 		ptr = index(ptr, '-');
2564 		if (ptr == NULL) {
2565 			device_printf(sc->dev,
2566 				      "only %d dashes in PC?!?\n", i);
2567 			return;
2568 		}
2569 	}
2570 	if (*ptr == 'C') {
2571 		mxge_set_media(sc, IFM_10G_CX4);
2572 		return;
2573 	}
2574 	else if (*ptr == 'Q') {
2575 		device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2576 		/* FreeBSD has no media type for Quad ribbon fiber */
2577 		return;
2578 	}
2579
2580 	if (*ptr != 'R') {
2581 		device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2582 		return;
2583 	}
2584
2585 	/*
2586 	 * At this point we know the NIC has an XFP cage, so now we
2587 	 * try to determine what is in the cage by using the
2588 	 * firmware's XFP I2C commands to read the XFP 10GbE compliance
2589 	 * register. We read just one byte, which may take over
2590 	 * a millisecond.
2591 	 */
2592
2593 	cmd.data0 = 0;	 /* just fetch 1 byte, not all 256 */
2594 	cmd.data1 = MXGE_XFP_COMPLIANCE_BYTE;	 /* the byte we want */
2595 	err = mxge_send_cmd(sc, MXGEFW_CMD_XFP_I2C_READ, &cmd);
2596 	if (err == MXGEFW_CMD_ERROR_XFP_FAILURE) {
2597 		device_printf(sc->dev, "failed to read XFP\n");
2598 	}
2599 	if (err == MXGEFW_CMD_ERROR_XFP_ABSENT) {
2600 		device_printf(sc->dev, "Type R with no XFP!?!?\n");
2601 	}
2602 	if (err != MXGEFW_CMD_OK) {
2603 		return;
2604 	}
2605
2606 	/* now we wait for the data to be cached */
2607 	cmd.data0 = MXGE_XFP_COMPLIANCE_BYTE;
2608 	err = mxge_send_cmd(sc, MXGEFW_CMD_XFP_BYTE, &cmd);
2609 	for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
2610 		DELAY(1000);
2611 		cmd.data0 = MXGE_XFP_COMPLIANCE_BYTE;
2612 		err = mxge_send_cmd(sc, MXGEFW_CMD_XFP_BYTE, &cmd);
2613 	}
2614 	if (err != MXGEFW_CMD_OK) {
2615 		device_printf(sc->dev, "failed to read XFP (%d, %dms)\n",
2616 			      err, ms);
2617 		return;
2618 	}
2619
2620 	if (cmd.data0 == mxge_media_types[0].bitmask) {
2621 		if (mxge_verbose)
2622 			device_printf(sc->dev, "XFP:%s\n",
2623 				      mxge_media_types[0].name);
2624 		mxge_set_media(sc, IFM_10G_CX4);
2625 		return;
2626 	}
2627 	for (i = 1;
2628 	     i < sizeof (mxge_media_types) / sizeof (mxge_media_types[0]);
2629 	     i++) {
2630 		if (cmd.data0 & mxge_media_types[i].bitmask) {
2631 			if (mxge_verbose)
2632 				device_printf(sc->dev, "XFP:%s\n",
2633 					      mxge_media_types[i].name);
2634
2635 			mxge_set_media(sc, mxge_media_types[i].flag);
2636 			return;
2637 		}
2638 	}
2639 	device_printf(sc->dev, "XFP media 0x%x unknown\n", cmd.data0);
2640
2641 	return;
2642 }
2643
2644 static void
2645 mxge_intr(void *arg)
2646 {
2647 	struct mxge_slice_state *ss = arg;
2648 	mxge_softc_t *sc = ss->sc;
2649 	mcp_irq_data_t *stats = ss->fw_stats;
2650 	mxge_tx_ring_t *tx = &ss->tx;
2651 	mxge_rx_done_t *rx_done = &ss->rx_done;
2652 	uint32_t send_done_count;
2653 	uint8_t valid;
2654
2655
2656 	/* an interrupt on a non-zero slice is implicitly valid
2657 	   since MSI-X irqs are not shared */
2658 	if (ss != sc->ss) {
2659 		mxge_clean_rx_done(ss);
2660 		*ss->irq_claim = 
be32toh(3); 2661 return; 2662 } 2663 2664 /* make sure the DMA has finished */ 2665 if (!stats->valid) { 2666 return; 2667 } 2668 valid = stats->valid; 2669 2670 if (sc->legacy_irq) { 2671 /* lower legacy IRQ */ 2672 *sc->irq_deassert = 0; 2673 if (!mxge_deassert_wait) 2674 /* don't wait for conf. that irq is low */ 2675 stats->valid = 0; 2676 } else { 2677 stats->valid = 0; 2678 } 2679 2680 /* loop while waiting for legacy irq deassertion */ 2681 do { 2682 /* check for transmit completes and receives */ 2683 send_done_count = be32toh(stats->send_done_count); 2684 while ((send_done_count != tx->pkt_done) || 2685 (rx_done->entry[rx_done->idx].length != 0)) { 2686 mxge_tx_done(ss, (int)send_done_count); 2687 mxge_clean_rx_done(ss); 2688 send_done_count = be32toh(stats->send_done_count); 2689 } 2690 if (sc->legacy_irq && mxge_deassert_wait) 2691 wmb(); 2692 } while (*((volatile uint8_t *) &stats->valid)); 2693 2694 if (__predict_false(stats->stats_updated)) { 2695 if (sc->link_state != stats->link_up) { 2696 sc->link_state = stats->link_up; 2697 if (sc->link_state) { 2698 if_link_state_change(sc->ifp, LINK_STATE_UP); 2699 if (mxge_verbose) 2700 device_printf(sc->dev, "link up\n"); 2701 } else { 2702 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 2703 if (mxge_verbose) 2704 device_printf(sc->dev, "link down\n"); 2705 } 2706 sc->need_media_probe = 1; 2707 } 2708 if (sc->rdma_tags_available != 2709 be32toh(stats->rdma_tags_available)) { 2710 sc->rdma_tags_available = 2711 be32toh(stats->rdma_tags_available); 2712 device_printf(sc->dev, "RDMA timed out! %d tags " 2713 "left\n", sc->rdma_tags_available); 2714 } 2715 2716 if (stats->link_down) { 2717 sc->down_cnt += stats->link_down; 2718 sc->link_state = 0; 2719 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 2720 } 2721 } 2722 2723 /* check to see if we have rx token to pass back */ 2724 if (valid & 0x1) 2725 *ss->irq_claim = be32toh(3); 2726 *(ss->irq_claim + 1) = be32toh(3); 2727 } 2728 2729 static void 2730 mxge_init(void *arg) 2731 { 2732 } 2733 2734 2735 2736 static void 2737 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 2738 { 2739 struct lro_entry *lro_entry; 2740 int i; 2741 2742 while (!SLIST_EMPTY(&ss->lro_free)) { 2743 lro_entry = SLIST_FIRST(&ss->lro_free); 2744 SLIST_REMOVE_HEAD(&ss->lro_free, next); 2745 free(lro_entry, M_DEVBUF); 2746 } 2747 2748 for (i = 0; i <= ss->rx_big.mask; i++) { 2749 if (ss->rx_big.info[i].m == NULL) 2750 continue; 2751 bus_dmamap_unload(ss->rx_big.dmat, 2752 ss->rx_big.info[i].map); 2753 m_freem(ss->rx_big.info[i].m); 2754 ss->rx_big.info[i].m = NULL; 2755 } 2756 2757 for (i = 0; i <= ss->rx_small.mask; i++) { 2758 if (ss->rx_small.info[i].m == NULL) 2759 continue; 2760 bus_dmamap_unload(ss->rx_small.dmat, 2761 ss->rx_small.info[i].map); 2762 m_freem(ss->rx_small.info[i].m); 2763 ss->rx_small.info[i].m = NULL; 2764 } 2765 2766 /* transmit ring used only on the first slice */ 2767 if (ss->tx.info == NULL) 2768 return; 2769 2770 for (i = 0; i <= ss->tx.mask; i++) { 2771 ss->tx.info[i].flag = 0; 2772 if (ss->tx.info[i].m == NULL) 2773 continue; 2774 bus_dmamap_unload(ss->tx.dmat, 2775 ss->tx.info[i].map); 2776 m_freem(ss->tx.info[i].m); 2777 ss->tx.info[i].m = NULL; 2778 } 2779 } 2780 2781 static void 2782 mxge_free_mbufs(mxge_softc_t *sc) 2783 { 2784 int slice; 2785 2786 for (slice = 0; slice < sc->num_slices; slice++) 2787 mxge_free_slice_mbufs(&sc->ss[slice]); 2788 } 2789 2790 static void 2791 mxge_free_slice_rings(struct mxge_slice_state *ss) 2792 { 2793 int i; 2794 2795 2796 if (ss->rx_done.entry 
!= NULL)
2797 		mxge_dma_free(&ss->rx_done.dma);
2798 	ss->rx_done.entry = NULL;
2799
2800 	if (ss->tx.req_bytes != NULL)
2801 		free(ss->tx.req_bytes, M_DEVBUF);
2802 	ss->tx.req_bytes = NULL;
2803
2804 	if (ss->tx.seg_list != NULL)
2805 		free(ss->tx.seg_list, M_DEVBUF);
2806 	ss->tx.seg_list = NULL;
2807
2808 	if (ss->rx_small.shadow != NULL)
2809 		free(ss->rx_small.shadow, M_DEVBUF);
2810 	ss->rx_small.shadow = NULL;
2811
2812 	if (ss->rx_big.shadow != NULL)
2813 		free(ss->rx_big.shadow, M_DEVBUF);
2814 	ss->rx_big.shadow = NULL;
2815
2816 	if (ss->tx.info != NULL) {
2817 		if (ss->tx.dmat != NULL) {
2818 			for (i = 0; i <= ss->tx.mask; i++) {
2819 				bus_dmamap_destroy(ss->tx.dmat,
2820 						   ss->tx.info[i].map);
2821 			}
2822 			bus_dma_tag_destroy(ss->tx.dmat);
2823 		}
2824 		free(ss->tx.info, M_DEVBUF);
2825 	}
2826 	ss->tx.info = NULL;
2827
2828 	if (ss->rx_small.info != NULL) {
2829 		if (ss->rx_small.dmat != NULL) {
2830 			for (i = 0; i <= ss->rx_small.mask; i++) {
2831 				bus_dmamap_destroy(ss->rx_small.dmat,
2832 						   ss->rx_small.info[i].map);
2833 			}
2834 			bus_dmamap_destroy(ss->rx_small.dmat,
2835 					   ss->rx_small.extra_map);
2836 			bus_dma_tag_destroy(ss->rx_small.dmat);
2837 		}
2838 		free(ss->rx_small.info, M_DEVBUF);
2839 	}
2840 	ss->rx_small.info = NULL;
2841
2842 	if (ss->rx_big.info != NULL) {
2843 		if (ss->rx_big.dmat != NULL) {
2844 			for (i = 0; i <= ss->rx_big.mask; i++) {
2845 				bus_dmamap_destroy(ss->rx_big.dmat,
2846 						   ss->rx_big.info[i].map);
2847 			}
2848 			bus_dmamap_destroy(ss->rx_big.dmat,
2849 					   ss->rx_big.extra_map);
2850 			bus_dma_tag_destroy(ss->rx_big.dmat);
2851 		}
2852 		free(ss->rx_big.info, M_DEVBUF);
2853 	}
2854 	ss->rx_big.info = NULL;
2855 }
2856
2857 static void
2858 mxge_free_rings(mxge_softc_t *sc)
2859 {
2860 	int slice;
2861
2862 	for (slice = 0; slice < sc->num_slices; slice++)
2863 		mxge_free_slice_rings(&sc->ss[slice]);
2864 }
2865
2866 static int
2867 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
2868 		       int tx_ring_entries)
2869 {
2870 	mxge_softc_t *sc = ss->sc;
2871 	size_t bytes;
2872 	int err, i;
2873
2874 	err = ENOMEM;
2875
2876 	/* allocate per-slice receive resources */
2877
2878 	ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
2879 	ss->rx_done.mask = (2 * rx_ring_entries) - 1;
2880
2881 	/* allocate the rx shadow rings */
2882 	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
2883 	ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
2884 	if (ss->rx_small.shadow == NULL)
2885 		return err;
2886
2887 	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
2888 	ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
2889 	if (ss->rx_big.shadow == NULL)
2890 		return err;
2891
2892 	/* allocate the rx host info rings */
2893 	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
2894 	ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
2895 	if (ss->rx_small.info == NULL)
2896 		return err;
2897
2898 	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
2899 	ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
2900 	if (ss->rx_big.info == NULL)
2901 		return err;
2902
2903 	/* allocate the rx busdma resources */
2904 	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
2905 				 1,			/* alignment */
2906 				 4096,			/* boundary */
2907 				 BUS_SPACE_MAXADDR,	/* low */
2908 				 BUS_SPACE_MAXADDR,	/* high */
2909 				 NULL, NULL,		/* filter */
2910 				 MHLEN,			/* maxsize */
2911 				 1,			/* num segs */
2912 				 MHLEN,			/* maxsegsize */
2913 				 BUS_DMA_ALLOCNOW,	/* flags */
2914 				 NULL, NULL,		/* lock */
2915 				 &ss->rx_small.dmat);	/* tag */
2916 	if (err != 0) {
2917 		device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
2918 			      err);
2919 		return err;
2920 	}
2921
2922 	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
2923 				 1,			/* alignment */
2924 #if MXGE_VIRT_JUMBOS
2925 				 4096,			/* boundary */
2926 #else
2927 				 0,			/* boundary */
2928 #endif
2929 				 BUS_SPACE_MAXADDR,	/* low */
2930 				 BUS_SPACE_MAXADDR,	/* high */
2931 				 NULL, NULL,		/* filter */
2932 				 3*4096,		/* maxsize */
2933 #if MXGE_VIRT_JUMBOS
2934 				 3,			/* num segs */
2935 				 4096,			/* maxsegsize*/
2936 #else
2937 				 1,			/* num segs */
2938 				 MJUM9BYTES,		/* maxsegsize*/
2939 #endif
2940 				 BUS_DMA_ALLOCNOW,	/* flags */
2941 				 NULL, NULL,		/* lock */
2942 				 &ss->rx_big.dmat);	/* tag */
2943 	if (err != 0) {
2944 		device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
2945 			      err);
2946 		return err;
2947 	}
2948 	for (i = 0; i <= ss->rx_small.mask; i++) {
2949 		err = bus_dmamap_create(ss->rx_small.dmat, 0,
2950 					&ss->rx_small.info[i].map);
2951 		if (err != 0) {
2952 			device_printf(sc->dev, "Err %d rx_small dmamap\n",
2953 				      err);
2954 			return err;
2955 		}
2956 	}
2957 	err = bus_dmamap_create(ss->rx_small.dmat, 0,
2958 				&ss->rx_small.extra_map);
2959 	if (err != 0) {
2960 		device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
2961 			      err);
2962 		return err;
2963 	}
2964
2965 	for (i = 0; i <= ss->rx_big.mask; i++) {
2966 		err = bus_dmamap_create(ss->rx_big.dmat, 0,
2967 					&ss->rx_big.info[i].map);
2968 		if (err != 0) {
2969 			device_printf(sc->dev, "Err %d rx_big dmamap\n",
2970 				      err);
2971 			return err;
2972 		}
2973 	}
2974 	err = bus_dmamap_create(ss->rx_big.dmat, 0,
2975 				&ss->rx_big.extra_map);
2976 	if (err != 0) {
2977 		device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
2978 			      err);
2979 		return err;
2980 	}
2981
2982 	/* now allocate TX resources */
2983
2984 	/* only use a single TX ring for now */
2985 	if (ss != ss->sc->ss)
2986 		return 0;
2987
2988 	ss->tx.mask = tx_ring_entries - 1;
2989 	ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
2990
2991
2992 	/* allocate the tx request copy block */
2993 	bytes = 8 +
2994 		sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
2995 	ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
2996 	if (ss->tx.req_bytes == NULL)
2997 		return err;
2998 	/* ensure req_list entries are aligned to 8 bytes */
2999 	ss->tx.req_list = (mcp_kreq_ether_send_t *)
3000 		((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
3001
3002 	/* allocate the tx busdma segment list */
3003 	bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3004 	ss->tx.seg_list = (bus_dma_segment_t *)
3005 		malloc(bytes, M_DEVBUF, M_WAITOK);
3006 	if (ss->tx.seg_list == NULL)
3007 		return err;
3008
3009 	/* allocate the tx host info ring */
3010 	bytes = tx_ring_entries * sizeof (*ss->tx.info);
3011 	ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3012 	if (ss->tx.info == NULL)
3013 		return err;
3014
3015 	/* allocate the tx busdma resources */
3016 	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
3017 				 1,			/* alignment */
3018 				 sc->tx_boundary,	/* boundary */
3019 				 BUS_SPACE_MAXADDR,	/* low */
3020 				 BUS_SPACE_MAXADDR,	/* high */
3021 				 NULL, NULL,		/* filter */
3022 				 65536 + 256,		/* maxsize */
3023 				 ss->tx.max_desc - 2,	/* num segs */
3024 				 sc->tx_boundary,	/* maxsegsz */
3025 				 BUS_DMA_ALLOCNOW,	/* flags */
3026 				 NULL, NULL,		/* lock */
3027 				 &ss->tx.dmat);		/* tag */
3028
3029 	if (err != 0) {
3030 		device_printf(sc->dev, "Err %d allocating tx dmat\n",
3031 			      err);
3032 		return err;
3033 	}
3034
3035 	/* now use these tags to setup dmamaps for each slot
3036 	   in the ring */
3037 	for (i = 0; i <= ss->tx.mask; i++) {
3038 		err = bus_dmamap_create(ss->tx.dmat, 0,
3039 					&ss->tx.info[i].map);
3040 		if (err != 0) {
3041 			device_printf(sc->dev, "Err %d tx dmamap\n",
3042 				      err);
3043 			return err;
3044 		}
3045 	}
3046 	return 0;
3047
3048 }
3049
3050 static int
3051 mxge_alloc_rings(mxge_softc_t *sc)
3052 {
3053 	mxge_cmd_t cmd;
3054 	int tx_ring_size;
3055 	int tx_ring_entries, rx_ring_entries;
3056 	int err, slice;
3057
3058 	/* get ring sizes */
3059 	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3060 	tx_ring_size = cmd.data0;
3061 	if (err != 0) {
3062 		device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3063 		goto abort;
3064 	}
3065
3066 	tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3067 	rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3068 	IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
3069 	sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
3070 	IFQ_SET_READY(&sc->ifp->if_snd);
3071
3072 	for (slice = 0; slice < sc->num_slices; slice++) {
3073 		err = mxge_alloc_slice_rings(&sc->ss[slice],
3074 					     rx_ring_entries,
3075 					     tx_ring_entries);
3076 		if (err != 0)
3077 			goto abort;
3078 	}
3079 	return 0;
3080
3081 abort:
3082 	mxge_free_rings(sc);
3083 	return err;
3084
3085 }
3086
3087
3088 static void
3089 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3090 {
3091 	int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3092
3093 	if (bufsize < MCLBYTES) {
3094 		/* easy, everything fits in a single buffer */
3095 		*big_buf_size = MCLBYTES;
3096 		*cl_size = MCLBYTES;
3097 		*nbufs = 1;
3098 		return;
3099 	}
3100
3101 	if (bufsize < MJUMPAGESIZE) {
3102 		/* still easy, everything still fits in a single buffer */
3103 		*big_buf_size = MJUMPAGESIZE;
3104 		*cl_size = MJUMPAGESIZE;
3105 		*nbufs = 1;
3106 		return;
3107 	}
3108 #if MXGE_VIRT_JUMBOS
3109 	/* now we need to use virtually contiguous buffers */
3110 	*cl_size = MJUM9BYTES;
3111 	*big_buf_size = 4096;
3112 	*nbufs = mtu / 4096 + 1;
3113 	/* needs to be a power of two, so round up */
3114 	if (*nbufs == 3)
3115 		*nbufs = 4;
3116 #else
3117 	*cl_size = MJUM9BYTES;
3118 	*big_buf_size = MJUM9BYTES;
3119 	*nbufs = 1;
3120 #endif
3121 }
3122
3123 static int
3124 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3125 {
3126 	mxge_softc_t *sc;
3127 	mxge_cmd_t cmd;
3128 	bus_dmamap_t map;
3129 	struct lro_entry *lro_entry;
3130 	int err, i, slice;
3131
3132
3133 	sc = ss->sc;
3134 	slice = ss - sc->ss;
3135
3136 	SLIST_INIT(&ss->lro_free);
3137 	SLIST_INIT(&ss->lro_active);
3138
3139 	for (i = 0; i < sc->lro_cnt; i++) {
3140 		lro_entry = (struct lro_entry *)
3141 			malloc(sizeof (*lro_entry), M_DEVBUF,
3142 			       M_NOWAIT | M_ZERO);
3143 		if (lro_entry == NULL) {
3144 			sc->lro_cnt = i;
3145 			break;
3146 		}
3147 		SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next);
3148 	}
3149 	/* get the lanai pointers to the send and receive rings */
3150
3151 	err = 0;
3152 	/* We currently only send from the first slice */
3153 	if (slice == 0) {
3154 		cmd.data0 = slice;
3155 		err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3156 		ss->tx.lanai =
3157 			(volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3158 	}
3159 	cmd.data0 = slice;
3160 	err |= mxge_send_cmd(sc,
3161 			     MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3162 	ss->rx_small.lanai =
3163 		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3164 	cmd.data0 = slice;
3165 	err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3166 	ss->rx_big.lanai =
3167 		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3168
3169 	if (err != 0) {
3170 		device_printf(sc->dev,
3171 			      "failed to get ring sizes or locations\n");
3172 		return EIO;
3173 	}
3174
3175 	/*
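(burst posting sketched below)
	 */
	/*
	 * Illustrative sketch only (disabled): receive buffers are
	 * posted to the NIC in bursts; mxge_get_buf_small()/_big()
	 * defer the PIO copy until a group of 8 ring slots is complete
	 * ((idx & 7) == 7) and then hand all 8 descriptors to
	 * mxge_submit_8rx() at once, which is what the stocking loops
	 * below rely on.
	 */
#if 0
	for (i = 0; i < 8; i++)
		(void) mxge_get_buf_small(ss, ss->rx_small.info[i].map, i);
	/* the call with i == 7 triggers the 8-descriptor burst */
#endif
	/*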
stock receive rings */ 3176 for (i = 0; i <= ss->rx_small.mask; i++) { 3177 map = ss->rx_small.info[i].map; 3178 err = mxge_get_buf_small(ss, map, i); 3179 if (err) { 3180 device_printf(sc->dev, "alloced %d/%d smalls\n", 3181 i, ss->rx_small.mask + 1); 3182 return ENOMEM; 3183 } 3184 } 3185 for (i = 0; i <= ss->rx_big.mask; i++) { 3186 ss->rx_big.shadow[i].addr_low = 0xffffffff; 3187 ss->rx_big.shadow[i].addr_high = 0xffffffff; 3188 } 3189 ss->rx_big.nbufs = nbufs; 3190 ss->rx_big.cl_size = cl_size; 3191 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { 3192 map = ss->rx_big.info[i].map; 3193 err = mxge_get_buf_big(ss, map, i); 3194 if (err) { 3195 device_printf(sc->dev, "alloced %d/%d bigs\n", 3196 i, ss->rx_big.mask + 1); 3197 return ENOMEM; 3198 } 3199 } 3200 return 0; 3201 } 3202 3203 static int 3204 mxge_open(mxge_softc_t *sc) 3205 { 3206 mxge_cmd_t cmd; 3207 int err, big_bytes, nbufs, slice, cl_size, i; 3208 bus_addr_t bus; 3209 volatile uint8_t *itable; 3210 3211 /* Copy the MAC address in case it was overridden */ 3212 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 3213 3214 err = mxge_reset(sc, 1); 3215 if (err != 0) { 3216 device_printf(sc->dev, "failed to reset\n"); 3217 return EIO; 3218 } 3219 3220 if (sc->num_slices > 1) { 3221 /* setup the indirection table */ 3222 cmd.data0 = sc->num_slices; 3223 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 3224 &cmd); 3225 3226 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 3227 &cmd); 3228 if (err != 0) { 3229 device_printf(sc->dev, 3230 "failed to setup rss tables\n"); 3231 return err; 3232 } 3233 3234 /* just enable an identity mapping */ 3235 itable = sc->sram + cmd.data0; 3236 for (i = 0; i < sc->num_slices; i++) 3237 itable[i] = (uint8_t)i; 3238 3239 cmd.data0 = 1; 3240 cmd.data1 = mxge_rss_hash_type; 3241 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3242 if (err != 0) { 3243 device_printf(sc->dev, "failed to enable slices\n"); 3244 return err; 3245 } 3246 } 3247 3248 3249 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); 3250 3251 cmd.data0 = nbufs; 3252 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 3253 &cmd); 3254 /* error is only meaningful if we're trying to set 3255 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 3256 if (err && nbufs > 1) { 3257 device_printf(sc->dev, 3258 "Failed to set alway-use-n to %d\n", 3259 nbufs); 3260 return EIO; 3261 } 3262 /* Give the firmware the mtu and the big and small buffer 3263 sizes. The firmware wants the big buf size to be a power 3264 of two. 
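(a worked example follows)
	 */
	/*
	 * Illustrative sketch only (disabled): what mxge_choose_params()
	 * above picks for two common MTUs, assuming 4KB pages, standard
	 * cluster sizes and MXGEFW_PAD of 2 (overhead = 14 + 4 + 2 = 20):
	 *
	 *   mtu 1500 -> bufsize 1520 -> MCLBYTES   (2KB cluster, nbufs 1)
	 *   mtu 9000 -> bufsize 9020 -> MJUM9BYTES (9KB cluster, nbufs 1,
	 *                                           sans MXGE_VIRT_JUMBOS)
	 */
#if 0
	mxge_choose_params(9000, &big_bytes, &cl_size, &nbufs);
#endif
	/*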
Luckily, FreeBSD's clusters are powers of two */ 3265 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3266 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3267 cmd.data0 = MHLEN - MXGEFW_PAD; 3268 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3269 &cmd); 3270 cmd.data0 = big_bytes; 3271 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3272 3273 if (err != 0) { 3274 device_printf(sc->dev, "failed to setup params\n"); 3275 goto abort; 3276 } 3277 3278 /* Now give him the pointer to the stats block */ 3279 cmd.data0 = MXGE_LOWPART_TO_U32(sc->ss->fw_stats_dma.bus_addr); 3280 cmd.data1 = MXGE_HIGHPART_TO_U32(sc->ss->fw_stats_dma.bus_addr); 3281 cmd.data2 = sizeof(struct mcp_irq_data); 3282 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3283 3284 if (err != 0) { 3285 bus = sc->ss->fw_stats_dma.bus_addr; 3286 bus += offsetof(struct mcp_irq_data, send_done_count); 3287 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3288 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3289 err = mxge_send_cmd(sc, 3290 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3291 &cmd); 3292 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3293 sc->fw_multicast_support = 0; 3294 } else { 3295 sc->fw_multicast_support = 1; 3296 } 3297 3298 if (err != 0) { 3299 device_printf(sc->dev, "failed to setup params\n"); 3300 goto abort; 3301 } 3302 3303 for (slice = 0; slice < sc->num_slices; slice++) { 3304 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3305 if (err != 0) { 3306 device_printf(sc->dev, "couldn't open slice %d\n", 3307 slice); 3308 goto abort; 3309 } 3310 } 3311 3312 /* Finally, start the firmware running */ 3313 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3314 if (err) { 3315 device_printf(sc->dev, "Couldn't bring up link\n"); 3316 goto abort; 3317 } 3318 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3319 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3320 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3321 3322 return 0; 3323 3324 3325 abort: 3326 mxge_free_mbufs(sc); 3327 3328 return err; 3329 } 3330 3331 static int 3332 mxge_close(mxge_softc_t *sc) 3333 { 3334 mxge_cmd_t cmd; 3335 int err, old_down_cnt; 3336 3337 callout_stop(&sc->co_hdl); 3338 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3339 old_down_cnt = sc->down_cnt; 3340 wmb(); 3341 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3342 if (err) { 3343 device_printf(sc->dev, "Couldn't bring down link\n"); 3344 } 3345 if (old_down_cnt == sc->down_cnt) { 3346 /* wait for down irq */ 3347 DELAY(10 * sc->intr_coal_delay); 3348 } 3349 wmb(); 3350 if (old_down_cnt == sc->down_cnt) { 3351 device_printf(sc->dev, "never got down irq\n"); 3352 } 3353 3354 mxge_free_mbufs(sc); 3355 3356 return 0; 3357 } 3358 3359 static void 3360 mxge_setup_cfg_space(mxge_softc_t *sc) 3361 { 3362 device_t dev = sc->dev; 3363 int reg; 3364 uint16_t cmd, lnk, pectl; 3365 3366 /* find the PCIe link width and set max read request to 4KB*/ 3367 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 3368 lnk = pci_read_config(dev, reg + 0x12, 2); 3369 sc->link_width = (lnk >> 4) & 0x3f; 3370 3371 pectl = pci_read_config(dev, reg + 0x8, 2); 3372 pectl = (pectl & ~0x7000) | (5 << 12); 3373 pci_write_config(dev, reg + 0x8, pectl, 2); 3374 } 3375 3376 /* Enable DMA and Memory space access */ 3377 pci_enable_busmaster(dev); 3378 cmd = pci_read_config(dev, PCIR_COMMAND, 2); 3379 cmd |= PCIM_CMD_MEMEN; 3380 pci_write_config(dev, PCIR_COMMAND, cmd, 2); 3381 } 3382 3383 static uint32_t 3384 mxge_read_reboot(mxge_softc_t *sc) 3385 { 3386 
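/*
	 * Summary of the vendor-specific window driven below (offsets
	 * are relative to the PCI vendor capability located via
	 * pci_find_extcap()):
	 *   vs + 0x10  mode register  -- writing 0x3 enables 32-bit reads
	 *   vs + 0x18  address window -- 0xfffffff0 selects reboot status
	 *   vs + 0x14  data window    -- reads back the selected register
	 */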
device_t dev = sc->dev; 3387 uint32_t vs; 3388 3389 /* find the vendor specific offset */ 3390 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) { 3391 device_printf(sc->dev, 3392 "could not find vendor specific offset\n"); 3393 return (uint32_t)-1; 3394 } 3395 /* enable read32 mode */ 3396 pci_write_config(dev, vs + 0x10, 0x3, 1); 3397 /* tell NIC which register to read */ 3398 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3399 return (pci_read_config(dev, vs + 0x14, 4)); 3400 } 3401 3402 static int 3403 mxge_watchdog_reset(mxge_softc_t *sc) 3404 { 3405 struct pci_devinfo *dinfo; 3406 int err; 3407 uint32_t reboot; 3408 uint16_t cmd; 3409 3410 err = ENXIO; 3411 3412 device_printf(sc->dev, "Watchdog reset!\n"); 3413 3414 /* 3415 * check to see if the NIC rebooted. If it did, then all of 3416 * PCI config space has been reset, and things like the 3417 * busmaster bit will be zero. If this is the case, then we 3418 * must restore PCI config space before the NIC can be used 3419 * again 3420 */ 3421 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3422 if (cmd == 0xffff) { 3423 /* 3424 * maybe the watchdog caught the NIC rebooting; wait 3425 * up to 100ms for it to finish. If it does not come 3426 * back, then give up 3427 */ 3428 DELAY(1000*100); 3429 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3430 if (cmd == 0xffff) { 3431 device_printf(sc->dev, "NIC disappeared!\n"); 3432 return (err); 3433 } 3434 } 3435 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3436 /* print the reboot status */ 3437 reboot = mxge_read_reboot(sc); 3438 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3439 reboot); 3440 /* restore PCI configuration space */ 3441 dinfo = device_get_ivars(sc->dev); 3442 pci_cfg_restore(sc->dev, dinfo); 3443 3444 /* and redo any changes we made to our config space */ 3445 mxge_setup_cfg_space(sc); 3446 3447 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) { 3448 mxge_close(sc); 3449 err = mxge_open(sc); 3450 } 3451 } else { 3452 device_printf(sc->dev, "NIC did not reboot, ring state:\n"); 3453 device_printf(sc->dev, "tx.req=%d tx.done=%d\n", 3454 sc->ss->tx.req, sc->ss->tx.done); 3455 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3456 sc->ss->tx.pkt_done, 3457 be32toh(sc->ss->fw_stats->send_done_count)); 3458 device_printf(sc->dev, "not resetting\n"); 3459 } 3460 return (err); 3461 } 3462 3463 static int 3464 mxge_watchdog(mxge_softc_t *sc) 3465 { 3466 mxge_tx_ring_t *tx = &sc->ss->tx; 3467 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3468 int err = 0; 3469 3470 /* see if we have outstanding transmits, which 3471 have been pending for more than mxge_ticks */ 3472 if (tx->req != tx->done && 3473 tx->watchdog_req != tx->watchdog_done && 3474 tx->done == tx->watchdog_done) { 3475 /* check for pause blocking before resetting */ 3476 if (tx->watchdog_rx_pause == rx_pause) 3477 err = mxge_watchdog_reset(sc); 3478 else 3479 device_printf(sc->dev, "Flow control blocking " 3480 "xmits, check link partner\n"); 3481 } 3482 3483 tx->watchdog_req = tx->req; 3484 tx->watchdog_done = tx->done; 3485 tx->watchdog_rx_pause = rx_pause; 3486 3487 if (sc->need_media_probe) 3488 mxge_media_probe(sc); 3489 return (err); 3490 } 3491 3492 static void 3493 mxge_update_stats(mxge_softc_t *sc) 3494 { 3495 struct mxge_slice_state *ss; 3496 u_long ipackets = 0; 3497 int slice; 3498 3499 for(slice = 0; slice < sc->num_slices; slice++) { 3500 ss = &sc->ss[slice]; 3501 ipackets += ss->ipackets; 3502 } 3503 sc->ifp->if_ipackets = ipackets; 3504 3505 } 3506 static void 3507 mxge_tick(void *arg) 3508 
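/*
 * Periodic housekeeping driven by sc->co_hdl: every pass aggregates
 * the per-slice stats, and roughly every fourth pass (gated by
 * watchdog_countdown) runs mxge_watchdog() above.  Note the callout
 * is deliberately not re-armed if the watchdog reset path failed.
 */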
{ 3509 mxge_softc_t *sc = arg; 3510 int err = 0; 3511 3512 /* aggregate stats from different slices */ 3513 mxge_update_stats(sc); 3514 if (!sc->watchdog_countdown) { 3515 err = mxge_watchdog(sc); 3516 sc->watchdog_countdown = 4; 3517 } 3518 sc->watchdog_countdown--; 3519 if (err == 0) 3520 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3521 3522 } 3523 3524 static int 3525 mxge_media_change(struct ifnet *ifp) 3526 { 3527 return EINVAL; 3528 } 3529 3530 static int 3531 mxge_change_mtu(mxge_softc_t *sc, int mtu) 3532 { 3533 struct ifnet *ifp = sc->ifp; 3534 int real_mtu, old_mtu; 3535 int err = 0; 3536 3537 3538 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3539 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 3540 return EINVAL; 3541 mtx_lock(&sc->driver_mtx); 3542 old_mtu = ifp->if_mtu; 3543 ifp->if_mtu = mtu; 3544 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3545 mxge_close(sc); 3546 err = mxge_open(sc); 3547 if (err != 0) { 3548 ifp->if_mtu = old_mtu; 3549 mxge_close(sc); 3550 (void) mxge_open(sc); 3551 } 3552 } 3553 mtx_unlock(&sc->driver_mtx); 3554 return err; 3555 } 3556 3557 static void 3558 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 3559 { 3560 mxge_softc_t *sc = ifp->if_softc; 3561 3562 3563 if (sc == NULL) 3564 return; 3565 ifmr->ifm_status = IFM_AVALID; 3566 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 3567 ifmr->ifm_active = IFM_AUTO | IFM_ETHER; 3568 ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0; 3569 } 3570 3571 static int 3572 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 3573 { 3574 mxge_softc_t *sc = ifp->if_softc; 3575 struct ifreq *ifr = (struct ifreq *)data; 3576 int err, mask; 3577 3578 err = 0; 3579 switch (command) { 3580 case SIOCSIFADDR: 3581 case SIOCGIFADDR: 3582 err = ether_ioctl(ifp, command, data); 3583 break; 3584 3585 case SIOCSIFMTU: 3586 err = mxge_change_mtu(sc, ifr->ifr_mtu); 3587 break; 3588 3589 case SIOCSIFFLAGS: 3590 mtx_lock(&sc->driver_mtx); 3591 if (ifp->if_flags & IFF_UP) { 3592 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 3593 err = mxge_open(sc); 3594 } else { 3595 /* take care of promis can allmulti 3596 flag chages */ 3597 mxge_change_promisc(sc, 3598 ifp->if_flags & IFF_PROMISC); 3599 mxge_set_multicast_list(sc); 3600 } 3601 } else { 3602 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3603 mxge_close(sc); 3604 } 3605 } 3606 mtx_unlock(&sc->driver_mtx); 3607 break; 3608 3609 case SIOCADDMULTI: 3610 case SIOCDELMULTI: 3611 mtx_lock(&sc->driver_mtx); 3612 mxge_set_multicast_list(sc); 3613 mtx_unlock(&sc->driver_mtx); 3614 break; 3615 3616 case SIOCSIFCAP: 3617 mtx_lock(&sc->driver_mtx); 3618 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 3619 if (mask & IFCAP_TXCSUM) { 3620 if (IFCAP_TXCSUM & ifp->if_capenable) { 3621 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 3622 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP 3623 | CSUM_TSO); 3624 } else { 3625 ifp->if_capenable |= IFCAP_TXCSUM; 3626 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 3627 } 3628 } else if (mask & IFCAP_RXCSUM) { 3629 if (IFCAP_RXCSUM & ifp->if_capenable) { 3630 ifp->if_capenable &= ~IFCAP_RXCSUM; 3631 sc->csum_flag = 0; 3632 } else { 3633 ifp->if_capenable |= IFCAP_RXCSUM; 3634 sc->csum_flag = 1; 3635 } 3636 } 3637 if (mask & IFCAP_TSO4) { 3638 if (IFCAP_TSO4 & ifp->if_capenable) { 3639 ifp->if_capenable &= ~IFCAP_TSO4; 3640 ifp->if_hwassist &= ~CSUM_TSO; 3641 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 3642 ifp->if_capenable |= IFCAP_TSO4; 3643 ifp->if_hwassist |= CSUM_TSO; 3644 } else { 3645 printf("mxge requires tx checksum 
offload" 3646 " be enabled to use TSO\n"); 3647 err = EINVAL; 3648 } 3649 } 3650 if (mask & IFCAP_LRO) { 3651 if (IFCAP_LRO & ifp->if_capenable) 3652 err = mxge_change_lro_locked(sc, 0); 3653 else 3654 err = mxge_change_lro_locked(sc, mxge_lro_cnt); 3655 } 3656 if (mask & IFCAP_VLAN_HWTAGGING) 3657 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 3658 mtx_unlock(&sc->driver_mtx); 3659 VLAN_CAPABILITIES(ifp); 3660 3661 break; 3662 3663 case SIOCGIFMEDIA: 3664 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 3665 &sc->media, command); 3666 break; 3667 3668 default: 3669 err = ENOTTY; 3670 } 3671 return err; 3672 } 3673 3674 static void 3675 mxge_fetch_tunables(mxge_softc_t *sc) 3676 { 3677 3678 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); 3679 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 3680 &mxge_flow_control); 3681 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 3682 &mxge_intr_coal_delay); 3683 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 3684 &mxge_nvidia_ecrc_enable); 3685 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 3686 &mxge_force_firmware); 3687 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 3688 &mxge_deassert_wait); 3689 TUNABLE_INT_FETCH("hw.mxge.verbose", 3690 &mxge_verbose); 3691 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 3692 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt); 3693 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); 3694 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); 3695 if (sc->lro_cnt != 0) 3696 mxge_lro_cnt = sc->lro_cnt; 3697 3698 if (bootverbose) 3699 mxge_verbose = 1; 3700 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 3701 mxge_intr_coal_delay = 30; 3702 if (mxge_ticks == 0) 3703 mxge_ticks = hz / 2; 3704 sc->pause = mxge_flow_control; 3705 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 3706 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_SRC_PORT) { 3707 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT; 3708 } 3709 } 3710 3711 3712 static void 3713 mxge_free_slices(mxge_softc_t *sc) 3714 { 3715 struct mxge_slice_state *ss; 3716 int i; 3717 3718 3719 if (sc->ss == NULL) 3720 return; 3721 3722 for (i = 0; i < sc->num_slices; i++) { 3723 ss = &sc->ss[i]; 3724 if (ss->fw_stats != NULL) { 3725 mxge_dma_free(&ss->fw_stats_dma); 3726 ss->fw_stats = NULL; 3727 mtx_destroy(&ss->tx.mtx); 3728 } 3729 if (ss->rx_done.entry != NULL) { 3730 mxge_dma_free(&ss->rx_done.dma); 3731 ss->rx_done.entry = NULL; 3732 } 3733 } 3734 free(sc->ss, M_DEVBUF); 3735 sc->ss = NULL; 3736 } 3737 3738 static int 3739 mxge_alloc_slices(mxge_softc_t *sc) 3740 { 3741 mxge_cmd_t cmd; 3742 struct mxge_slice_state *ss; 3743 size_t bytes; 3744 int err, i, max_intr_slots; 3745 3746 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 3747 if (err != 0) { 3748 device_printf(sc->dev, "Cannot determine rx ring size\n"); 3749 return err; 3750 } 3751 sc->rx_ring_size = cmd.data0; 3752 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); 3753 3754 bytes = sizeof (*sc->ss) * sc->num_slices; 3755 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO); 3756 if (sc->ss == NULL) 3757 return (ENOMEM); 3758 for (i = 0; i < sc->num_slices; i++) { 3759 ss = &sc->ss[i]; 3760 3761 ss->sc = sc; 3762 3763 /* allocate per-slice rx interrupt queues */ 3764 3765 bytes = max_intr_slots * sizeof (*ss->rx_done.entry); 3766 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096); 3767 if (err != 0) 3768 goto abort; 3769 ss->rx_done.entry = ss->rx_done.dma.addr; 3770 bzero(ss->rx_done.entry, bytes); 3771 3772 /* 3773 * allocate the 
per-slice firmware stats; stats 3774 * (including tx) are used used only on the first 3775 * slice for now 3776 */ 3777 if (i > 0) 3778 continue; 3779 3780 bytes = sizeof (*ss->fw_stats); 3781 err = mxge_dma_alloc(sc, &ss->fw_stats_dma, 3782 sizeof (*ss->fw_stats), 64); 3783 if (err != 0) 3784 goto abort; 3785 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr; 3786 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name), 3787 "%s:tx(%d)", device_get_nameunit(sc->dev), i); 3788 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF); 3789 } 3790 3791 return (0); 3792 3793 abort: 3794 mxge_free_slices(sc); 3795 return (ENOMEM); 3796 } 3797 3798 static void 3799 mxge_slice_probe(mxge_softc_t *sc) 3800 { 3801 mxge_cmd_t cmd; 3802 char *old_fw; 3803 int msix_cnt, status, max_intr_slots; 3804 3805 sc->num_slices = 1; 3806 /* 3807 * don't enable multiple slices if they are not enabled, 3808 * or if this is not an SMP system 3809 */ 3810 3811 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2) 3812 return; 3813 3814 /* see how many MSI-X interrupts are available */ 3815 msix_cnt = pci_msix_count(sc->dev); 3816 if (msix_cnt < 2) 3817 return; 3818 3819 /* now load the slice aware firmware see what it supports */ 3820 old_fw = sc->fw_name; 3821 if (old_fw == mxge_fw_aligned) 3822 sc->fw_name = mxge_fw_rss_aligned; 3823 else 3824 sc->fw_name = mxge_fw_rss_unaligned; 3825 status = mxge_load_firmware(sc, 0); 3826 if (status != 0) { 3827 device_printf(sc->dev, "Falling back to a single slice\n"); 3828 return; 3829 } 3830 3831 /* try to send a reset command to the card to see if it 3832 is alive */ 3833 memset(&cmd, 0, sizeof (cmd)); 3834 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 3835 if (status != 0) { 3836 device_printf(sc->dev, "failed reset\n"); 3837 goto abort_with_fw; 3838 } 3839 3840 /* get rx ring size */ 3841 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 3842 if (status != 0) { 3843 device_printf(sc->dev, "Cannot determine rx ring size\n"); 3844 goto abort_with_fw; 3845 } 3846 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t)); 3847 3848 /* tell it the size of the interrupt queues */ 3849 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot); 3850 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 3851 if (status != 0) { 3852 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 3853 goto abort_with_fw; 3854 } 3855 3856 /* ask the maximum number of slices it supports */ 3857 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 3858 if (status != 0) { 3859 device_printf(sc->dev, 3860 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); 3861 goto abort_with_fw; 3862 } 3863 sc->num_slices = cmd.data0; 3864 if (sc->num_slices > msix_cnt) 3865 sc->num_slices = msix_cnt; 3866 3867 if (mxge_max_slices == -1) { 3868 /* cap to number of CPUs in system */ 3869 if (sc->num_slices > mp_ncpus) 3870 sc->num_slices = mp_ncpus; 3871 } else { 3872 if (sc->num_slices > mxge_max_slices) 3873 sc->num_slices = mxge_max_slices; 3874 } 3875 /* make sure it is a power of two */ 3876 while (sc->num_slices & (sc->num_slices - 1)) 3877 sc->num_slices--; 3878 3879 if (mxge_verbose) 3880 device_printf(sc->dev, "using %d slices\n", 3881 sc->num_slices); 3882 3883 return; 3884 3885 abort_with_fw: 3886 sc->fw_name = old_fw; 3887 (void) mxge_load_firmware(sc, 0); 3888 } 3889 3890 static int 3891 mxge_add_msix_irqs(mxge_softc_t *sc) 3892 { 3893 size_t bytes; 3894 int count, err, i, rid; 3895 3896 rid = PCIR_BAR(2); 3897 sc->msix_table_res = 
bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 3898 &rid, RF_ACTIVE); 3899 3900 if (sc->msix_table_res == NULL) { 3901 device_printf(sc->dev, "couldn't alloc MSIX table res\n"); 3902 return ENXIO; 3903 } 3904 3905 count = sc->num_slices; 3906 err = pci_alloc_msix(sc->dev, &count); 3907 if (err != 0) { 3908 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d" 3909 "err = %d \n", sc->num_slices, err); 3910 goto abort_with_msix_table; 3911 } 3912 if (count < sc->num_slices) { 3913 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n", 3914 count, sc->num_slices); 3915 device_printf(sc->dev, 3916 "Try setting hw.mxge.max_slices to %d\n", 3917 count); 3918 err = ENOSPC; 3919 goto abort_with_msix; 3920 } 3921 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices; 3922 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 3923 if (sc->msix_irq_res == NULL) { 3924 err = ENOMEM; 3925 goto abort_with_msix; 3926 } 3927 3928 for (i = 0; i < sc->num_slices; i++) { 3929 rid = i + 1; 3930 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev, 3931 SYS_RES_IRQ, 3932 &rid, RF_ACTIVE); 3933 if (sc->msix_irq_res[i] == NULL) { 3934 device_printf(sc->dev, "couldn't allocate IRQ res" 3935 " for message %d\n", i); 3936 err = ENXIO; 3937 goto abort_with_res; 3938 } 3939 } 3940 3941 bytes = sizeof (*sc->msix_ih) * sc->num_slices; 3942 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 3943 3944 for (i = 0; i < sc->num_slices; i++) { 3945 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i], 3946 INTR_TYPE_NET | INTR_MPSAFE, 3947 #if __FreeBSD_version > 700030 3948 NULL, 3949 #endif 3950 mxge_intr, &sc->ss[i], &sc->msix_ih[i]); 3951 if (err != 0) { 3952 device_printf(sc->dev, "couldn't setup intr for " 3953 "message %d\n", i); 3954 goto abort_with_intr; 3955 } 3956 } 3957 3958 if (mxge_verbose) { 3959 device_printf(sc->dev, "using %d msix IRQs:", 3960 sc->num_slices); 3961 for (i = 0; i < sc->num_slices; i++) 3962 printf(" %ld", rman_get_start(sc->msix_irq_res[i])); 3963 printf("\n"); 3964 } 3965 return (0); 3966 3967 abort_with_intr: 3968 for (i = 0; i < sc->num_slices; i++) { 3969 if (sc->msix_ih[i] != NULL) { 3970 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 3971 sc->msix_ih[i]); 3972 sc->msix_ih[i] = NULL; 3973 } 3974 } 3975 free(sc->msix_ih, M_DEVBUF); 3976 3977 3978 abort_with_res: 3979 for (i = 0; i < sc->num_slices; i++) { 3980 rid = i + 1; 3981 if (sc->msix_irq_res[i] != NULL) 3982 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 3983 sc->msix_irq_res[i]); 3984 sc->msix_irq_res[i] = NULL; 3985 } 3986 free(sc->msix_irq_res, M_DEVBUF); 3987 3988 3989 abort_with_msix: 3990 pci_release_msi(sc->dev); 3991 3992 abort_with_msix_table: 3993 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 3994 sc->msix_table_res); 3995 3996 return err; 3997 } 3998 3999 static int 4000 mxge_add_single_irq(mxge_softc_t *sc) 4001 { 4002 int count, err, rid; 4003 4004 count = pci_msi_count(sc->dev); 4005 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) { 4006 rid = 1; 4007 } else { 4008 rid = 0; 4009 sc->legacy_irq = 1; 4010 } 4011 sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0, 4012 1, RF_SHAREABLE | RF_ACTIVE); 4013 if (sc->irq_res == NULL) { 4014 device_printf(sc->dev, "could not alloc interrupt\n"); 4015 return ENXIO; 4016 } 4017 if (mxge_verbose) 4018 device_printf(sc->dev, "using %s irq %ld\n", 4019 sc->legacy_irq ? 
"INTx" : "MSI", 4020 rman_get_start(sc->irq_res)); 4021 err = bus_setup_intr(sc->dev, sc->irq_res, 4022 INTR_TYPE_NET | INTR_MPSAFE, 4023 #if __FreeBSD_version > 700030 4024 NULL, 4025 #endif 4026 mxge_intr, &sc->ss[0], &sc->ih); 4027 if (err != 0) { 4028 bus_release_resource(sc->dev, SYS_RES_IRQ, 4029 sc->legacy_irq ? 0 : 1, sc->irq_res); 4030 if (!sc->legacy_irq) 4031 pci_release_msi(sc->dev); 4032 } 4033 return err; 4034 } 4035 4036 static void 4037 mxge_rem_msix_irqs(mxge_softc_t *sc) 4038 { 4039 int i, rid; 4040 4041 for (i = 0; i < sc->num_slices; i++) { 4042 if (sc->msix_ih[i] != NULL) { 4043 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4044 sc->msix_ih[i]); 4045 sc->msix_ih[i] = NULL; 4046 } 4047 } 4048 free(sc->msix_ih, M_DEVBUF); 4049 4050 for (i = 0; i < sc->num_slices; i++) { 4051 rid = i + 1; 4052 if (sc->msix_irq_res[i] != NULL) 4053 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4054 sc->msix_irq_res[i]); 4055 sc->msix_irq_res[i] = NULL; 4056 } 4057 free(sc->msix_irq_res, M_DEVBUF); 4058 4059 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4060 sc->msix_table_res); 4061 4062 pci_release_msi(sc->dev); 4063 return; 4064 } 4065 4066 static void 4067 mxge_rem_single_irq(mxge_softc_t *sc) 4068 { 4069 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 4070 bus_release_resource(sc->dev, SYS_RES_IRQ, 4071 sc->legacy_irq ? 0 : 1, sc->irq_res); 4072 if (!sc->legacy_irq) 4073 pci_release_msi(sc->dev); 4074 } 4075 4076 static void 4077 mxge_rem_irq(mxge_softc_t *sc) 4078 { 4079 if (sc->num_slices > 1) 4080 mxge_rem_msix_irqs(sc); 4081 else 4082 mxge_rem_single_irq(sc); 4083 } 4084 4085 static int 4086 mxge_add_irq(mxge_softc_t *sc) 4087 { 4088 int err; 4089 4090 if (sc->num_slices > 1) 4091 err = mxge_add_msix_irqs(sc); 4092 else 4093 err = mxge_add_single_irq(sc); 4094 4095 if (0 && err == 0 && sc->num_slices > 1) { 4096 mxge_rem_msix_irqs(sc); 4097 err = mxge_add_msix_irqs(sc); 4098 } 4099 return err; 4100 } 4101 4102 4103 static int 4104 mxge_attach(device_t dev) 4105 { 4106 mxge_softc_t *sc = device_get_softc(dev); 4107 struct ifnet *ifp; 4108 int err, rid; 4109 4110 sc->dev = dev; 4111 mxge_fetch_tunables(sc); 4112 4113 err = bus_dma_tag_create(NULL, /* parent */ 4114 1, /* alignment */ 4115 0, /* boundary */ 4116 BUS_SPACE_MAXADDR, /* low */ 4117 BUS_SPACE_MAXADDR, /* high */ 4118 NULL, NULL, /* filter */ 4119 65536 + 256, /* maxsize */ 4120 MXGE_MAX_SEND_DESC, /* num segs */ 4121 65536, /* maxsegsize */ 4122 0, /* flags */ 4123 NULL, NULL, /* lock */ 4124 &sc->parent_dmat); /* tag */ 4125 4126 if (err != 0) { 4127 device_printf(sc->dev, "Err %d allocating parent dmat\n", 4128 err); 4129 goto abort_with_nothing; 4130 } 4131 4132 ifp = sc->ifp = if_alloc(IFT_ETHER); 4133 if (ifp == NULL) { 4134 device_printf(dev, "can not if_alloc()\n"); 4135 err = ENOSPC; 4136 goto abort_with_parent_dmat; 4137 } 4138 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4139 4140 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", 4141 device_get_nameunit(dev)); 4142 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF); 4143 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), 4144 "%s:drv", device_get_nameunit(dev)); 4145 mtx_init(&sc->driver_mtx, sc->driver_mtx_name, 4146 MTX_NETWORK_LOCK, MTX_DEF); 4147 4148 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); 4149 4150 mxge_setup_cfg_space(sc); 4151 4152 /* Map the board into the kernel */ 4153 rid = PCIR_BARS; 4154 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, 4155 ~0, 1, RF_ACTIVE); 
static int
mxge_attach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp;
	int err, rid;

	sc->dev = dev;
	mxge_fetch_tunables(sc);

	err = bus_dma_tag_create(NULL,			/* parent */
				 1,			/* alignment */
				 0,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 MXGE_MAX_SEND_DESC, 	/* num segs */
				 65536,			/* maxsegsize */
				 0,			/* flags */
				 NULL, NULL,		/* lock */
				 &sc->parent_dmat);	/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating parent dmat\n",
			      err);
		goto abort_with_nothing;
	}

	ifp = sc->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "can not if_alloc()\n");
		err = ENOSPC;
		goto abort_with_parent_dmat;
	}
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
		 device_get_nameunit(dev));
	mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
	snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
		 "%s:drv", device_get_nameunit(dev));
	mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
		 MTX_NETWORK_LOCK, MTX_DEF);

	callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);

	mxge_setup_cfg_space(sc);

	/* Map the board into the kernel */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
					 ~0, 1, RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto abort_with_lock;
	}
	sc->sram = rman_get_virtual(sc->mem_res);
	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
	if (sc->sram_size > rman_get_size(sc->mem_res)) {
		device_printf(dev, "impossible memory region size %ld\n",
			      rman_get_size(sc->mem_res));
		err = ENXIO;
		goto abort_with_mem_res;
	}

	/* make NULL terminated copy of the EEPROM strings section of
	   lanai SRAM */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
				sc->eeprom_strings,
				MXGE_EEPROM_STRINGS_SIZE - 2);
	err = mxge_parse_strings(sc);
	if (err != 0)
		goto abort_with_mem_res;

	/* Enable write combining for efficient use of PCIe bus */
	mxge_enable_wc(sc);

	/* Allocate the out of band dma memory */
	err = mxge_dma_alloc(sc, &sc->cmd_dma,
			     sizeof (mxge_cmd_t), 64);
	if (err != 0)
		goto abort_with_mem_res;
	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0)
		goto abort_with_cmd_dma;

	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0)
		goto abort_with_zeropad_dma;

	/* select & load the firmware */
	err = mxge_select_firmware(sc);
	if (err != 0)
		goto abort_with_dmabench;
	sc->intr_coal_delay = mxge_intr_coal_delay;

	mxge_slice_probe(sc);
	err = mxge_alloc_slices(sc);
	if (err != 0)
		goto abort_with_dmabench;

	err = mxge_reset(sc, 0);
	if (err != 0)
		goto abort_with_slices;

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to allocate rings\n");
		goto abort_with_slices;
	}

	err = mxge_add_irq(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to add irq\n");
		goto abort_with_rings;
	}

	ifp->if_baudrate = IF_Gbps(10UL);
	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
		IFCAP_VLAN_MTU | IFCAP_LRO;

#ifdef MXGE_NEW_VLAN_API
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
#endif

	sc->max_mtu = mxge_max_mtu(sc);
	if (sc->max_mtu >= 9000)
		ifp->if_capabilities |= IFCAP_JUMBO_MTU;
	else
		device_printf(dev, "MTU limited to %d.  Install "
			      "latest firmware for 9000 byte jumbo support\n",
			      sc->max_mtu - ETHER_HDR_LEN);
	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
	ifp->if_capenable = ifp->if_capabilities;
	if (sc->lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	sc->csum_flag = 1;
	ifp->if_init = mxge_init;
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = mxge_ioctl;
	ifp->if_start = mxge_start;
	/* Initialise the ifmedia structure */
	ifmedia_init(&sc->media, 0, mxge_media_change,
		     mxge_media_status);
	mxge_set_media(sc, IFM_ETHER | IFM_AUTO);
	mxge_media_probe(sc);
	ether_ifattach(ifp, sc->mac_addr);
	/* ether_ifattach sets mtu to 1500 */
	if (ifp->if_capabilities & IFCAP_JUMBO_MTU)
		ifp->if_mtu = 9000;

	mxge_add_sysctls(sc);
	return 0;

abort_with_rings:
	mxge_free_rings(sc);
abort_with_slices:
	mxge_free_slices(sc);
abort_with_dmabench:
	mxge_dma_free(&sc->dmabench_dma);
abort_with_zeropad_dma:
	mxge_dma_free(&sc->zeropad_dma);
abort_with_cmd_dma:
	mxge_dma_free(&sc->cmd_dma);
abort_with_mem_res:
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
abort_with_lock:
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(ifp);
abort_with_parent_dmat:
	bus_dma_tag_destroy(sc->parent_dmat);

abort_with_nothing:
	return err;
}
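
/*
 * Editor's sketch (compiled out): how the three capability fields set
 * up in mxge_attach() above relate.  if_capabilities lists what the
 * hardware can do, if_capenable what is currently switched on, and
 * if_hwassist which checksum/TSO work the stack may hand down.  A
 * SIOCSIFCAP handler would typically resync if_hwassist after a
 * capability toggle, roughly like this; the function name is
 * hypothetical, the ifnet fields and CSUM_*/IFCAP_* flags are real.
 */
#if 0
static void
example_sync_hwassist(struct ifnet *ifp)
{
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= CSUM_TCP | CSUM_UDP;
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;
}
#endif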
static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	if (mxge_vlans_active(sc)) {
		device_printf(sc->dev,
			      "Detach vlans before removing module\n");
		return EBUSY;
	}
	mtx_lock(&sc->driver_mtx);
	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
		mxge_close(sc);
	mtx_unlock(&sc->driver_mtx);
	ether_ifdetach(sc->ifp);
	callout_drain(&sc->co_hdl);
	ifmedia_removeall(&sc->media);
	mxge_dummy_rdma(sc, 0);
	mxge_rem_sysctls(sc);
	mxge_rem_irq(sc);
	mxge_free_rings(sc);
	mxge_free_slices(sc);
	mxge_dma_free(&sc->dmabench_dma);
	mxge_dma_free(&sc->zeropad_dma);
	mxge_dma_free(&sc->cmd_dma);
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(sc->ifp);
	bus_dma_tag_destroy(sc->parent_dmat);
	return 0;
}

static int
mxge_shutdown(device_t dev)
{
	return 0;
}

/*
  This file uses Myri10GE driver indentation.

  Local Variables:
  c-file-style:"linux"
  tab-width:8
  End:
*/