1 /******************************************************************************
2 SPDX-License-Identifier: BSD-2-Clause
3
4 Copyright (c) 2006-2013, Myricom Inc.
5 All rights reserved.
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
9
10 1. Redistributions of source code must retain the above copyright notice,
11 this list of conditions and the following disclaimer.
12
13 2. Neither the name of the Myricom Inc, nor the names of its
14 contributors may be used to endorse or promote products derived from
15 this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.
28
29 ***************************************************************************/
30
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/linker.h>
34 #include <sys/firmware.h>
35 #include <sys/endian.h>
36 #include <sys/sockio.h>
37 #include <sys/mbuf.h>
38 #include <sys/malloc.h>
39 #include <sys/kdb.h>
40 #include <sys/kernel.h>
41 #include <sys/lock.h>
42 #include <sys/module.h>
43 #include <sys/socket.h>
44 #include <sys/sysctl.h>
45 #include <sys/sx.h>
46 #include <sys/taskqueue.h>
47 #include <contrib/zlib/zlib.h>
48 #include <dev/zlib/zcalloc.h>
49
50 #include <net/if.h>
51 #include <net/if_var.h>
52 #include <net/if_arp.h>
53 #include <net/ethernet.h>
54 #include <net/if_dl.h>
55 #include <net/if_media.h>
56
57 #include <net/bpf.h>
58
59 #include <net/if_types.h>
60 #include <net/if_vlan_var.h>
61
62 #include <netinet/in_systm.h>
63 #include <netinet/in.h>
64 #include <netinet/ip.h>
65 #include <netinet/ip6.h>
66 #include <netinet/tcp.h>
67 #include <netinet/tcp_lro.h>
68 #include <netinet6/ip6_var.h>
69
70 #include <machine/bus.h>
71 #include <machine/in_cksum.h>
72 #include <machine/resource.h>
73 #include <sys/bus.h>
74 #include <sys/rman.h>
75 #include <sys/smp.h>
76
77 #include <dev/pci/pcireg.h>
78 #include <dev/pci/pcivar.h>
79 #include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */
80
81 #include <vm/vm.h> /* for pmap_mapdev() */
82 #include <vm/pmap.h>
83
84 #if defined(__i386) || defined(__amd64)
85 #include <machine/specialreg.h>
86 #endif
87
88 #include <dev/mxge/mxge_mcp.h>
89 #include <dev/mxge/mcp_gen_header.h>
90 /*#define MXGE_FAKE_IFP*/
91 #include <dev/mxge/if_mxge_var.h>
92 #include <sys/buf_ring.h>
93
94 #include "opt_inet.h"
95 #include "opt_inet6.h"
96
97 /* tunable params */
98 static int mxge_nvidia_ecrc_enable = 1;
99 static int mxge_force_firmware = 0;
100 static int mxge_intr_coal_delay = 30;
101 static int mxge_deassert_wait = 1;
102 static int mxge_flow_control = 1;
103 static int mxge_verbose = 0;
104 static int mxge_ticks;
105 static int mxge_max_slices = 1;
106 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
107 static int mxge_always_promisc = 0;
108 static int mxge_initial_mtu = ETHERMTU_JUMBO;
109 static int mxge_throttle = 0;
110 static char *mxge_fw_unaligned = "mxge_ethp_z8e";
111 static char *mxge_fw_aligned = "mxge_eth_z8e";
112 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
113 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
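/*
 * Illustrative only: these defaults can usually be overridden at boot via
 * loader(8) tunables.  Assuming the conventional hw.mxge.* tunable names
 * documented in mxge(4), /boot/loader.conf might contain, e.g.:
 *
 *   hw.mxge.intr_coal_delay="30"
 *   hw.mxge.flow_control_enabled="1"
 *   hw.mxge.max_slices="1"
 */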
114
115 static int mxge_probe(device_t dev);
116 static int mxge_attach(device_t dev);
117 static int mxge_detach(device_t dev);
118 static int mxge_shutdown(device_t dev);
119 static void mxge_intr(void *arg);
120
121 static device_method_t mxge_methods[] =
122 {
123 /* Device interface */
124 DEVMETHOD(device_probe, mxge_probe),
125 DEVMETHOD(device_attach, mxge_attach),
126 DEVMETHOD(device_detach, mxge_detach),
127 DEVMETHOD(device_shutdown, mxge_shutdown),
128
129 DEVMETHOD_END
130 };
131
132 static driver_t mxge_driver =
133 {
134 "mxge",
135 mxge_methods,
136 sizeof(mxge_softc_t),
137 };
138
139 /* Declare ourselves to be a child of the PCI bus.*/
140 DRIVER_MODULE(mxge, pci, mxge_driver, 0, 0);
141 MODULE_DEPEND(mxge, firmware, 1, 1, 1);
142 MODULE_DEPEND(mxge, zlib, 1, 1, 1);
143
144 static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
145 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
146 static int mxge_close(mxge_softc_t *sc, int down);
147 static int mxge_open(mxge_softc_t *sc);
148 static void mxge_tick(void *arg);
149
150 static int
151 mxge_probe(device_t dev)
152 {
153 int rev;
154
155 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
156 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
157 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
158 rev = pci_get_revid(dev);
159 switch (rev) {
160 case MXGE_PCI_REV_Z8E:
161 device_set_desc(dev, "Myri10G-PCIE-8A");
162 break;
163 case MXGE_PCI_REV_Z8ES:
164 device_set_desc(dev, "Myri10G-PCIE-8B");
165 break;
166 default:
167 device_set_desc(dev, "Myri10G-PCIE-8??");
168 device_printf(dev, "Unrecognized rev %d NIC\n",
169 rev);
170 break;
171 }
172 return 0;
173 }
174 return ENXIO;
175 }
176
177 static void
178 mxge_enable_wc(mxge_softc_t *sc)
179 {
180 #if defined(__i386) || defined(__amd64)
181 vm_offset_t len;
182 int err;
183
184 sc->wc = 1;
185 len = rman_get_size(sc->mem_res);
186 err = pmap_change_attr((vm_offset_t) sc->sram,
187 len, PAT_WRITE_COMBINING);
188 if (err != 0) {
189 device_printf(sc->dev, "pmap_change_attr failed, %d\n",
190 err);
191 sc->wc = 0;
192 }
193 #endif
194 }
195
196 /* callback to get our DMA address */
197 static void
198 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
199 int error)
200 {
201 if (error == 0) {
202 *(bus_addr_t *) arg = segs->ds_addr;
203 }
204 }
205
206 static int
207 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
208 bus_size_t alignment)
209 {
210 int err;
211 device_t dev = sc->dev;
212 bus_size_t boundary, maxsegsize;
213
214 if (bytes > 4096 && alignment == 4096) {
215 boundary = 0;
216 maxsegsize = bytes;
217 } else {
218 boundary = 4096;
219 maxsegsize = 4096;
220 }
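/*
 * Example of the tag parameters chosen above (a sketch with assumed sizes):
 * a 16KB ring requested with 4KB alignment gets boundary = 0 and
 * maxsegsize = 16KB, i.e. one contiguous segment that may span 4KB
 * boundaries; a 1KB allocation instead gets boundary = maxsegsize = 4KB,
 * so its single segment can never straddle a 4KB boundary.
 */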
221
222 /* allocate DMAable memory tags */
223 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
224 alignment, /* alignment */
225 boundary, /* boundary */
226 BUS_SPACE_MAXADDR, /* low */
227 BUS_SPACE_MAXADDR, /* high */
228 NULL, NULL, /* filter */
229 bytes, /* maxsize */
230 1, /* num segs */
231 maxsegsize, /* maxsegsize */
232 BUS_DMA_COHERENT, /* flags */
233 NULL, NULL, /* lock */
234 &dma->dmat); /* tag */
235 if (err != 0) {
236 device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
237 return err;
238 }
239
240 /* allocate DMAable memory & map */
241 err = bus_dmamem_alloc(dma->dmat, &dma->addr,
242 (BUS_DMA_WAITOK | BUS_DMA_COHERENT
243 | BUS_DMA_ZERO), &dma->map);
244 if (err != 0) {
245 device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
246 goto abort_with_dmat;
247 }
248
249 /* load the memory */
250 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
251 mxge_dmamap_callback,
252 (void *)&dma->bus_addr, 0);
253 if (err != 0) {
254 device_printf(dev, "couldn't load map (err = %d)\n", err);
255 goto abort_with_mem;
256 }
257 return 0;
258
259 abort_with_mem:
260 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
261 abort_with_dmat:
262 (void)bus_dma_tag_destroy(dma->dmat);
263 return err;
264 }
265
266 static void
267 mxge_dma_free(mxge_dma_t *dma)
268 {
269 bus_dmamap_unload(dma->dmat, dma->map);
270 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
271 (void)bus_dma_tag_destroy(dma->dmat);
272 }
273
274 /*
275 * The eeprom strings on the lanaiX have the format
276 * SN=x\0
277 * MAC=x:x:x:x:x:x\0
278 * PC=text\0
279 */
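/*
 * For example (hypothetical values), the raw string area is a sequence of
 * NUL-terminated strings ending with an empty string:
 *
 *   "SN=123456\0MAC=00:60:dd:12:34:56\0PC=M3F-PCIXE-8A\0\0"
 *
 * which mxge_parse_strings() below walks one string at a time.
 */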
280
281 static int
282 mxge_parse_strings(mxge_softc_t *sc)
283 {
284 char *ptr;
285 int i, found_mac, found_sn2;
286 char *endptr;
287
288 ptr = sc->eeprom_strings;
289 found_mac = 0;
290 found_sn2 = 0;
291 while (*ptr != '\0') {
292 if (strncmp(ptr, "MAC=", 4) == 0) {
293 ptr += 4;
294 for (i = 0;;) {
295 sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
296 if (endptr - ptr != 2)
297 goto abort;
298 ptr = endptr;
299 if (++i == 6)
300 break;
301 if (*ptr++ != ':')
302 goto abort;
303 }
304 found_mac = 1;
305 } else if (strncmp(ptr, "PC=", 3) == 0) {
306 ptr += 3;
307 strlcpy(sc->product_code_string, ptr,
308 sizeof(sc->product_code_string));
309 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
310 ptr += 3;
311 strlcpy(sc->serial_number_string, ptr,
312 sizeof(sc->serial_number_string));
313 } else if (strncmp(ptr, "SN2=", 4) == 0) {
314 /* SN2 takes precedence over SN */
315 ptr += 4;
316 found_sn2 = 1;
317 strlcpy(sc->serial_number_string, ptr,
318 sizeof(sc->serial_number_string));
319 }
320 while (*ptr++ != '\0') {}
321 }
322
323 if (found_mac)
324 return 0;
325
326 abort:
327 device_printf(sc->dev, "failed to parse eeprom_strings\n");
328
329 return ENXIO;
330 }
331
332 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
333 static void
334 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
335 {
336 uint32_t val;
337 unsigned long base, off;
338 char *va, *cfgptr;
339 device_t pdev, mcp55;
340 uint16_t vendor_id, device_id, word;
341 uintptr_t bus, slot, func, ivend, idev;
342 uint32_t *ptr32;
343
344 if (!mxge_nvidia_ecrc_enable)
345 return;
346
347 pdev = device_get_parent(device_get_parent(sc->dev));
348 if (pdev == NULL) {
349 device_printf(sc->dev, "could not find parent?\n");
350 return;
351 }
352 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
353 device_id = pci_read_config(pdev, PCIR_DEVICE, 2);
354
355 if (vendor_id != 0x10de)
356 return;
357
358 base = 0;
359
360 if (device_id == 0x005d) {
361 /* ck804, base address is magic */
362 base = 0xe0000000UL;
363 } else if (device_id >= 0x0374 && device_id <= 0x378) {
364 /* mcp55, base address stored in chipset */
365 mcp55 = pci_find_bsf(0, 0, 0);
366 if (mcp55 &&
367 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
368 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
369 word = pci_read_config(mcp55, 0x90, 2);
370 base = ((unsigned long)word & 0x7ffeU) << 25;
371 }
372 }
373 if (!base)
374 return;
375
376 /* XXXX
377 Test below is commented because it is believed that doing
378 config read/write beyond 0xff will access the config space
379 for the next larger function. Uncomment this and remove
380 the hacky pmap_mapdev() way of accessing config space when
381 FreeBSD grows support for extended pcie config space access
382 */
383 #if 0
384 /* See if we can, by some miracle, access the extended
385 config space */
386 val = pci_read_config(pdev, 0x178, 4);
387 if (val != 0xffffffff) {
388 val |= 0x40;
389 pci_write_config(pdev, 0x178, val, 4);
390 return;
391 }
392 #endif
393 /* Rather than using normal pci config space writes, we must
394 * map the Nvidia config space ourselves. This is because on
395 * Opteron/Nvidia class machines the 0xe0000000 mapping is
396 * handled by the Nvidia chipset, which means the internal PCI
397 * device (the on-chip northbridge), the amd-8131 bridge, and
398 * things behind them are not visible by this method.
399 */
400
401 BUS_READ_IVAR(device_get_parent(pdev), pdev,
402 PCI_IVAR_BUS, &bus);
403 BUS_READ_IVAR(device_get_parent(pdev), pdev,
404 PCI_IVAR_SLOT, &slot);
405 BUS_READ_IVAR(device_get_parent(pdev), pdev,
406 PCI_IVAR_FUNCTION, &func);
407 BUS_READ_IVAR(device_get_parent(pdev), pdev,
408 PCI_IVAR_VENDOR, &ivend);
409 BUS_READ_IVAR(device_get_parent(pdev), pdev,
410 PCI_IVAR_DEVICE, &idev);
411
412 off = base
413 + 0x00100000UL * (unsigned long)bus
414 + 0x00001000UL * (unsigned long)(func
415 + 8 * slot);
416
417 /* map it into the kernel */
418 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
419
420 if (va == NULL) {
421 device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
422 return;
423 }
424 /* get a pointer to the config space mapped into the kernel */
425 cfgptr = va + (off & PAGE_MASK);
426
427 /* make sure that we can really access it */
428 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
429 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
430 if (! (vendor_id == ivend && device_id == idev)) {
431 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
432 vendor_id, device_id);
433 pmap_unmapdev(va, PAGE_SIZE);
434 return;
435 }
436
437 ptr32 = (uint32_t*)(cfgptr + 0x178);
438 val = *ptr32;
439
440 if (val == 0xffffffff) {
441 device_printf(sc->dev, "extended mapping failed\n");
442 pmap_unmapdev(va, PAGE_SIZE);
443 return;
444 }
445 *ptr32 = val | 0x40;
446 pmap_unmapdev(va, PAGE_SIZE);
447 if (mxge_verbose)
448 device_printf(sc->dev,
449 "Enabled ECRC on upstream Nvidia bridge "
450 "at %d:%d:%d\n",
451 (int)bus, (int)slot, (int)func);
452 return;
453 }
454 #else
455 static void
456 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
457 {
458 device_printf(sc->dev,
459 "Nforce 4 chipset on non-x86/amd64!?!?!\n");
460 return;
461 }
462 #endif
463
464 static int
465 mxge_dma_test(mxge_softc_t *sc, int test_type)
466 {
467 mxge_cmd_t cmd;
468 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
469 int status;
470 uint32_t len;
471 char *test = " ";
472
473 /* Run a small DMA test.
474 * The magic multipliers to the length tell the firmware
475 * to do DMA read, write, or read+write tests. The
476 * results are returned in cmd.data0. The upper 16
477 * bits of the return is the number of transfers completed.
478 * The lower 16 bits is the time in 0.5us ticks that the
479 * transfers took to complete.
480 */
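/* Worked example (made-up numbers): with len = 4096 and a returned
 * cmd.data0 of 0x00fa03e8, 0xfa (250) transfers completed in 0x3e8
 * (1000) half-microsecond ticks, so the read-bandwidth formula below
 * computes (250 * 4096 * 2) / 1000 = 2048 bytes/us, roughly 2048 MB/s.
 */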
481
482 len = sc->tx_boundary;
483
484 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
485 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
486 cmd.data2 = len * 0x10000;
487 status = mxge_send_cmd(sc, test_type, &cmd);
488 if (status != 0) {
489 test = "read";
490 goto abort;
491 }
492 sc->read_dma = ((cmd.data0>>16) * len * 2) /
493 (cmd.data0 & 0xffff);
494 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
495 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
496 cmd.data2 = len * 0x1;
497 status = mxge_send_cmd(sc, test_type, &cmd);
498 if (status != 0) {
499 test = "write";
500 goto abort;
501 }
502 sc->write_dma = ((cmd.data0>>16) * len * 2) /
503 (cmd.data0 & 0xffff);
504
505 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
506 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
507 cmd.data2 = len * 0x10001;
508 status = mxge_send_cmd(sc, test_type, &cmd);
509 if (status != 0) {
510 test = "read/write";
511 goto abort;
512 }
513 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
514 (cmd.data0 & 0xffff);
515
516 abort:
517 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
518 device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
519 test, status);
520
521 return status;
522 }
523
524 /*
525 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
526 * when the PCI-E Completion packets are aligned on an 8-byte
527 * boundary. Some PCI-E chip sets always align Completion packets; on
528 * the ones that do not, the alignment can be enforced by enabling
529 * ECRC generation (if supported).
530 *
531 * When PCI-E Completion packets are not aligned, it is actually more
532 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
533 *
534 * If the driver can neither enable ECRC nor verify that it has
535 * already been enabled, then it must use a firmware image which works
536 * around unaligned completion packets (ethp_z8e.dat), and it should
537 * also ensure that it never gives the device a Read-DMA which is
538 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
539 * enabled, then the driver should use the aligned (eth_z8e.dat)
540 * firmware image, and set tx_boundary to 4KB.
541 */
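/*
 * The resulting policy, in sketch form:
 *
 *   completions verified aligned, link width <= 4, or forced aligned
 *       -> fw_name = "mxge_eth_z8e",  tx_boundary = 4096
 *   otherwise
 *       -> fw_name = "mxge_ethp_z8e", tx_boundary = 2048
 */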
542
543 static int
544 mxge_firmware_probe(mxge_softc_t *sc)
545 {
546 device_t dev = sc->dev;
547 int reg, status;
548 uint16_t pectl;
549
550 sc->tx_boundary = 4096;
551 /*
552 * Verify the max read request size was set to 4KB
553 * before trying the test with 4KB.
554 */
555 if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
556 pectl = pci_read_config(dev, reg + 0x8, 2);
557 if ((pectl & (5 << 12)) != (5 << 12)) {
558 device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
559 pectl);
560 sc->tx_boundary = 2048;
561 }
562 }
563
564 /*
565 * load the optimized firmware (which assumes aligned PCIe
566 * completions) in order to see if it works on this host.
567 */
568 sc->fw_name = mxge_fw_aligned;
569 status = mxge_load_firmware(sc, 1);
570 if (status != 0) {
571 return status;
572 }
573
574 /*
575 * Enable ECRC if possible
576 */
577 mxge_enable_nvidia_ecrc(sc);
578
579 /*
580 * Run a DMA test which watches for unaligned completions and
581 * aborts on the first one seen. Not required on Z8ES or newer.
582 */
583 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
584 return 0;
585 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
586 if (status == 0)
587 return 0; /* keep the aligned firmware */
588
589 if (status != E2BIG)
590 device_printf(dev, "DMA test failed: %d\n", status);
591 if (status == ENOSYS)
592 device_printf(dev, "Falling back to ethp! "
593 "Please install up to date fw\n");
594 return status;
595 }
596
597 static int
598 mxge_select_firmware(mxge_softc_t *sc)
599 {
600 int aligned = 0;
601 int force_firmware = mxge_force_firmware;
602
603 if (sc->throttle)
604 force_firmware = sc->throttle;
605
606 if (force_firmware != 0) {
607 if (force_firmware == 1)
608 aligned = 1;
609 else
610 aligned = 0;
611 if (mxge_verbose)
612 device_printf(sc->dev,
613 "Assuming %s completions (forced)\n",
614 aligned ? "aligned" : "unaligned");
615 goto abort;
616 }
617
618 /* if the PCIe link width is 4 or less, we can use the aligned
619 firmware and skip any checks */
620 if (sc->link_width != 0 && sc->link_width <= 4) {
621 device_printf(sc->dev,
622 "PCIe x%d Link, expect reduced performance\n",
623 sc->link_width);
624 aligned = 1;
625 goto abort;
626 }
627
628 if (0 == mxge_firmware_probe(sc))
629 return 0;
630
631 abort:
632 if (aligned) {
633 sc->fw_name = mxge_fw_aligned;
634 sc->tx_boundary = 4096;
635 } else {
636 sc->fw_name = mxge_fw_unaligned;
637 sc->tx_boundary = 2048;
638 }
639 return (mxge_load_firmware(sc, 0));
640 }
641
642 static int
643 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
644 {
645
646 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
647 device_printf(sc->dev, "Bad firmware type: 0x%x\n",
648 be32toh(hdr->mcp_type));
649 return EIO;
650 }
651
652 /* save firmware version for sysctl */
653 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version));
654 if (mxge_verbose)
655 device_printf(sc->dev, "firmware id: %s\n", hdr->version);
656
657 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
658 &sc->fw_ver_minor, &sc->fw_ver_tiny);
659
660 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
661 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
662 device_printf(sc->dev, "Found firmware version %s\n",
663 sc->fw_version);
664 device_printf(sc->dev, "Driver needs %d.%d\n",
665 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
666 return EINVAL;
667 }
668 return 0;
669
670 }
671
672 static int
673 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
674 {
675 z_stream zs;
676 char *inflate_buffer;
677 const struct firmware *fw;
678 const mcp_gen_header_t *hdr;
679 unsigned hdr_offset;
680 int status;
681 unsigned int i;
682 size_t fw_len;
683
684 fw = firmware_get(sc->fw_name);
685 if (fw == NULL) {
686 device_printf(sc->dev, "Could not find firmware image %s\n",
687 sc->fw_name);
688 return ENOENT;
689 }
690
691 /* setup zlib and decompress f/w */
692 bzero(&zs, sizeof (zs));
693 zs.zalloc = zcalloc_nowait;
694 zs.zfree = zcfree;
695 status = inflateInit(&zs);
696 if (status != Z_OK) {
697 status = EIO;
698 goto abort_with_fw;
699 }
700
701 /* the uncompressed size is stored as the firmware version,
702 which would otherwise go unused */
703 fw_len = (size_t) fw->version;
704 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
705 if (inflate_buffer == NULL) {
706 status = ENOMEM;
goto abort_with_zs;
}
707 zs.avail_in = fw->datasize;
708 zs.next_in = __DECONST(char *, fw->data);
709 zs.avail_out = fw_len;
710 zs.next_out = inflate_buffer;
711 status = inflate(&zs, Z_FINISH);
712 if (status != Z_STREAM_END) {
713 device_printf(sc->dev, "zlib %d\n", status);
714 status = EIO;
715 goto abort_with_buffer;
716 }
717
718 /* check id */
719 hdr_offset = htobe32(*(const uint32_t *)
720 (inflate_buffer + MCP_HEADER_PTR_OFFSET));
721 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
722 device_printf(sc->dev, "Bad firmware file");
723 status = EIO;
724 goto abort_with_buffer;
725 }
726 hdr = (const void*)(inflate_buffer + hdr_offset);
727
728 status = mxge_validate_firmware(sc, hdr);
729 if (status != 0)
730 goto abort_with_buffer;
731
732 /* Copy the inflated firmware to NIC SRAM. */
733 for (i = 0; i < fw_len; i += 256) {
734 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
735 inflate_buffer + i,
736 min(256U, (unsigned)(fw_len - i)));
737 wmb();
738 (void)*sc->sram;
739 wmb();
740 }
741
742 *limit = fw_len;
743 status = 0;
744 abort_with_buffer:
745 free(inflate_buffer, M_TEMP);
746 abort_with_zs:
747 inflateEnd(&zs);
748 abort_with_fw:
749 firmware_put(fw, FIRMWARE_UNLOAD);
750 return status;
751 }
752
753 /*
754 * Enable or disable periodic RDMAs from the host to make certain
755 * chipsets resend dropped PCIe messages
756 */
757
758 static void
759 mxge_dummy_rdma(mxge_softc_t *sc, int enable)
760 {
761 char buf_bytes[72];
762 volatile uint32_t *confirm;
763 volatile char *submit;
764 uint32_t *buf, dma_low, dma_high;
765 int i;
766
767 buf = (uint32_t *)((uintptr_t)(buf_bytes + 7) & ~7UL);
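/* Alignment arithmetic, by example: if buf_bytes happened to start at
 * 0x1003, then 0x1003 + 7 = 0x100a and 0x100a & ~7 = 0x1008, the first
 * 8-byte-aligned address inside the 72-byte scratch buffer. */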
768
769 /* clear confirmation addr */
770 confirm = (volatile uint32_t *)sc->cmd;
771 *confirm = 0;
772 wmb();
773
774 /* send an rdma command to the PCIe engine, and wait for the
775 response in the confirmation address. The firmware should
776 write a -1 there to indicate it is alive and well
777 */
778
779 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
780 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
781 buf[0] = htobe32(dma_high); /* confirm addr MSW */
782 buf[1] = htobe32(dma_low); /* confirm addr LSW */
783 buf[2] = htobe32(0xffffffff); /* confirm data */
784 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
785 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
786 buf[3] = htobe32(dma_high); /* dummy addr MSW */
787 buf[4] = htobe32(dma_low); /* dummy addr LSW */
788 buf[5] = htobe32(enable); /* enable? */
789
790 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);
791
792 mxge_pio_copy(submit, buf, 64);
793 wmb();
794 DELAY(1000);
795 wmb();
796 i = 0;
797 while (*confirm != 0xffffffff && i < 20) {
798 DELAY(1000);
799 i++;
800 }
801 if (*confirm != 0xffffffff) {
802 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)\n",
803 (enable ? "enable" : "disable"), confirm,
804 *confirm);
805 }
806 return;
807 }
808
809 static int
810 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
811 {
812 mcp_cmd_t *buf;
813 char buf_bytes[sizeof(*buf) + 8];
814 volatile mcp_cmd_response_t *response = sc->cmd;
815 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
816 uint32_t dma_low, dma_high;
817 int err, sleep_total = 0;
818
819 /* ensure buf is aligned to 8 bytes */
820 buf = (mcp_cmd_t *)((uintptr_t)(buf_bytes + 7) & ~7UL);
821
822 buf->data0 = htobe32(data->data0);
823 buf->data1 = htobe32(data->data1);
824 buf->data2 = htobe32(data->data2);
825 buf->cmd = htobe32(cmd);
826 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
827 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
828
829 buf->response_addr.low = htobe32(dma_low);
830 buf->response_addr.high = htobe32(dma_high);
831 mtx_lock(&sc->cmd_mtx);
832 response->result = 0xffffffff;
833 wmb();
834 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
835
836 /* wait up to 20ms */
837 err = EAGAIN;
838 for (sleep_total = 0; sleep_total < 20; sleep_total++) {
839 bus_dmamap_sync(sc->cmd_dma.dmat,
840 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
841 wmb();
842 switch (be32toh(response->result)) {
843 case 0:
844 data->data0 = be32toh(response->data);
845 err = 0;
846 break;
847 case 0xffffffff:
848 DELAY(1000);
849 break;
850 case MXGEFW_CMD_UNKNOWN:
851 err = ENOSYS;
852 break;
853 case MXGEFW_CMD_ERROR_UNALIGNED:
854 err = E2BIG;
855 break;
856 case MXGEFW_CMD_ERROR_BUSY:
857 err = EBUSY;
858 break;
859 case MXGEFW_CMD_ERROR_I2C_ABSENT:
860 err = ENXIO;
861 break;
862 default:
863 device_printf(sc->dev,
864 "mxge: command %d "
865 "failed, result = %d\n",
866 cmd, be32toh(response->result));
867 err = ENXIO;
868 break;
869 }
870 if (err != EAGAIN)
871 break;
872 }
873 if (err == EAGAIN)
874 device_printf(sc->dev, "mxge: command %d timed out"
875 "result = %d\n",
876 cmd, be32toh(response->result));
877 mtx_unlock(&sc->cmd_mtx);
878 return err;
879 }
880
881 static int
882 mxge_adopt_running_firmware(mxge_softc_t *sc)
883 {
884 struct mcp_gen_header *hdr;
885 const size_t bytes = sizeof (struct mcp_gen_header);
886 size_t hdr_offset;
887 int status;
888
889 /* find running firmware header */
890 hdr_offset = htobe32(*(volatile uint32_t *)
891 (sc->sram + MCP_HEADER_PTR_OFFSET));
892
893 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
894 device_printf(sc->dev,
895 "Running firmware has bad header offset (%d)\n",
896 (int)hdr_offset);
897 return EIO;
898 }
899
900 /* copy header of running firmware from SRAM to host memory to
901 * validate firmware */
902 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
903 if (hdr == NULL) {
904 device_printf(sc->dev, "could not malloc firmware hdr\n");
905 return ENOMEM;
906 }
907 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
908 rman_get_bushandle(sc->mem_res),
909 hdr_offset, (char *)hdr, bytes);
910 status = mxge_validate_firmware(sc, hdr);
911 free(hdr, M_DEVBUF);
912
913 /*
914 * check to see if adopted firmware has bug where adopting
915 * it will cause broadcasts to be filtered unless the NIC
916 * is kept in ALLMULTI mode
917 */
918 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
919 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
920 sc->adopted_rx_filter_bug = 1;
921 device_printf(sc->dev, "Adopting fw %d.%d.%d: "
922 "working around rx filter bug\n",
923 sc->fw_ver_major, sc->fw_ver_minor,
924 sc->fw_ver_tiny);
925 }
926
927 return status;
928 }
929
930 static int
931 mxge_load_firmware(mxge_softc_t *sc, int adopt)
932 {
933 volatile uint32_t *confirm;
934 volatile char *submit;
935 char buf_bytes[72];
936 uint32_t *buf, size, dma_low, dma_high;
937 int status, i;
938
939 buf = (uint32_t *)((uintptr_t)(buf_bytes + 7) & ~7UL);
940
941 size = sc->sram_size;
942 status = mxge_load_firmware_helper(sc, &size);
943 if (status) {
944 if (!adopt)
945 return status;
946 /* Try to use the currently running firmware, if
947 it is new enough */
948 status = mxge_adopt_running_firmware(sc);
949 if (status) {
950 device_printf(sc->dev,
951 "failed to adopt running firmware\n");
952 return status;
953 }
954 device_printf(sc->dev,
955 "Successfully adopted running firmware\n");
956 if (sc->tx_boundary == 4096) {
957 device_printf(sc->dev,
958 "Using firmware currently running on NIC"
959 ". For optimal\n");
960 device_printf(sc->dev,
961 "performance consider loading optimized "
962 "firmware\n");
963 }
964 sc->fw_name = mxge_fw_unaligned;
965 sc->tx_boundary = 2048;
966 return 0;
967 }
968 /* clear confirmation addr */
969 confirm = (volatile uint32_t *)sc->cmd;
970 *confirm = 0;
971 wmb();
972 /* send a reload command to the bootstrap MCP, and wait for the
973 response in the confirmation address. The firmware should
974 write a -1 there to indicate it is alive and well
975 */
976
977 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
978 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
979
980 buf[0] = htobe32(dma_high); /* confirm addr MSW */
981 buf[1] = htobe32(dma_low); /* confirm addr LSW */
982 buf[2] = htobe32(0xffffffff); /* confirm data */
983
984 /* FIX: All newest firmware should un-protect the bottom of
985 the sram before handoff. However, the very first interfaces
986 do not. Therefore the handoff copy must skip the first 8 bytes
987 */
988 /* where the code starts*/
989 buf[3] = htobe32(MXGE_FW_OFFSET + 8);
990 buf[4] = htobe32(size - 8); /* length of code */
991 buf[5] = htobe32(8); /* where to copy to */
992 buf[6] = htobe32(0); /* where to jump to */
993
994 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
995 mxge_pio_copy(submit, buf, 64);
996 wmb();
997 DELAY(1000);
998 wmb();
999 i = 0;
1000 while (*confirm != 0xffffffff && i < 20) {
1001 DELAY(1000*10);
1002 i++;
1003 bus_dmamap_sync(sc->cmd_dma.dmat,
1004 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
1005 }
1006 if (*confirm != 0xffffffff) {
1007 device_printf(sc->dev,"handoff failed (%p = 0x%x)",
1008 confirm, *confirm);
1009
1010 return ENXIO;
1011 }
1012 return 0;
1013 }
1014
1015 static int
1016 mxge_update_mac_address(mxge_softc_t *sc)
1017 {
1018 mxge_cmd_t cmd;
1019 uint8_t *addr = sc->mac_addr;
1020 int status;
1021
1022 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
1023 | (addr[2] << 8) | addr[3]);
1024
1025 cmd.data1 = ((addr[4] << 8) | (addr[5]));
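/* For example, MAC 00:60:dd:12:34:56 packs as data0 = 0x0060dd12 and
 * data1 = 0x00003456: the high four bytes of the address in data0 and
 * the low two in data1, before mxge_send_cmd() byte-swaps them. */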
1026
1027 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
1028 return status;
1029 }
1030
1031 static int
1032 mxge_change_pause(mxge_softc_t *sc, int pause)
1033 {
1034 mxge_cmd_t cmd;
1035 int status;
1036
1037 if (pause)
1038 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
1039 &cmd);
1040 else
1041 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
1042 &cmd);
1043
1044 if (status) {
1045 device_printf(sc->dev, "Failed to set flow control mode\n");
1046 return ENXIO;
1047 }
1048 sc->pause = pause;
1049 return 0;
1050 }
1051
1052 static void
1053 mxge_change_promisc(mxge_softc_t *sc, int promisc)
1054 {
1055 mxge_cmd_t cmd;
1056 int status;
1057
1058 if (mxge_always_promisc)
1059 promisc = 1;
1060
1061 if (promisc)
1062 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
1063 &cmd);
1064 else
1065 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
1066 &cmd);
1067
1068 if (status) {
1069 device_printf(sc->dev, "Failed to set promisc mode\n");
1070 }
1071 }
1072
1073 struct mxge_add_maddr_ctx {
1074 mxge_softc_t *sc;
1075 int error;
1076 };
1077
1078 static u_int
1079 mxge_add_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
1080 {
1081 struct mxge_add_maddr_ctx *ctx = arg;
1082 mxge_cmd_t cmd;
1083
1084 if (ctx->error != 0)
1085 return (0);
1086 bcopy(LLADDR(sdl), &cmd.data0, 4);
1087 bcopy(LLADDR(sdl) + 4, &cmd.data1, 2);
1088 cmd.data0 = htonl(cmd.data0);
1089 cmd.data1 = htonl(cmd.data1);
1090
1091 ctx->error = mxge_send_cmd(ctx->sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
1092
1093 return (1);
1094 }
1095
1096 static void
1097 mxge_set_multicast_list(mxge_softc_t *sc)
1098 {
1099 struct mxge_add_maddr_ctx ctx;
1100 if_t ifp = sc->ifp;
1101 mxge_cmd_t cmd;
1102 int err;
1103
1104 /* This firmware is known to not support multicast */
1105 if (!sc->fw_multicast_support)
1106 return;
1107
1108 /* Disable multicast filtering while we play with the lists*/
1109 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
1110 if (err != 0) {
1111 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
1112 " error status: %d\n", err);
1113 return;
1114 }
1115
1116 if (sc->adopted_rx_filter_bug)
1117 return;
1118
1119 if (if_getflags(ifp) & IFF_ALLMULTI)
1120 /* request to disable multicast filtering, so quit here */
1121 return;
1122
1123 /* Flush all the filters */
1124
1125 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
1126 if (err != 0) {
1127 device_printf(sc->dev,
1128 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
1129 ", error status: %d\n", err);
1130 return;
1131 }
1132
1133 /* Walk the multicast list, and add each address */
1134 ctx.sc = sc;
1135 ctx.error = 0;
1136 if_foreach_llmaddr(ifp, mxge_add_maddr, &ctx);
1137 if (ctx.error != 0) {
1138 device_printf(sc->dev, "Failed MXGEFW_JOIN_MULTICAST_GROUP, "
1139 "error status:" "%d\t", ctx.error);
1140 /* abort, leaving multicast filtering off */
1141 return;
1142 }
1143
1144 /* Enable multicast filtering */
1145 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
1146 if (err != 0) {
1147 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
1148 ", error status: %d\n", err);
1149 }
1150 }
1151
1152 static int
1153 mxge_max_mtu(mxge_softc_t *sc)
1154 {
1155 mxge_cmd_t cmd;
1156 int status;
1157
1158 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
1159 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1160
1161 /* try to set nbufs to see if we can
1162 use virtually contiguous jumbos */
1163 cmd.data0 = 0;
1164 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
1165 &cmd);
1166 if (status == 0)
1167 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1168
1169 /* otherwise, we're limited to MJUMPAGESIZE */
1170 return MJUMPAGESIZE - MXGEFW_PAD;
1171 }
1172
1173 static int
1174 mxge_reset(mxge_softc_t *sc, int interrupts_setup)
1175 {
1176 struct mxge_slice_state *ss;
1177 mxge_rx_done_t *rx_done;
1178 volatile uint32_t *irq_claim;
1179 mxge_cmd_t cmd;
1180 int slice, status;
1181
1182 /* try to send a reset command to the card to see if it
1183 is alive */
1184 memset(&cmd, 0, sizeof (cmd));
1185 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
1186 if (status != 0) {
1187 device_printf(sc->dev, "failed reset\n");
1188 return ENXIO;
1189 }
1190
1191 mxge_dummy_rdma(sc, 1);
1192
1193 /* set the intrq size */
1194 cmd.data0 = sc->rx_ring_size;
1195 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
1196
1197 /*
1198 * Even though we already know how many slices are supported
1199 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
1200 * has magic side effects, and must be called after a reset.
1201 * It must be called prior to calling any RSS related cmds,
1202 * including assigning an interrupt queue for anything but
1203 * slice 0. It must also be called *after*
1204 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1205 * the firmware to compute offsets.
1206 */
1207
1208 if (sc->num_slices > 1) {
1209 /* ask the maximum number of slices it supports */
1210 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
1211 &cmd);
1212 if (status != 0) {
1213 device_printf(sc->dev,
1214 "failed to get number of slices\n");
1215 return status;
1216 }
1217 /*
1218 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1219 * to setting up the interrupt queue DMA
1220 */
1221 cmd.data0 = sc->num_slices;
1222 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
1223 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
1224 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
1225 &cmd);
1226 if (status != 0) {
1227 device_printf(sc->dev,
1228 "failed to set number of slices\n");
1229 return status;
1230 }
1231 }
1232
1233 if (interrupts_setup) {
1234 /* Now exchange information about interrupts */
1235 for (slice = 0; slice < sc->num_slices; slice++) {
1236 rx_done = &sc->ss[slice].rx_done;
1237 memset(rx_done->entry, 0, sc->rx_ring_size);
1238 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
1239 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
1240 cmd.data2 = slice;
1241 status |= mxge_send_cmd(sc,
1242 MXGEFW_CMD_SET_INTRQ_DMA,
1243 &cmd);
1244 }
1245 }
1246
1247 status |= mxge_send_cmd(sc,
1248 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
1249
1250 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);
1251
1252 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
1253 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
1254
1255 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
1256 &cmd);
1257 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
1258 if (status != 0) {
1259 device_printf(sc->dev, "failed set interrupt parameters\n");
1260 return status;
1261 }
1262
1263 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);
1264
1265 /* run a DMA benchmark */
1266 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST);
1267
1268 for (slice = 0; slice < sc->num_slices; slice++) {
1269 ss = &sc->ss[slice];
1270
1271 ss->irq_claim = irq_claim + (2 * slice);
1272 /* reset mcp/driver shared state back to 0 */
1273 ss->rx_done.idx = 0;
1274 ss->rx_done.cnt = 0;
1275 ss->tx.req = 0;
1276 ss->tx.done = 0;
1277 ss->tx.pkt_done = 0;
1278 ss->tx.queue_active = 0;
1279 ss->tx.activate = 0;
1280 ss->tx.deactivate = 0;
1281 ss->tx.wake = 0;
1282 ss->tx.defrag = 0;
1283 ss->tx.stall = 0;
1284 ss->rx_big.cnt = 0;
1285 ss->rx_small.cnt = 0;
1286 ss->lc.lro_bad_csum = 0;
1287 ss->lc.lro_queued = 0;
1288 ss->lc.lro_flushed = 0;
1289 if (ss->fw_stats != NULL) {
1290 bzero(ss->fw_stats, sizeof *ss->fw_stats);
1291 }
1292 }
1293 sc->rdma_tags_available = 15;
1294 status = mxge_update_mac_address(sc);
1295 mxge_change_promisc(sc, if_getflags(sc->ifp) & IFF_PROMISC);
1296 mxge_change_pause(sc, sc->pause);
1297 mxge_set_multicast_list(sc);
1298 if (sc->throttle) {
1299 cmd.data0 = sc->throttle;
1300 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
1301 &cmd)) {
1302 device_printf(sc->dev,
1303 "can't enable throttle\n");
1304 }
1305 }
1306 return status;
1307 }
1308
1309 static int
1310 mxge_change_throttle(SYSCTL_HANDLER_ARGS)
1311 {
1312 mxge_cmd_t cmd;
1313 mxge_softc_t *sc;
1314 int err;
1315 unsigned int throttle;
1316
1317 sc = arg1;
1318 throttle = sc->throttle;
1319 err = sysctl_handle_int(oidp, &throttle, arg2, req);
1320 if (err != 0) {
1321 return err;
1322 }
1323
1324 if (throttle == sc->throttle)
1325 return 0;
1326
1327 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
1328 return EINVAL;
1329
1330 mtx_lock(&sc->driver_mtx);
1331 cmd.data0 = throttle;
1332 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
1333 if (err == 0)
1334 sc->throttle = throttle;
1335 mtx_unlock(&sc->driver_mtx);
1336 return err;
1337 }
1338
1339 static int
1340 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
1341 {
1342 mxge_softc_t *sc;
1343 unsigned int intr_coal_delay;
1344 int err;
1345
1346 sc = arg1;
1347 intr_coal_delay = sc->intr_coal_delay;
1348 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
1349 if (err != 0) {
1350 return err;
1351 }
1352 if (intr_coal_delay == sc->intr_coal_delay)
1353 return 0;
1354
1355 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
1356 return EINVAL;
1357
1358 mtx_lock(&sc->driver_mtx);
1359 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
1360 sc->intr_coal_delay = intr_coal_delay;
1361
1362 mtx_unlock(&sc->driver_mtx);
1363 return err;
1364 }
1365
1366 static int
1367 mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
1368 {
1369 mxge_softc_t *sc;
1370 unsigned int enabled;
1371 int err;
1372
1373 sc = arg1;
1374 enabled = sc->pause;
1375 err = sysctl_handle_int(oidp, &enabled, arg2, req);
1376 if (err != 0) {
1377 return err;
1378 }
1379 if (enabled == sc->pause)
1380 return 0;
1381
1382 mtx_lock(&sc->driver_mtx);
1383 err = mxge_change_pause(sc, enabled);
1384 mtx_unlock(&sc->driver_mtx);
1385 return err;
1386 }
1387
1388 static int
1389 mxge_handle_be32(SYSCTL_HANDLER_ARGS)
1390 {
1391 int err;
1392
1393 if (arg1 == NULL)
1394 return EFAULT;
1395 arg2 = be32toh(*(int *)arg1);
1396 arg1 = NULL;
1397 err = sysctl_handle_int(oidp, arg1, arg2, req);
1398
1399 return err;
1400 }
1401
1402 static void
1403 mxge_rem_sysctls(mxge_softc_t *sc)
1404 {
1405 struct mxge_slice_state *ss;
1406 int slice;
1407
1408 if (sc->slice_sysctl_tree == NULL)
1409 return;
1410
1411 for (slice = 0; slice < sc->num_slices; slice++) {
1412 ss = &sc->ss[slice];
1413 if (ss == NULL || ss->sysctl_tree == NULL)
1414 continue;
1415 sysctl_ctx_free(&ss->sysctl_ctx);
1416 ss->sysctl_tree = NULL;
1417 }
1418 sysctl_ctx_free(&sc->slice_sysctl_ctx);
1419 sc->slice_sysctl_tree = NULL;
1420 }
1421
1422 static void
1423 mxge_add_sysctls(mxge_softc_t *sc)
1424 {
1425 struct sysctl_ctx_list *ctx;
1426 struct sysctl_oid_list *children;
1427 mcp_irq_data_t *fw;
1428 struct mxge_slice_state *ss;
1429 int slice;
1430 char slice_num[8];
1431
1432 ctx = device_get_sysctl_ctx(sc->dev);
1433 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
1434 fw = sc->ss[0].fw_stats;
1435
1436 /* random information */
1437 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1438 "firmware_version",
1439 CTLFLAG_RD, sc->fw_version,
1440 0, "firmware version");
1441 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1442 "serial_number",
1443 CTLFLAG_RD, sc->serial_number_string,
1444 0, "serial number");
1445 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1446 "product_code",
1447 CTLFLAG_RD, sc->product_code_string,
1448 0, "product_code");
1449 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1450 "pcie_link_width",
1451 CTLFLAG_RD, &sc->link_width,
1452 0, "tx_boundary");
1453 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1454 "tx_boundary",
1455 CTLFLAG_RD, &sc->tx_boundary,
1456 0, "tx_boundary");
1457 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1458 "write_combine",
1459 CTLFLAG_RD, &sc->wc,
1460 0, "write combining PIO?");
1461 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1462 "read_dma_MBs",
1463 CTLFLAG_RD, &sc->read_dma,
1464 0, "DMA Read speed in MB/s");
1465 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1466 "write_dma_MBs",
1467 CTLFLAG_RD, &sc->write_dma,
1468 0, "DMA Write speed in MB/s");
1469 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1470 "read_write_dma_MBs",
1471 CTLFLAG_RD, &sc->read_write_dma,
1472 0, "DMA concurrent Read/Write speed in MB/s");
1473 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1474 "watchdog_resets",
1475 CTLFLAG_RD, &sc->watchdog_resets,
1476 0, "Number of times NIC was reset");
1477
1478 /* performance related tunables */
1479 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1480 "intr_coal_delay", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
1481 sc, 0, mxge_change_intr_coal, "I",
1482 "interrupt coalescing delay in usecs");
1483
1484 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1485 "throttle", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
1486 mxge_change_throttle, "I", "transmit throttling");
1487
1488 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1489 "flow_control_enabled",
1490 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
1491 mxge_change_flow_control, "I",
1492 "interrupt coalescing delay in usecs");
1493
1494 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1495 "deassert_wait",
1496 CTLFLAG_RW, &mxge_deassert_wait,
1497 0, "Wait for IRQ line to go low in ihandler");
1498
1499 /* stats block from firmware is in network byte order.
1500 Need to swap it */
1501 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1502 "link_up", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1503 &fw->link_up, 0, mxge_handle_be32, "I", "link up");
1504 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1505 "rdma_tags_available", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1506 &fw->rdma_tags_available, 0, mxge_handle_be32, "I",
1507 "rdma_tags_available");
1508 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1509 "dropped_bad_crc32", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1510 &fw->dropped_bad_crc32, 0, mxge_handle_be32, "I",
1511 "dropped_bad_crc32");
1512 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1513 "dropped_bad_phy", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1514 &fw->dropped_bad_phy, 0, mxge_handle_be32, "I", "dropped_bad_phy");
1515 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1516 "dropped_link_error_or_filtered",
1517 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1518 &fw->dropped_link_error_or_filtered, 0, mxge_handle_be32, "I",
1519 "dropped_link_error_or_filtered");
1520 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1521 "dropped_link_overflow",
1522 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1523 &fw->dropped_link_overflow, 0, mxge_handle_be32, "I",
1524 "dropped_link_overflow");
1525 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1526 "dropped_multicast_filtered",
1527 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1528 &fw->dropped_multicast_filtered, 0, mxge_handle_be32, "I",
1529 "dropped_multicast_filtered");
1530 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1531 "dropped_no_big_buffer",
1532 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1533 &fw->dropped_no_big_buffer, 0, mxge_handle_be32, "I",
1534 "dropped_no_big_buffer");
1535 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1536 "dropped_no_small_buffer",
1537 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1538 &fw->dropped_no_small_buffer, 0, mxge_handle_be32, "I",
1539 "dropped_no_small_buffer");
1540 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1541 "dropped_overrun",
1542 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1543 &fw->dropped_overrun, 0, mxge_handle_be32, "I",
1544 "dropped_overrun");
1545 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1546 "dropped_pause", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1547 &fw->dropped_pause, 0, mxge_handle_be32, "I", "dropped_pause");
1548 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1549 "dropped_runt", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1550 &fw->dropped_runt, 0, mxge_handle_be32, "I", "dropped_runt");
1551
1552 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1553 "dropped_unicast_filtered",
1554 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
1555 &fw->dropped_unicast_filtered, 0, mxge_handle_be32, "I",
1556 "dropped_unicast_filtered");
1557
1558 /* verbose printing? */
1559 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1560 "verbose",
1561 CTLFLAG_RW, &mxge_verbose,
1562 0, "verbose printing");
1563
1564 /* add counters exported for debugging from all slices */
1565 sysctl_ctx_init(&sc->slice_sysctl_ctx);
1566 sc->slice_sysctl_tree =
1567 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
1568 "slice", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
1569
1570 for (slice = 0; slice < sc->num_slices; slice++) {
1571 ss = &sc->ss[slice];
1572 sysctl_ctx_init(&ss->sysctl_ctx);
1573 ctx = &ss->sysctl_ctx;
1574 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
1575 sprintf(slice_num, "%d", slice);
1576 ss->sysctl_tree =
1577 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
1578 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
1579 children = SYSCTL_CHILDREN(ss->sysctl_tree);
1580 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1581 "rx_small_cnt",
1582 CTLFLAG_RD, &ss->rx_small.cnt,
1583 0, "rx_small_cnt");
1584 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1585 "rx_big_cnt",
1586 CTLFLAG_RD, &ss->rx_big.cnt,
1587 0, "rx_small_cnt");
1588 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1589 "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed,
1590 0, "number of lro merge queues flushed");
1591
1592 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1593 "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum,
1594 0, "number of bad csums preventing LRO");
1595
1596 SYSCTL_ADD_U64(ctx, children, OID_AUTO,
1597 "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued,
1598 0, "number of frames appended to lro merge"
1599 "queues");
1600
1601 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1602 "tx_req",
1603 CTLFLAG_RD, &ss->tx.req,
1604 0, "tx_req");
1605
1606 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1607 "tx_done",
1608 CTLFLAG_RD, &ss->tx.done,
1609 0, "tx_done");
1610 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1611 "tx_pkt_done",
1612 CTLFLAG_RD, &ss->tx.pkt_done,
1613 0, "tx_done");
1614 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1615 "tx_stall",
1616 CTLFLAG_RD, &ss->tx.stall,
1617 0, "tx_stall");
1618 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1619 "tx_wake",
1620 CTLFLAG_RD, &ss->tx.wake,
1621 0, "tx_wake");
1622 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1623 "tx_defrag",
1624 CTLFLAG_RD, &ss->tx.defrag,
1625 0, "tx_defrag");
1626 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1627 "tx_queue_active",
1628 CTLFLAG_RD, &ss->tx.queue_active,
1629 0, "tx_queue_active");
1630 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1631 "tx_activate",
1632 CTLFLAG_RD, &ss->tx.activate,
1633 0, "tx_activate");
1634 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1635 "tx_deactivate",
1636 CTLFLAG_RD, &ss->tx.deactivate,
1637 0, "tx_deactivate");
1638 }
1639 }
1640
1641 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1642 backwards one at a time and handle ring wraps */
1643
1644 static inline void
1645 mxge_submit_req_backwards(mxge_tx_ring_t *tx,
1646 mcp_kreq_ether_send_t *src, int cnt)
1647 {
1648 int idx, starting_slot;
1649 starting_slot = tx->req;
1650 while (cnt > 1) {
1651 cnt--;
1652 idx = (starting_slot + cnt) & tx->mask;
1653 mxge_pio_copy(&tx->lanai[idx],
1654 &src[cnt], sizeof(*src));
1655 wmb();
1656 }
1657 }
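/* Worked example: with a 16-slot ring (tx->mask == 15), tx->req == 14 and
 * cnt == 4, the chain occupies slots 14, 15, 0, 1.  The loop above writes
 * slots 1, 0, 15 (in that order) and leaves slot 14, the first request,
 * for mxge_submit_req() to publish last, so the NIC never parses a chain
 * whose tail is still being written. */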
1658
1659 /*
1660 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1661 * at most 32 bytes at a time, so as to avoid involving the software
1662 * pio handler in the nic. We re-write the first segment's flags
1663 * to mark them valid only after writing the entire chain
1664 */
1665
1666 static inline void
1667 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
1668 int cnt)
1669 {
1670 int idx, i;
1671 uint32_t *src_ints;
1672 volatile uint32_t *dst_ints;
1673 mcp_kreq_ether_send_t *srcp;
1674 volatile mcp_kreq_ether_send_t *dstp, *dst;
1675 uint8_t last_flags;
1676
1677 idx = tx->req & tx->mask;
1678
1679 last_flags = src->flags;
1680 src->flags = 0;
1681 wmb();
1682 dst = dstp = &tx->lanai[idx];
1683 srcp = src;
1684
1685 if ((idx + cnt) < tx->mask) {
1686 for (i = 0; i < (cnt - 1); i += 2) {
1687 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
1688 wmb(); /* force write every 32 bytes */
1689 srcp += 2;
1690 dstp += 2;
1691 }
1692 } else {
1693 /* submit all but the first request, and ensure
1694 that it is submitted below */
1695 mxge_submit_req_backwards(tx, src, cnt);
1696 i = 0;
1697 }
1698 if (i < cnt) {
1699 /* submit the first request */
1700 mxge_pio_copy(dstp, srcp, sizeof(*src));
1701 wmb(); /* barrier before setting valid flag */
1702 }
1703
1704 /* re-write the last 32-bits with the valid flags */
1705 src->flags = last_flags;
1706 src_ints = (uint32_t *)src;
1707 src_ints+=3;
1708 dst_ints = (volatile uint32_t *)dst;
1709 dst_ints+=3;
1710 *dst_ints = *src_ints;
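/* Assuming the 16-byte mcp_kreq_ether_send_t layout from mxge_mcp.h,
 * word 3 (src_ints/dst_ints + 3) is the last 32-bit word of the first
 * request and contains the flags byte; rewriting it above publishes the
 * stashed flags in a single store, making the whole chain visible to
 * the NIC atomically. */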
1711 tx->req += cnt;
1712 wmb();
1713 }
1714
1715 static int
1716 mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m,
1717 struct mxge_pkt_info *pi)
1718 {
1719 struct ether_vlan_header *eh;
1720 uint16_t etype;
1721 int tso = m->m_pkthdr.csum_flags & (CSUM_TSO);
1722 #if IFCAP_TSO6 && defined(INET6)
1723 int nxt;
1724 #endif
1725
1726 eh = mtod(m, struct ether_vlan_header *);
1727 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1728 etype = ntohs(eh->evl_proto);
1729 pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1730 } else {
1731 etype = ntohs(eh->evl_encap_proto);
1732 pi->ip_off = ETHER_HDR_LEN;
1733 }
1734
1735 switch (etype) {
1736 case ETHERTYPE_IP:
1737 /*
1738 * ensure ip header is in first mbuf, copy it to a
1739 * scratch buffer if not
1740 */
1741 pi->ip = (struct ip *)(m->m_data + pi->ip_off);
1742 pi->ip6 = NULL;
1743 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) {
1744 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip),
1745 ss->scratch);
1746 pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
1747 }
1748 pi->ip_hlen = pi->ip->ip_hl << 2;
1749 if (!tso)
1750 return 0;
1751
1752 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
1753 sizeof(struct tcphdr))) {
1754 m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
1755 sizeof(struct tcphdr), ss->scratch);
1756 pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
1757 }
1758 pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen);
1759 break;
1760 #if IFCAP_TSO6 && defined(INET6)
1761 case ETHERTYPE_IPV6:
1762 pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off);
1763 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) {
1764 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6),
1765 ss->scratch);
1766 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
1767 }
1768 nxt = 0;
1769 pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt);
1770 pi->ip_hlen -= pi->ip_off;
1771 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
1772 return EINVAL;
1773
1774 if (!tso)
1775 return 0;
1776
1777 if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen)
1778 return EINVAL;
1779
1780 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
1781 sizeof(struct tcphdr))) {
1782 m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
1783 sizeof(struct tcphdr), ss->scratch);
1784 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
1785 }
1786 pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen);
1787 break;
1788 #endif
1789 default:
1790 return EINVAL;
1791 }
1792 return 0;
1793 }
1794
1795 #if IFCAP_TSO4
1796
1797 static void
1798 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
1799 int busdma_seg_cnt, struct mxge_pkt_info *pi)
1800 {
1801 mxge_tx_ring_t *tx;
1802 mcp_kreq_ether_send_t *req;
1803 bus_dma_segment_t *seg;
1804 uint32_t low, high_swapped;
1805 int len, seglen, cum_len, cum_len_next;
1806 int next_is_first, chop, cnt, rdma_count, small;
1807 uint16_t pseudo_hdr_offset, cksum_offset, mss, sum;
1808 uint8_t flags, flags_next;
1809 static int once;
1810
1811 mss = m->m_pkthdr.tso_segsz;
1812
1813 /* negative cum_len signifies to the
1814 * send loop that we are still in the
1815 * header portion of the TSO packet.
1816 */
1817
1818 cksum_offset = pi->ip_off + pi->ip_hlen;
1819 cum_len = -(cksum_offset + (pi->tcp->th_off << 2));
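/* Example: for an untagged IPv4/TCP packet with no IP or TCP options,
 * ip_off = 14, ip_hlen = 20 and th_off = 5, so cksum_offset = 34 and
 * cum_len starts at -(34 + 20) = -54; cum_len crosses zero exactly when
 * the 54 header bytes have been emitted and payload chopping begins. */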
1820
1821 /* TSO implies checksum offload on this hardware */
1822 if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) {
1823 /*
1824 * If packet has full TCP csum, replace it with pseudo hdr
1825 * sum that the NIC expects, otherwise the NIC will emit
1826 * packets with bad TCP checksums.
1827 */
1828 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
1829 if (pi->ip6) {
1830 #if (CSUM_TCP_IPV6 != 0) && defined(INET6)
1831 m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
1832 sum = in6_cksum_pseudo(pi->ip6,
1833 m->m_pkthdr.len - cksum_offset,
1834 IPPROTO_TCP, 0);
1835 #endif
1836 } else {
1837 #ifdef INET
1838 m->m_pkthdr.csum_flags |= CSUM_TCP;
1839 sum = in_pseudo(pi->ip->ip_src.s_addr,
1840 pi->ip->ip_dst.s_addr,
1841 htons(IPPROTO_TCP + (m->m_pkthdr.len -
1842 cksum_offset)));
1843 #endif
1844 }
1845 m_copyback(m, offsetof(struct tcphdr, th_sum) +
1846 cksum_offset, sizeof(sum), (caddr_t)&sum);
1847 }
1848 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;
1849
1850 /* for TSO, pseudo_hdr_offset holds mss.
1851 * The firmware figures out where to put
1852 * the checksum by parsing the header. */
1853 pseudo_hdr_offset = htobe16(mss);
1854
1855 if (pi->ip6) {
1856 /*
1857 * for IPv6 TSO, the "checksum offset" is re-purposed
1858 * to store the TCP header len
1859 */
1860 cksum_offset = (pi->tcp->th_off << 2);
1861 }
1862
1863 tx = &ss->tx;
1864 req = tx->req_list;
1865 seg = tx->seg_list;
1866 cnt = 0;
1867 rdma_count = 0;
1868 /* "rdma_count" is the number of RDMAs belonging to the
1869 * current packet BEFORE the current send request. For
1870 * non-TSO packets, this is equal to "count".
1871 * For TSO packets, rdma_count needs to be reset
1872 * to 0 after a segment cut.
1873 *
1874 * The rdma_count field of the send request is
1875 * the number of RDMAs of the packet starting at
1876  * that request. For TSO send requests with one or more cuts
1877 * in the middle, this is the number of RDMAs starting
1878 * after the last cut in the request. All previous
1879 * segments before the last cut implicitly have 1 RDMA.
1880 *
1881 * Since the number of RDMAs is not known beforehand,
1882 * it must be filled-in retroactively - after each
1883 * segmentation cut or at the end of the entire packet.
1884 */
1885
1886 while (busdma_seg_cnt) {
1887 		/* Break the busdma segment up into pieces */
1888 low = MXGE_LOWPART_TO_U32(seg->ds_addr);
1889 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
1890 len = seg->ds_len;
1891
1892 while (len) {
1893 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
1894 seglen = len;
1895 cum_len_next = cum_len + seglen;
1896 (req-rdma_count)->rdma_count = rdma_count + 1;
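			/*
			 * The rdma_count updates in the payload branch
			 * below are branchless: -(chop | next_is_first)
			 * is either 0 or ~0, so the OR resets rdma_count
			 * to -1 at every segmentation cut or frame
			 * boundary, and adding (chop & !next_is_first)
			 * leaves it at 0 for a mid-frame chop; the
			 * unconditional rdma_count++ further down then
			 * restarts the per-frame count.
			 */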
1897 if (__predict_true(cum_len >= 0)) {
1898 /* payload */
1899 chop = (cum_len_next > mss);
1900 cum_len_next = cum_len_next % mss;
1901 next_is_first = (cum_len_next == 0);
1902 flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
1903 flags_next |= next_is_first *
1904 MXGEFW_FLAGS_FIRST;
1905 rdma_count |= -(chop | next_is_first);
1906 rdma_count += chop & !next_is_first;
1907 } else if (cum_len_next >= 0) {
1908 /* header ends */
1909 rdma_count = -1;
1910 cum_len_next = 0;
1911 seglen = -cum_len;
1912 small = (mss <= MXGEFW_SEND_SMALL_SIZE);
1913 flags_next = MXGEFW_FLAGS_TSO_PLD |
1914 MXGEFW_FLAGS_FIRST |
1915 (small * MXGEFW_FLAGS_SMALL);
1916 }
1917
1918 req->addr_high = high_swapped;
1919 req->addr_low = htobe32(low);
1920 req->pseudo_hdr_offset = pseudo_hdr_offset;
1921 req->pad = 0;
1922 req->rdma_count = 1;
1923 req->length = htobe16(seglen);
1924 req->cksum_offset = cksum_offset;
1925 req->flags = flags | ((cum_len & 1) *
1926 MXGEFW_FLAGS_ALIGN_ODD);
1927 low += seglen;
1928 len -= seglen;
1929 cum_len = cum_len_next;
1930 flags = flags_next;
1931 req++;
1932 cnt++;
1933 rdma_count++;
1934 if (cksum_offset != 0 && !pi->ip6) {
1935 if (__predict_false(cksum_offset > seglen))
1936 cksum_offset -= seglen;
1937 else
1938 cksum_offset = 0;
1939 }
1940 if (__predict_false(cnt > tx->max_desc))
1941 goto drop;
1942 }
1943 busdma_seg_cnt--;
1944 seg++;
1945 }
1946 (req-rdma_count)->rdma_count = rdma_count;
1947
1948 do {
1949 req--;
1950 req->flags |= MXGEFW_FLAGS_TSO_LAST;
1951 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));
1952
1953 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
1954 mxge_submit_req(tx, tx->req_list, cnt);
1955
1956 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
1957 /* tell the NIC to start polling this slice */
1958 *tx->send_go = 1;
1959 tx->queue_active = 1;
1960 tx->activate++;
1961 wmb();
1962 }
1963
1964 return;
1965
1966 drop:
1967 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
1968 m_freem(m);
1969 ss->oerrors++;
1970 if (!once) {
1971 printf("tx->max_desc exceeded via TSO!\n");
1972 printf("mss = %d, %ld, %d!\n", mss,
1973 (long)seg - (long)tx->seg_list, tx->max_desc);
1974 once = 1;
1975 }
1976 return;
1977
1978 }
1979
1980 #endif /* IFCAP_TSO4 */
1981
1982 #ifdef MXGE_NEW_VLAN_API
1983 /*
1984 * We reproduce the software vlan tag insertion from
1985 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
1986 * vlan tag insertion. We need to advertise this in order to have the
1987 * vlan interface respect our csum offload flags.
1988 */
1989 static struct mbuf *
1990 mxge_vlan_tag_insert(struct mbuf *m)
1991 {
1992 struct ether_vlan_header *evl;
1993
1994 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
1995 if (__predict_false(m == NULL))
1996 return NULL;
1997 if (m->m_len < sizeof(*evl)) {
1998 m = m_pullup(m, sizeof(*evl));
1999 if (__predict_false(m == NULL))
2000 return NULL;
2001 }
2002 /*
2003 * Transform the Ethernet header into an Ethernet header
2004 * with 802.1Q encapsulation.
2005 */
2006 evl = mtod(m, struct ether_vlan_header *);
2007 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
2008 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
2009 evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
2010 evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
2011 m->m_flags &= ~M_VLANTAG;
2012 return m;
2013 }
2014 #endif /* MXGE_NEW_VLAN_API */
2015
2016 static void
2017 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
2018 {
2019 struct mxge_pkt_info pi = {0,0,0,0};
2020 mxge_softc_t *sc;
2021 mcp_kreq_ether_send_t *req;
2022 bus_dma_segment_t *seg;
2023 struct mbuf *m_tmp;
2024 mxge_tx_ring_t *tx;
2025 int cnt, cum_len, err, i, idx, odd_flag;
2026 uint16_t pseudo_hdr_offset;
2027 uint8_t flags, cksum_offset;
2028
2029 sc = ss->sc;
2030 tx = &ss->tx;
2031
2032 #ifdef MXGE_NEW_VLAN_API
2033 if (m->m_flags & M_VLANTAG) {
2034 m = mxge_vlan_tag_insert(m);
2035 if (__predict_false(m == NULL))
2036 goto drop_without_m;
2037 }
2038 #endif
2039 if (m->m_pkthdr.csum_flags &
2040 (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2041 if (mxge_parse_tx(ss, m, &pi))
2042 goto drop;
2043 }
2044
2045 /* (try to) map the frame for DMA */
2046 idx = tx->req & tx->mask;
2047 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
2048 m, tx->seg_list, &cnt,
2049 BUS_DMA_NOWAIT);
2050 if (__predict_false(err == EFBIG)) {
2051 /* Too many segments in the chain. Try
2052 to defrag */
2053 m_tmp = m_defrag(m, M_NOWAIT);
2054 if (m_tmp == NULL) {
2055 goto drop;
2056 }
2057 ss->tx.defrag++;
2058 m = m_tmp;
2059 err = bus_dmamap_load_mbuf_sg(tx->dmat,
2060 tx->info[idx].map,
2061 m, tx->seg_list, &cnt,
2062 BUS_DMA_NOWAIT);
2063 }
2064 if (__predict_false(err != 0)) {
2065 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
2066 " packet len = %d\n", err, m->m_pkthdr.len);
2067 goto drop;
2068 }
2069 bus_dmamap_sync(tx->dmat, tx->info[idx].map,
2070 BUS_DMASYNC_PREWRITE);
2071 tx->info[idx].m = m;
2072
2073 #if IFCAP_TSO4
2074 /* TSO is different enough, we handle it in another routine */
2075 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
2076 mxge_encap_tso(ss, m, cnt, &pi);
2077 return;
2078 }
2079 #endif
2080
2081 req = tx->req_list;
2082 cksum_offset = 0;
2083 pseudo_hdr_offset = 0;
2084 flags = MXGEFW_FLAGS_NO_TSO;
2085
2086 /* checksum offloading? */
2087 if (m->m_pkthdr.csum_flags &
2088 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2089 /* ensure ip header is in first mbuf, copy
2090 it to a scratch buffer if not */
2091 cksum_offset = pi.ip_off + pi.ip_hlen;
2092 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
2093 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
2094 req->cksum_offset = cksum_offset;
2095 flags |= MXGEFW_FLAGS_CKSUM;
2096 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
2097 } else {
2098 odd_flag = 0;
2099 }
2100 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
2101 flags |= MXGEFW_FLAGS_SMALL;
2102
2103 /* convert segments into a request list */
2104 cum_len = 0;
2105 seg = tx->seg_list;
2106 req->flags = MXGEFW_FLAGS_FIRST;
2107 for (i = 0; i < cnt; i++) {
2108 req->addr_low =
2109 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2110 req->addr_high =
2111 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2112 req->length = htobe16(seg->ds_len);
2113 req->cksum_offset = cksum_offset;
2114 if (cksum_offset > seg->ds_len)
2115 cksum_offset -= seg->ds_len;
2116 else
2117 cksum_offset = 0;
2118 req->pseudo_hdr_offset = pseudo_hdr_offset;
2119 req->pad = 0; /* complete solid 16-byte block */
2120 req->rdma_count = 1;
2121 req->flags |= flags | ((cum_len & 1) * odd_flag);
2122 cum_len += seg->ds_len;
2123 seg++;
2124 req++;
2125 req->flags = 0;
2126 }
2127 req--;
2128 	/* pad runts to 60 bytes (ETHER_MIN_LEN minus the 4-byte FCS) */
2129 if (cum_len < 60) {
2130 req++;
2131 req->addr_low =
2132 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
2133 req->addr_high =
2134 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
2135 req->length = htobe16(60 - cum_len);
2136 req->cksum_offset = 0;
2137 req->pseudo_hdr_offset = pseudo_hdr_offset;
2138 req->pad = 0; /* complete solid 16-byte block */
2139 req->rdma_count = 1;
2140 req->flags |= flags | ((cum_len & 1) * odd_flag);
2141 cnt++;
2142 }
2143
2144 tx->req_list[0].rdma_count = cnt;
2145 #if 0
2146 /* print what the firmware will see */
2147 for (i = 0; i < cnt; i++) {
2148 printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
2149 "cso:%d, flags:0x%x, rdma:%d\n",
2150 i, (int)ntohl(tx->req_list[i].addr_high),
2151 (int)ntohl(tx->req_list[i].addr_low),
2152 (int)ntohs(tx->req_list[i].length),
2153 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
2154 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
2155 tx->req_list[i].rdma_count);
2156 }
2157 printf("--------------\n");
2158 #endif
2159 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2160 mxge_submit_req(tx, tx->req_list, cnt);
2161
2162 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2163 /* tell the NIC to start polling this slice */
2164 *tx->send_go = 1;
2165 tx->queue_active = 1;
2166 tx->activate++;
2167 wmb();
2168 }
2169
2170 return;
2171
2172 drop:
2173 m_freem(m);
2174 drop_without_m:
2175 ss->oerrors++;
2176 return;
2177 }
2178
2179 static void
2180 mxge_qflush(if_t ifp)
2181 {
2182 mxge_softc_t *sc = if_getsoftc(ifp);
2183 mxge_tx_ring_t *tx;
2184 struct mbuf *m;
2185 int slice;
2186
2187 for (slice = 0; slice < sc->num_slices; slice++) {
2188 tx = &sc->ss[slice].tx;
2189 mtx_lock(&tx->mtx);
2190 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
2191 m_freem(m);
2192 mtx_unlock(&tx->mtx);
2193 }
2194 if_qflush(ifp);
2195 }
2196
2197 static inline void
2198 mxge_start_locked(struct mxge_slice_state *ss)
2199 {
2200 mxge_softc_t *sc;
2201 struct mbuf *m;
2202 if_t ifp;
2203 mxge_tx_ring_t *tx;
2204
2205 sc = ss->sc;
2206 ifp = sc->ifp;
2207 tx = &ss->tx;
2208
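	/*
	 * tx->req - tx->done is the number of descriptors currently
	 * in flight; keep dequeueing only while more than max_desc
	 * free slots remain, since a single packet may need that many.
	 */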
2209 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2210 m = drbr_dequeue(ifp, tx->br);
2211 if (m == NULL) {
2212 return;
2213 }
2214 /* let BPF see it */
2215 BPF_MTAP(ifp, m);
2216
2217 /* give it to the nic */
2218 mxge_encap(ss, m);
2219 }
2220 /* ran out of transmit slots */
2221 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0)
2222 && (!drbr_empty(ifp, tx->br))) {
2223 ss->if_drv_flags |= IFF_DRV_OACTIVE;
2224 tx->stall++;
2225 }
2226 }
2227
2228 static int
2229 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
2230 {
2231 mxge_softc_t *sc;
2232 if_t ifp;
2233 mxge_tx_ring_t *tx;
2234 int err;
2235
2236 sc = ss->sc;
2237 ifp = sc->ifp;
2238 tx = &ss->tx;
2239
2240 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
2241 IFF_DRV_RUNNING) {
2242 err = drbr_enqueue(ifp, tx->br, m);
2243 return (err);
2244 }
2245
2246 if (!drbr_needs_enqueue(ifp, tx->br) &&
2247 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
2248 /* let BPF see it */
2249 BPF_MTAP(ifp, m);
2250 /* give it to the nic */
2251 mxge_encap(ss, m);
2252 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
2253 return (err);
2254 }
2255 if (!drbr_empty(ifp, tx->br))
2256 mxge_start_locked(ss);
2257 return (0);
2258 }
2259
2260 static int
2261 mxge_transmit(if_t ifp, struct mbuf *m)
2262 {
2263 mxge_softc_t *sc = if_getsoftc(ifp);
2264 struct mxge_slice_state *ss;
2265 mxge_tx_ring_t *tx;
2266 int err = 0;
2267 int slice;
2268
2269 slice = m->m_pkthdr.flowid;
2270 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */
2271
2272 ss = &sc->ss[slice];
2273 tx = &ss->tx;
2274
2275 if (mtx_trylock(&tx->mtx)) {
2276 err = mxge_transmit_locked(ss, m);
2277 mtx_unlock(&tx->mtx);
2278 } else {
2279 err = drbr_enqueue(ifp, tx->br, m);
2280 }
2281
2282 return (err);
2283 }
2284
2285 static void
2286 mxge_start(if_t ifp)
2287 {
2288 mxge_softc_t *sc = if_getsoftc(ifp);
2289 struct mxge_slice_state *ss;
2290
2291 /* only use the first slice for now */
2292 ss = &sc->ss[0];
2293 mtx_lock(&ss->tx.mtx);
2294 mxge_start_locked(ss);
2295 mtx_unlock(&ss->tx.mtx);
2296 }
2297
2298 /*
2299 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2300 * at most 32 bytes at a time, so as to avoid involving the software
2301 * pio handler in the nic. We re-write the first segment's low
2302 * DMA address to mark it valid only after we write the entire chunk
2303 * in a burst
2304 */
2305 static inline void
2306 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2307 mcp_kreq_ether_recv_t *src)
2308 {
2309 uint32_t low;
2310
2311 low = src->addr_low;
2312 src->addr_low = 0xffffffff;
2313 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2314 wmb();
2315 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
2316 wmb();
2317 src->addr_low = low;
2318 dst->addr_low = low;
2319 wmb();
2320 }
2321
2322 static int
2323 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2324 {
2325 bus_dma_segment_t seg;
2326 struct mbuf *m;
2327 mxge_rx_ring_t *rx = &ss->rx_small;
2328 int cnt, err;
2329
2330 m = m_gethdr(M_NOWAIT, MT_DATA);
2331 if (m == NULL) {
2332 rx->alloc_fail++;
2333 err = ENOBUFS;
2334 goto done;
2335 }
2336 m->m_len = MHLEN;
2337 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2338 &seg, &cnt, BUS_DMA_NOWAIT);
2339 if (err != 0) {
2340 m_free(m);
2341 goto done;
2342 }
2343 rx->info[idx].m = m;
2344 rx->shadow[idx].addr_low =
2345 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2346 rx->shadow[idx].addr_high =
2347 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2348
2349 done:
2350 if ((idx & 7) == 7)
2351 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2352 return err;
2353 }
2354
2355 static int
2356 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2357 {
2358 bus_dma_segment_t seg[3];
2359 struct mbuf *m;
2360 mxge_rx_ring_t *rx = &ss->rx_big;
2361 int cnt, err, i;
2362
2363 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
2364 if (m == NULL) {
2365 rx->alloc_fail++;
2366 err = ENOBUFS;
2367 goto done;
2368 }
2369 m->m_len = rx->mlen;
2370 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2371 seg, &cnt, BUS_DMA_NOWAIT);
2372 if (err != 0) {
2373 m_free(m);
2374 goto done;
2375 }
2376 rx->info[idx].m = m;
2377 rx->shadow[idx].addr_low =
2378 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2379 rx->shadow[idx].addr_high =
2380 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2381
2382 done:
2383 for (i = 0; i < rx->nbufs; i++) {
2384 if ((idx & 7) == 7) {
2385 mxge_submit_8rx(&rx->lanai[idx - 7],
2386 &rx->shadow[idx - 7]);
2387 }
2388 idx++;
2389 }
2390 return err;
2391 }
2392
2393 #ifdef INET6
2394
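/*
 * Raw 16-bit ones-complement sum (not inverted) over a 16-bit
 * aligned buffer; callers pass an even length, and the carries
 * are folded back in twice so the result fits in 16 bits.
 */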
2395 static uint16_t
2396 mxge_csum_generic(uint16_t *raw, int len)
2397 {
2398 uint32_t csum;
2399
2400 csum = 0;
2401 while (len > 0) {
2402 csum += *raw;
2403 raw++;
2404 len -= 2;
2405 }
2406 csum = (csum >> 16) + (csum & 0xffff);
2407 csum = (csum >> 16) + (csum & 0xffff);
2408 return (uint16_t)csum;
2409 }
2410
2411 static inline uint16_t
2412 mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum)
2413 {
2414 uint32_t partial;
2415 int nxt, cksum_offset;
2416 struct ip6_hdr *ip6 = p;
2417 uint16_t c;
2418
2419 nxt = ip6->ip6_nxt;
2420 cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN;
2421 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
2422 cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN,
2423 IPPROTO_IPV6, &nxt);
2424 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
2425 return (1);
2426 }
2427
2428 /*
2429 * IPv6 headers do not contain a checksum, and hence
2430 * do not checksum to zero, so they don't "fall out"
2431 * of the partial checksum calculation like IPv4
2432 * headers do. We need to fix the partial checksum by
2433 * subtracting the checksum of the IPv6 header.
2434 */
2435
2436 partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset -
2437 ETHER_HDR_LEN);
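	/*
	 * csum += ~partial, plus the (csum < ~partial) carry below,
	 * performs ones-complement subtraction with end-around
	 * carry, removing the IPv6 header's contribution from the
	 * NIC's full-frame sum.
	 */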
2438 csum += ~partial;
2439 csum += (csum < ~partial);
2440 csum = (csum >> 16) + (csum & 0xFFFF);
2441 csum = (csum >> 16) + (csum & 0xFFFF);
2442 c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt,
2443 csum);
2444 c ^= 0xffff;
2445 return (c);
2446 }
2447 #endif /* INET6 */
2448 /*
2449 * Myri10GE hardware checksums are not valid if the sender
2450 * padded the frame with non-zero padding. This is because
2451 * the firmware just does a simple 16-bit 1s complement
2452 * checksum across the entire frame, excluding the first 14
2453  * bytes. It is best to simply check the checksum and
2454 * tell the stack about it only if the checksum is good
2455 */
2456
2457 static inline uint16_t
2458 mxge_rx_csum(struct mbuf *m, int csum)
2459 {
2460 struct ether_header *eh;
2461 #ifdef INET
2462 struct ip *ip;
2463 #endif
2464 #if defined(INET) || defined(INET6)
2465 int cap = if_getcapenable(m->m_pkthdr.rcvif);
2466 #endif
2467 uint16_t c, etype;
2468
2469 eh = mtod(m, struct ether_header *);
2470 etype = ntohs(eh->ether_type);
2471 switch (etype) {
2472 #ifdef INET
2473 case ETHERTYPE_IP:
2474 if ((cap & IFCAP_RXCSUM) == 0)
2475 return (1);
2476 ip = (struct ip *)(eh + 1);
2477 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)
2478 return (1);
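		/*
		 * The NIC summed everything past the Ethernet header;
		 * a valid IPv4 header sums to 0xffff and so drops out
		 * of that sum.  Adding the pseudo-header and inverting
		 * therefore yields 0 when the TCP/UDP checksum is good.
		 */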
2479 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2480 htonl(ntohs(csum) + ntohs(ip->ip_len) -
2481 (ip->ip_hl << 2) + ip->ip_p));
2482 c ^= 0xffff;
2483 break;
2484 #endif
2485 #ifdef INET6
2486 case ETHERTYPE_IPV6:
2487 if ((cap & IFCAP_RXCSUM_IPV6) == 0)
2488 return (1);
2489 c = mxge_rx_csum6((eh + 1), m, csum);
2490 break;
2491 #endif
2492 default:
2493 c = 1;
2494 }
2495 return (c);
2496 }
2497
2498 static void
2499 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2500 {
2501 struct ether_vlan_header *evl;
2502 uint32_t partial;
2503
2504 evl = mtod(m, struct ether_vlan_header *);
2505
2506 /*
2507 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
2508 * after what the firmware thought was the end of the ethernet
2509 * header.
2510 */
2511
2512 /* put checksum into host byte order */
2513 *csum = ntohs(*csum);
2514 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2515 (*csum) += ~partial;
2516 (*csum) += ((*csum) < ~partial);
2517 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2518 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2519
2520 /* restore checksum to network byte order;
2521 later consumers expect this */
2522 *csum = htons(*csum);
2523
2524 /* save the tag */
2525 #ifdef MXGE_NEW_VLAN_API
2526 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
2527 #else
2528 {
2529 struct m_tag *mtag;
2530 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2531 M_NOWAIT);
2532 if (mtag == NULL)
2533 return;
2534 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2535 m_tag_prepend(m, mtag);
2536 }
2537
2538 #endif
2539 m->m_flags |= M_VLANTAG;
2540
2541 /*
2542 * Remove the 802.1q header by copying the Ethernet
2543 * addresses over it and adjusting the beginning of
2544 * the data in the mbuf. The encapsulated Ethernet
2545 * type field is already in place.
2546 */
2547 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
2548 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2549 m_adj(m, ETHER_VLAN_ENCAP_LEN);
2550 }
2551
2552 static inline void
2553 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len,
2554 uint32_t csum, int lro)
2555 {
2556 mxge_softc_t *sc;
2557 if_t ifp;
2558 struct mbuf *m;
2559 struct ether_header *eh;
2560 mxge_rx_ring_t *rx;
2561 bus_dmamap_t old_map;
2562 int idx;
2563
2564 sc = ss->sc;
2565 ifp = sc->ifp;
2566 rx = &ss->rx_big;
2567 idx = rx->cnt & rx->mask;
2568 rx->cnt += rx->nbufs;
2569 /* save a pointer to the received mbuf */
2570 m = rx->info[idx].m;
2571 /* try to replace the received mbuf */
2572 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2573 /* drop the frame -- the old mbuf is re-cycled */
2574 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
2575 return;
2576 }
2577
2578 /* unmap the received buffer */
2579 old_map = rx->info[idx].map;
2580 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2581 bus_dmamap_unload(rx->dmat, old_map);
2582
2583 /* swap the bus_dmamap_t's */
2584 rx->info[idx].map = rx->extra_map;
2585 rx->extra_map = old_map;
2586
2587 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2588 * aligned */
2589 m->m_data += MXGEFW_PAD;
2590
2591 m->m_pkthdr.rcvif = ifp;
2592 m->m_len = m->m_pkthdr.len = len;
2593 ss->ipackets++;
2594 eh = mtod(m, struct ether_header *);
2595 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2596 mxge_vlan_tag_remove(m, &csum);
2597 }
2598 /* flowid only valid if RSS hashing is enabled */
2599 if (sc->num_slices > 1) {
2600 m->m_pkthdr.flowid = (ss - sc->ss);
2601 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2602 }
2603 /* if the checksum is valid, mark it in the mbuf header */
2604 if ((if_getcapenable(ifp) & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2605 (0 == mxge_rx_csum(m, csum))) {
2606 /* Tell the stack that the checksum is good */
2607 m->m_pkthdr.csum_data = 0xffff;
2608 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2609 CSUM_DATA_VALID;
2610
2611 #if defined(INET) || defined (INET6)
2612 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0)))
2613 return;
2614 #endif
2615 }
2616 /* pass the frame up the stack */
2617 if_input(ifp, m);
2618 }
2619
2620 static inline void
2621 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len,
2622 uint32_t csum, int lro)
2623 {
2624 mxge_softc_t *sc;
2625 if_t ifp;
2626 struct ether_header *eh;
2627 struct mbuf *m;
2628 mxge_rx_ring_t *rx;
2629 bus_dmamap_t old_map;
2630 int idx;
2631
2632 sc = ss->sc;
2633 ifp = sc->ifp;
2634 rx = &ss->rx_small;
2635 idx = rx->cnt & rx->mask;
2636 rx->cnt++;
2637 /* save a pointer to the received mbuf */
2638 m = rx->info[idx].m;
2639 /* try to replace the received mbuf */
2640 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2641 /* drop the frame -- the old mbuf is re-cycled */
2642 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
2643 return;
2644 }
2645
2646 /* unmap the received buffer */
2647 old_map = rx->info[idx].map;
2648 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2649 bus_dmamap_unload(rx->dmat, old_map);
2650
2651 /* swap the bus_dmamap_t's */
2652 rx->info[idx].map = rx->extra_map;
2653 rx->extra_map = old_map;
2654
2655 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2656 * aligned */
2657 m->m_data += MXGEFW_PAD;
2658
2659 m->m_pkthdr.rcvif = ifp;
2660 m->m_len = m->m_pkthdr.len = len;
2661 ss->ipackets++;
2662 eh = mtod(m, struct ether_header *);
2663 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2664 mxge_vlan_tag_remove(m, &csum);
2665 }
2666 /* flowid only valid if RSS hashing is enabled */
2667 if (sc->num_slices > 1) {
2668 m->m_pkthdr.flowid = (ss - sc->ss);
2669 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2670 }
2671 /* if the checksum is valid, mark it in the mbuf header */
2672 if ((if_getcapenable(ifp) & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2673 (0 == mxge_rx_csum(m, csum))) {
2674 /* Tell the stack that the checksum is good */
2675 m->m_pkthdr.csum_data = 0xffff;
2676 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2677 CSUM_DATA_VALID;
2678
2679 #if defined(INET) || defined (INET6)
2680 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum)))
2681 return;
2682 #endif
2683 }
2684 /* pass the frame up the stack */
2685 if_input(ifp, m);
2686 }
2687
2688 static inline void
2689 mxge_clean_rx_done(struct mxge_slice_state *ss)
2690 {
2691 mxge_rx_done_t *rx_done = &ss->rx_done;
2692 int limit = 0;
2693 uint16_t length;
2694 uint16_t checksum;
2695 int lro;
2696
2697 lro = if_getcapenable(ss->sc->ifp) & IFCAP_LRO;
2698 while (rx_done->entry[rx_done->idx].length != 0) {
2699 length = ntohs(rx_done->entry[rx_done->idx].length);
2700 rx_done->entry[rx_done->idx].length = 0;
2701 checksum = rx_done->entry[rx_done->idx].checksum;
2702 if (length <= (MHLEN - MXGEFW_PAD))
2703 mxge_rx_done_small(ss, length, checksum, lro);
2704 else
2705 mxge_rx_done_big(ss, length, checksum, lro);
2706 rx_done->cnt++;
2707 rx_done->idx = rx_done->cnt & rx_done->mask;
2708
2709 /* limit potential for livelock */
2710 if (__predict_false(++limit > rx_done->mask / 2))
2711 break;
2712 }
2713 #if defined(INET) || defined (INET6)
2714 tcp_lro_flush_all(&ss->lc);
2715 #endif
2716 }
2717
2718 static inline void
2719 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2720 {
2721 if_t ifp __unused;
2722 mxge_tx_ring_t *tx;
2723 struct mbuf *m;
2724 bus_dmamap_t map;
2725 int idx;
2726 int *flags;
2727
2728 tx = &ss->tx;
2729 ifp = ss->sc->ifp;
2730 while (tx->pkt_done != mcp_idx) {
2731 idx = tx->done & tx->mask;
2732 tx->done++;
2733 m = tx->info[idx].m;
2734 /* mbuf and DMA map only attached to the first
2735 segment per-mbuf */
2736 if (m != NULL) {
2737 ss->obytes += m->m_pkthdr.len;
2738 if (m->m_flags & M_MCAST)
2739 ss->omcasts++;
2740 ss->opackets++;
2741 tx->info[idx].m = NULL;
2742 map = tx->info[idx].map;
2743 bus_dmamap_unload(tx->dmat, map);
2744 m_freem(m);
2745 }
2746 if (tx->info[idx].flag) {
2747 tx->info[idx].flag = 0;
2748 tx->pkt_done++;
2749 }
2750 }
2751
2752 	/* If we have space, clear IFF_DRV_OACTIVE to tell the stack that
2753 	   it's OK to send packets */
2754 flags = &ss->if_drv_flags;
2755 mtx_lock(&ss->tx.mtx);
2756 if ((*flags) & IFF_DRV_OACTIVE &&
2757 tx->req - tx->done < (tx->mask + 1)/4) {
2758 *(flags) &= ~IFF_DRV_OACTIVE;
2759 ss->tx.wake++;
2760 mxge_start_locked(ss);
2761 }
2762 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2763 /* let the NIC stop polling this queue, since there
2764 * are no more transmits pending */
2765 if (tx->req == tx->done) {
2766 *tx->send_stop = 1;
2767 tx->queue_active = 0;
2768 tx->deactivate++;
2769 wmb();
2770 }
2771 }
2772 mtx_unlock(&ss->tx.mtx);
2773 }
2774
2775 static struct mxge_media_type mxge_xfp_media_types[] =
2776 {
2777 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2778 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2779 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2780 {0, (1 << 5), "10GBASE-ER"},
2781 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2782 {0, (1 << 3), "10GBASE-SW"},
2783 {0, (1 << 2), "10GBASE-LW"},
2784 {0, (1 << 1), "10GBASE-EW"},
2785 {0, (1 << 0), "Reserved"}
2786 };
2787 static struct mxge_media_type mxge_sfp_media_types[] =
2788 {
2789 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"},
2790 {0, (1 << 7), "Reserved"},
2791 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2792 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2793 {IFM_10G_SR, (1 << 4), "10GBASE-SR"},
2794 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"}
2795 };
2796
2797 static void
2798 mxge_media_set(mxge_softc_t *sc, int media_type)
2799 {
2800
2801 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type,
2802 0, NULL);
2803 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type);
2804 sc->current_media = media_type;
2805 sc->media.ifm_media = sc->media.ifm_cur->ifm_media;
2806 }
2807
2808 static void
2809 mxge_media_init(mxge_softc_t *sc)
2810 {
2811 char *ptr;
2812 int i;
2813
2814 ifmedia_removeall(&sc->media);
2815 mxge_media_set(sc, IFM_AUTO);
2816
2817 /*
2818  * parse the product code to determine the interface type
2819 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2820 * after the 3rd dash in the driver's cached copy of the
2821 * EEPROM's product code string.
2822 */
2823 ptr = sc->product_code_string;
2824 if (ptr == NULL) {
2825 device_printf(sc->dev, "Missing product code\n");
2826 return;
2827 }
2828
2829 for (i = 0; i < 3; i++, ptr++) {
2830 ptr = strchr(ptr, '-');
2831 if (ptr == NULL) {
2832 device_printf(sc->dev,
2833 "only %d dashes in PC?!?\n", i);
2834 return;
2835 }
2836 }
2837 if (*ptr == 'C' || *(ptr +1) == 'C') {
2838 /* -C is CX4 */
2839 sc->connector = MXGE_CX4;
2840 mxge_media_set(sc, IFM_10G_CX4);
2841 } else if (*ptr == 'Q') {
2842 /* -Q is Quad Ribbon Fiber */
2843 sc->connector = MXGE_QRF;
2844 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2845 /* FreeBSD has no media type for Quad ribbon fiber */
2846 } else if (*ptr == 'R') {
2847 /* -R is XFP */
2848 sc->connector = MXGE_XFP;
2849 } else if (*ptr == 'S' || *(ptr +1) == 'S') {
2850 /* -S or -2S is SFP+ */
2851 sc->connector = MXGE_SFP;
2852 } else {
2853 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2854 }
2855 }
2856
2857 /*
2858 * Determine the media type for a NIC. Some XFPs will identify
2859 * themselves only when their link is up, so this is initiated via a
2860 * link up interrupt. However, this can potentially take up to
2861 * several milliseconds, so it is run via the watchdog routine, rather
2862 * than in the interrupt handler itself.
2863 */
2864 static void
2865 mxge_media_probe(mxge_softc_t *sc)
2866 {
2867 mxge_cmd_t cmd;
2868 char *cage_type;
2869
2870 struct mxge_media_type *mxge_media_types = NULL;
2871 int i, err, ms, mxge_media_type_entries;
2872 uint32_t byte;
2873
2874 sc->need_media_probe = 0;
2875
2876 if (sc->connector == MXGE_XFP) {
2877 /* -R is XFP */
2878 mxge_media_types = mxge_xfp_media_types;
2879 mxge_media_type_entries =
2880 nitems(mxge_xfp_media_types);
2881 byte = MXGE_XFP_COMPLIANCE_BYTE;
2882 cage_type = "XFP";
2883 } else if (sc->connector == MXGE_SFP) {
2884 /* -S or -2S is SFP+ */
2885 mxge_media_types = mxge_sfp_media_types;
2886 mxge_media_type_entries =
2887 nitems(mxge_sfp_media_types);
2888 cage_type = "SFP+";
2889 byte = 3;
2890 } else {
2891 /* nothing to do; media type cannot change */
2892 return;
2893 }
2894
2895 /*
2896 	 * At this point we know the NIC has an XFP or SFP+ cage, so now
2897 	 * we try to determine what is in the cage by using the
2898 	 * firmware's I2C commands to read the module's 10GbE compliance
2899 * register. We read just one byte, which may take over
2900 * a millisecond
2901 */
2902
2903 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
2904 cmd.data1 = byte;
2905 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
2906 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
2907 device_printf(sc->dev, "failed to read XFP\n");
2908 }
2909 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
2910 device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
2911 }
2912 if (err != MXGEFW_CMD_OK) {
2913 return;
2914 }
2915
2916 /* now we wait for the data to be cached */
2917 cmd.data0 = byte;
2918 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2919 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
2920 DELAY(1000);
2921 cmd.data0 = byte;
2922 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2923 }
2924 if (err != MXGEFW_CMD_OK) {
2925 device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
2926 cage_type, err, ms);
2927 return;
2928 }
2929
2930 if (cmd.data0 == mxge_media_types[0].bitmask) {
2931 if (mxge_verbose)
2932 device_printf(sc->dev, "%s:%s\n", cage_type,
2933 mxge_media_types[0].name);
2934 if (sc->current_media != mxge_media_types[0].flag) {
2935 mxge_media_init(sc);
2936 mxge_media_set(sc, mxge_media_types[0].flag);
2937 }
2938 return;
2939 }
2940 for (i = 1; i < mxge_media_type_entries; i++) {
2941 if (cmd.data0 & mxge_media_types[i].bitmask) {
2942 if (mxge_verbose)
2943 device_printf(sc->dev, "%s:%s\n",
2944 cage_type,
2945 mxge_media_types[i].name);
2946
2947 if (sc->current_media != mxge_media_types[i].flag) {
2948 mxge_media_init(sc);
2949 mxge_media_set(sc, mxge_media_types[i].flag);
2950 }
2951 return;
2952 }
2953 }
2954 if (mxge_verbose)
2955 device_printf(sc->dev, "%s media 0x%x unknown\n",
2956 cage_type, cmd.data0);
2957
2958 return;
2959 }
2960
2961 static void
2962 mxge_intr(void *arg)
2963 {
2964 struct mxge_slice_state *ss = arg;
2965 mxge_softc_t *sc = ss->sc;
2966 mcp_irq_data_t *stats = ss->fw_stats;
2967 mxge_tx_ring_t *tx = &ss->tx;
2968 mxge_rx_done_t *rx_done = &ss->rx_done;
2969 uint32_t send_done_count;
2970 uint8_t valid;
2971
2972 /* make sure the DMA has finished */
2973 if (!stats->valid) {
2974 return;
2975 }
2976 valid = stats->valid;
2977
2978 if (sc->legacy_irq) {
2979 /* lower legacy IRQ */
2980 *sc->irq_deassert = 0;
2981 if (!mxge_deassert_wait)
2982 /* don't wait for conf. that irq is low */
2983 stats->valid = 0;
2984 } else {
2985 stats->valid = 0;
2986 }
2987
2988 /* loop while waiting for legacy irq deassertion */
2989 do {
2990 /* check for transmit completes and receives */
2991 send_done_count = be32toh(stats->send_done_count);
2992 while ((send_done_count != tx->pkt_done) ||
2993 (rx_done->entry[rx_done->idx].length != 0)) {
2994 if (send_done_count != tx->pkt_done)
2995 mxge_tx_done(ss, (int)send_done_count);
2996 mxge_clean_rx_done(ss);
2997 send_done_count = be32toh(stats->send_done_count);
2998 }
2999 if (sc->legacy_irq && mxge_deassert_wait)
3000 wmb();
3001 } while (*((volatile uint8_t *) &stats->valid));
3002
3003 /* fw link & error stats meaningful only on the first slice */
3004 if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
3005 if (sc->link_state != stats->link_up) {
3006 sc->link_state = stats->link_up;
3007 if (sc->link_state) {
3008 if_link_state_change(sc->ifp, LINK_STATE_UP);
3009 if (mxge_verbose)
3010 device_printf(sc->dev, "link up\n");
3011 } else {
3012 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3013 if (mxge_verbose)
3014 device_printf(sc->dev, "link down\n");
3015 }
3016 sc->need_media_probe = 1;
3017 }
3018 if (sc->rdma_tags_available !=
3019 be32toh(stats->rdma_tags_available)) {
3020 sc->rdma_tags_available =
3021 be32toh(stats->rdma_tags_available);
3022 device_printf(sc->dev, "RDMA timed out! %d tags "
3023 "left\n", sc->rdma_tags_available);
3024 }
3025
3026 if (stats->link_down) {
3027 sc->down_cnt += stats->link_down;
3028 sc->link_state = 0;
3029 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3030 }
3031 }
3032
3033 /* check to see if we have rx token to pass back */
3034 if (valid & 0x1)
3035 *ss->irq_claim = be32toh(3);
3036 *(ss->irq_claim + 1) = be32toh(3);
3037 }
3038
3039 static void
3040 mxge_init(void *arg)
3041 {
3042 mxge_softc_t *sc = arg;
3043 if_t ifp = sc->ifp;
3044
3045 mtx_lock(&sc->driver_mtx);
3046 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
3047 (void) mxge_open(sc);
3048 mtx_unlock(&sc->driver_mtx);
3049 }
3050
3051 static void
3052 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
3053 {
3054 int i;
3055
3056 #if defined(INET) || defined(INET6)
3057 tcp_lro_free(&ss->lc);
3058 #endif
3059 for (i = 0; i <= ss->rx_big.mask; i++) {
3060 if (ss->rx_big.info[i].m == NULL)
3061 continue;
3062 bus_dmamap_unload(ss->rx_big.dmat,
3063 ss->rx_big.info[i].map);
3064 m_freem(ss->rx_big.info[i].m);
3065 ss->rx_big.info[i].m = NULL;
3066 }
3067
3068 for (i = 0; i <= ss->rx_small.mask; i++) {
3069 if (ss->rx_small.info[i].m == NULL)
3070 continue;
3071 bus_dmamap_unload(ss->rx_small.dmat,
3072 ss->rx_small.info[i].map);
3073 m_freem(ss->rx_small.info[i].m);
3074 ss->rx_small.info[i].m = NULL;
3075 }
3076
3077 /* transmit ring used only on the first slice */
3078 if (ss->tx.info == NULL)
3079 return;
3080
3081 for (i = 0; i <= ss->tx.mask; i++) {
3082 ss->tx.info[i].flag = 0;
3083 if (ss->tx.info[i].m == NULL)
3084 continue;
3085 bus_dmamap_unload(ss->tx.dmat,
3086 ss->tx.info[i].map);
3087 m_freem(ss->tx.info[i].m);
3088 ss->tx.info[i].m = NULL;
3089 }
3090 }
3091
3092 static void
3093 mxge_free_mbufs(mxge_softc_t *sc)
3094 {
3095 int slice;
3096
3097 for (slice = 0; slice < sc->num_slices; slice++)
3098 mxge_free_slice_mbufs(&sc->ss[slice]);
3099 }
3100
3101 static void
3102 mxge_free_slice_rings(struct mxge_slice_state *ss)
3103 {
3104 int i;
3105
3106 if (ss->rx_done.entry != NULL)
3107 mxge_dma_free(&ss->rx_done.dma);
3108 ss->rx_done.entry = NULL;
3109
3110 if (ss->tx.req_bytes != NULL)
3111 free(ss->tx.req_bytes, M_DEVBUF);
3112 ss->tx.req_bytes = NULL;
3113
3114 if (ss->tx.seg_list != NULL)
3115 free(ss->tx.seg_list, M_DEVBUF);
3116 ss->tx.seg_list = NULL;
3117
3118 if (ss->rx_small.shadow != NULL)
3119 free(ss->rx_small.shadow, M_DEVBUF);
3120 ss->rx_small.shadow = NULL;
3121
3122 if (ss->rx_big.shadow != NULL)
3123 free(ss->rx_big.shadow, M_DEVBUF);
3124 ss->rx_big.shadow = NULL;
3125
3126 if (ss->tx.info != NULL) {
3127 if (ss->tx.dmat != NULL) {
3128 for (i = 0; i <= ss->tx.mask; i++) {
3129 bus_dmamap_destroy(ss->tx.dmat,
3130 ss->tx.info[i].map);
3131 }
3132 bus_dma_tag_destroy(ss->tx.dmat);
3133 }
3134 free(ss->tx.info, M_DEVBUF);
3135 }
3136 ss->tx.info = NULL;
3137
3138 if (ss->rx_small.info != NULL) {
3139 if (ss->rx_small.dmat != NULL) {
3140 for (i = 0; i <= ss->rx_small.mask; i++) {
3141 bus_dmamap_destroy(ss->rx_small.dmat,
3142 ss->rx_small.info[i].map);
3143 }
3144 bus_dmamap_destroy(ss->rx_small.dmat,
3145 ss->rx_small.extra_map);
3146 bus_dma_tag_destroy(ss->rx_small.dmat);
3147 }
3148 free(ss->rx_small.info, M_DEVBUF);
3149 }
3150 ss->rx_small.info = NULL;
3151
3152 if (ss->rx_big.info != NULL) {
3153 if (ss->rx_big.dmat != NULL) {
3154 for (i = 0; i <= ss->rx_big.mask; i++) {
3155 bus_dmamap_destroy(ss->rx_big.dmat,
3156 ss->rx_big.info[i].map);
3157 }
3158 bus_dmamap_destroy(ss->rx_big.dmat,
3159 ss->rx_big.extra_map);
3160 bus_dma_tag_destroy(ss->rx_big.dmat);
3161 }
3162 free(ss->rx_big.info, M_DEVBUF);
3163 }
3164 ss->rx_big.info = NULL;
3165 }
3166
3167 static void
3168 mxge_free_rings(mxge_softc_t *sc)
3169 {
3170 int slice;
3171
3172 for (slice = 0; slice < sc->num_slices; slice++)
3173 mxge_free_slice_rings(&sc->ss[slice]);
3174 }
3175
3176 static int
3177 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
3178 int tx_ring_entries)
3179 {
3180 mxge_softc_t *sc = ss->sc;
3181 size_t bytes;
3182 int err, i;
3183
3184 /* allocate per-slice receive resources */
3185
3186 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
3187 ss->rx_done.mask = (2 * rx_ring_entries) - 1;
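	/* the rx_done (completion) ring receives events for both the
	   small and big receive rings, hence twice the entries */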
3188
3189 /* allocate the rx shadow rings */
3190 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
3191 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3192
3193 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
3194 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3195
3196 /* allocate the rx host info rings */
3197 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
3198 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3199
3200 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
3201 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3202
3203 /* allocate the rx busdma resources */
3204 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3205 1, /* alignment */
3206 4096, /* boundary */
3207 BUS_SPACE_MAXADDR, /* low */
3208 BUS_SPACE_MAXADDR, /* high */
3209 NULL, NULL, /* filter */
3210 MHLEN, /* maxsize */
3211 1, /* num segs */
3212 MHLEN, /* maxsegsize */
3213 BUS_DMA_ALLOCNOW, /* flags */
3214 NULL, NULL, /* lock */
3215 &ss->rx_small.dmat); /* tag */
3216 if (err != 0) {
3217 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3218 err);
3219 return err;
3220 }
3221
3222 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3223 1, /* alignment */
3224 0, /* boundary */
3225 BUS_SPACE_MAXADDR, /* low */
3226 BUS_SPACE_MAXADDR, /* high */
3227 NULL, NULL, /* filter */
3228 3*4096, /* maxsize */
3229 1, /* num segs */
3230 MJUM9BYTES, /* maxsegsize*/
3231 BUS_DMA_ALLOCNOW, /* flags */
3232 NULL, NULL, /* lock */
3233 &ss->rx_big.dmat); /* tag */
3234 if (err != 0) {
3235 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3236 err);
3237 return err;
3238 }
3239 for (i = 0; i <= ss->rx_small.mask; i++) {
3240 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3241 &ss->rx_small.info[i].map);
3242 if (err != 0) {
3243 device_printf(sc->dev, "Err %d rx_small dmamap\n",
3244 err);
3245 return err;
3246 }
3247 }
3248 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3249 &ss->rx_small.extra_map);
3250 if (err != 0) {
3251 device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3252 err);
3253 return err;
3254 }
3255
3256 for (i = 0; i <= ss->rx_big.mask; i++) {
3257 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3258 &ss->rx_big.info[i].map);
3259 if (err != 0) {
3260 device_printf(sc->dev, "Err %d rx_big dmamap\n",
3261 err);
3262 return err;
3263 }
3264 }
3265 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3266 &ss->rx_big.extra_map);
3267 if (err != 0) {
3268 device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3269 err);
3270 return err;
3271 }
3272
3273 /* now allocate TX resources */
3274
3275 ss->tx.mask = tx_ring_entries - 1;
3276 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3277
3278 /* allocate the tx request copy block */
3279 bytes = 8 +
3280 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3281 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
3282 /* ensure req_list entries are aligned to 8 bytes */
3283 ss->tx.req_list = (mcp_kreq_ether_send_t *)
3284 ((uintptr_t)(ss->tx.req_bytes + 7) & ~7UL);
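	/* (addr + 7) & ~7 rounds the pointer up to the next 8-byte
	   boundary; the 8 extra bytes allocated above leave room
	   for the adjustment */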
3285
3286 /* allocate the tx busdma segment list */
3287 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3288 ss->tx.seg_list = (bus_dma_segment_t *)
3289 malloc(bytes, M_DEVBUF, M_WAITOK);
3290
3291 /* allocate the tx host info ring */
3292 bytes = tx_ring_entries * sizeof (*ss->tx.info);
3293 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3294
3295 /* allocate the tx busdma resources */
3296 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3297 1, /* alignment */
3298 sc->tx_boundary, /* boundary */
3299 BUS_SPACE_MAXADDR, /* low */
3300 BUS_SPACE_MAXADDR, /* high */
3301 NULL, NULL, /* filter */
3302 65536 + 256, /* maxsize */
3303 ss->tx.max_desc - 2, /* num segs */
3304 sc->tx_boundary, /* maxsegsz */
3305 BUS_DMA_ALLOCNOW, /* flags */
3306 NULL, NULL, /* lock */
3307 &ss->tx.dmat); /* tag */
3308
3309 if (err != 0) {
3310 device_printf(sc->dev, "Err %d allocating tx dmat\n",
3311 err);
3312 return err;
3313 }
3314
3315 	/* now use these tags to set up dmamaps for each slot
3316 	   in the ring */
3317 for (i = 0; i <= ss->tx.mask; i++) {
3318 err = bus_dmamap_create(ss->tx.dmat, 0,
3319 &ss->tx.info[i].map);
3320 if (err != 0) {
3321 device_printf(sc->dev, "Err %d tx dmamap\n",
3322 err);
3323 return err;
3324 }
3325 }
3326 return 0;
3327
3328 }
3329
3330 static int
3331 mxge_alloc_rings(mxge_softc_t *sc)
3332 {
3333 mxge_cmd_t cmd;
3334 int tx_ring_size;
3335 int tx_ring_entries, rx_ring_entries;
3336 int err, slice;
3337
3338 /* get ring sizes */
3339 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3340 tx_ring_size = cmd.data0;
3341 if (err != 0) {
3342 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3343 goto abort;
3344 }
3345
3346 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3347 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3348 if_setsendqlen(sc->ifp, tx_ring_entries - 1);
3349 if_setsendqready(sc->ifp);
3350
3351 for (slice = 0; slice < sc->num_slices; slice++) {
3352 err = mxge_alloc_slice_rings(&sc->ss[slice],
3353 rx_ring_entries,
3354 tx_ring_entries);
3355 if (err != 0)
3356 goto abort;
3357 }
3358 return 0;
3359
3360 abort:
3361 mxge_free_rings(sc);
3362 return err;
3363
3364 }
3365
3366 static void
3367 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3368 {
3369 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3370
3371 if (bufsize < MCLBYTES) {
3372 /* easy, everything fits in a single buffer */
3373 *big_buf_size = MCLBYTES;
3374 *cl_size = MCLBYTES;
3375 *nbufs = 1;
3376 return;
3377 }
3378
3379 if (bufsize < MJUMPAGESIZE) {
3380 /* still easy, everything still fits in a single buffer */
3381 *big_buf_size = MJUMPAGESIZE;
3382 *cl_size = MJUMPAGESIZE;
3383 *nbufs = 1;
3384 return;
3385 }
3386 *cl_size = MJUM9BYTES;
3387 *big_buf_size = MJUM9BYTES;
3388 *nbufs = 1;
3389 }
3390
3391 static int
3392 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3393 {
3394 mxge_softc_t *sc;
3395 mxge_cmd_t cmd;
3396 bus_dmamap_t map;
3397 int err, i, slice;
3398
3399 sc = ss->sc;
3400 slice = ss - sc->ss;
3401
3402 #if defined(INET) || defined(INET6)
3403 (void)tcp_lro_init(&ss->lc);
3404 #endif
3405 ss->lc.ifp = sc->ifp;
3406
3407 /* get the lanai pointers to the send and receive rings */
3408
3409 err = 0;
3410
3411 cmd.data0 = slice;
3412 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3413 ss->tx.lanai =
3414 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3415 ss->tx.send_go = (volatile uint32_t *)
3416 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3417 ss->tx.send_stop = (volatile uint32_t *)
3418 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3419
3420 cmd.data0 = slice;
3421 err |= mxge_send_cmd(sc,
3422 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3423 ss->rx_small.lanai =
3424 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3425 cmd.data0 = slice;
3426 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3427 ss->rx_big.lanai =
3428 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3429
3430 if (err != 0) {
3431 device_printf(sc->dev,
3432 "failed to get ring sizes or locations\n");
3433 return EIO;
3434 }
3435
3436 /* stock receive rings */
3437 for (i = 0; i <= ss->rx_small.mask; i++) {
3438 map = ss->rx_small.info[i].map;
3439 err = mxge_get_buf_small(ss, map, i);
3440 if (err) {
3441 device_printf(sc->dev, "alloced %d/%d smalls\n",
3442 i, ss->rx_small.mask + 1);
3443 return ENOMEM;
3444 }
3445 }
3446 for (i = 0; i <= ss->rx_big.mask; i++) {
3447 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3448 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3449 }
3450 ss->rx_big.nbufs = nbufs;
3451 ss->rx_big.cl_size = cl_size;
3452 ss->rx_big.mlen = if_getmtu(ss->sc->ifp) + ETHER_HDR_LEN +
3453 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3454 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3455 map = ss->rx_big.info[i].map;
3456 err = mxge_get_buf_big(ss, map, i);
3457 if (err) {
3458 device_printf(sc->dev, "alloced %d/%d bigs\n",
3459 i, ss->rx_big.mask + 1);
3460 return ENOMEM;
3461 }
3462 }
3463 return 0;
3464 }
3465
3466 static int
3467 mxge_open(mxge_softc_t *sc)
3468 {
3469 mxge_cmd_t cmd;
3470 int err, big_bytes, nbufs, slice, cl_size, i;
3471 bus_addr_t bus;
3472 volatile uint8_t *itable;
3473 struct mxge_slice_state *ss;
3474
3475 /* Copy the MAC address in case it was overridden */
3476 bcopy(if_getlladdr(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3477
3478 err = mxge_reset(sc, 1);
3479 if (err != 0) {
3480 device_printf(sc->dev, "failed to reset\n");
3481 return EIO;
3482 }
3483
3484 if (sc->num_slices > 1) {
3485 /* setup the indirection table */
3486 cmd.data0 = sc->num_slices;
3487 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3488 &cmd);
3489
3490 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3491 &cmd);
3492 if (err != 0) {
3493 device_printf(sc->dev,
3494 "failed to setup rss tables\n");
3495 return err;
3496 }
3497
3498 /* just enable an identity mapping */
3499 itable = sc->sram + cmd.data0;
3500 for (i = 0; i < sc->num_slices; i++)
3501 itable[i] = (uint8_t)i;
3502
3503 cmd.data0 = 1;
3504 cmd.data1 = mxge_rss_hash_type;
3505 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3506 if (err != 0) {
3507 device_printf(sc->dev, "failed to enable slices\n");
3508 return err;
3509 }
3510 }
3511
3512 mxge_choose_params(if_getmtu(sc->ifp), &big_bytes, &cl_size, &nbufs);
3513
3514 cmd.data0 = nbufs;
3515 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3516 &cmd);
3517 /* error is only meaningful if we're trying to set
3518 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3519 if (err && nbufs > 1) {
3520 device_printf(sc->dev,
3521 			      "Failed to set always-use-n to %d\n",
3522 nbufs);
3523 return EIO;
3524 }
3525 /* Give the firmware the mtu and the big and small buffer
3526 sizes. The firmware wants the big buf size to be a power
3527 of two. Luckily, FreeBSD's clusters are powers of two */
3528 cmd.data0 = if_getmtu(sc->ifp) + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3529 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3530 cmd.data0 = MHLEN - MXGEFW_PAD;
3531 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3532 &cmd);
3533 cmd.data0 = big_bytes;
3534 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3535
3536 if (err != 0) {
3537 device_printf(sc->dev, "failed to setup params\n");
3538 goto abort;
3539 }
3540
3541 	/* Now give the firmware the pointer to the stats block */
3542 for (slice = 0; slice < sc->num_slices; slice++) {
3543 ss = &sc->ss[slice];
3544 cmd.data0 =
3545 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
3546 cmd.data1 =
3547 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
3548 cmd.data2 = sizeof(struct mcp_irq_data);
3549 cmd.data2 |= (slice << 16);
3550 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3551 }
3552
3553 if (err != 0) {
3554 bus = sc->ss->fw_stats_dma.bus_addr;
3555 bus += offsetof(struct mcp_irq_data, send_done_count);
3556 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3557 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3558 err = mxge_send_cmd(sc,
3559 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3560 &cmd);
3561 /* Firmware cannot support multicast without STATS_DMA_V2 */
3562 sc->fw_multicast_support = 0;
3563 } else {
3564 sc->fw_multicast_support = 1;
3565 }
3566
3567 if (err != 0) {
3568 device_printf(sc->dev, "failed to setup params\n");
3569 goto abort;
3570 }
3571
3572 for (slice = 0; slice < sc->num_slices; slice++) {
3573 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3574 if (err != 0) {
3575 device_printf(sc->dev, "couldn't open slice %d\n",
3576 slice);
3577 goto abort;
3578 }
3579 }
3580
3581 /* Finally, start the firmware running */
3582 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3583 if (err) {
3584 device_printf(sc->dev, "Couldn't bring up link\n");
3585 goto abort;
3586 }
3587 for (slice = 0; slice < sc->num_slices; slice++) {
3588 ss = &sc->ss[slice];
3589 ss->if_drv_flags |= IFF_DRV_RUNNING;
3590 ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
3591 }
3592 if_setdrvflagbits(sc->ifp, IFF_DRV_RUNNING, 0);
3593 if_setdrvflagbits(sc->ifp, 0, IFF_DRV_OACTIVE);
3594
3595 return 0;
3596
3597 abort:
3598 mxge_free_mbufs(sc);
3599
3600 return err;
3601 }
3602
3603 static int
3604 mxge_close(mxge_softc_t *sc, int down)
3605 {
3606 mxge_cmd_t cmd;
3607 int err, old_down_cnt;
3608 struct mxge_slice_state *ss;
3609 int slice;
3610
3611 for (slice = 0; slice < sc->num_slices; slice++) {
3612 ss = &sc->ss[slice];
3613 ss->if_drv_flags &= ~IFF_DRV_RUNNING;
3614 }
3615 if_setdrvflagbits(sc->ifp, 0, IFF_DRV_RUNNING);
3616 if (!down) {
3617 old_down_cnt = sc->down_cnt;
3618 wmb();
3619 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3620 if (err) {
3621 device_printf(sc->dev,
3622 "Couldn't bring down link\n");
3623 }
3624 if (old_down_cnt == sc->down_cnt) {
3625 /* wait for down irq */
3626 DELAY(10 * sc->intr_coal_delay);
3627 }
3628 wmb();
3629 if (old_down_cnt == sc->down_cnt) {
3630 device_printf(sc->dev, "never got down irq\n");
3631 }
3632 }
3633 mxge_free_mbufs(sc);
3634
3635 return 0;
3636 }
3637
3638 static void
3639 mxge_setup_cfg_space(mxge_softc_t *sc)
3640 {
3641 device_t dev = sc->dev;
3642 int reg;
3643 uint16_t lnk, pectl;
3644
3645 	/* find the PCIe link width and set max read request to 4KB */
3646 	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
3647 lnk = pci_read_config(dev, reg + 0x12, 2);
3648 sc->link_width = (lnk >> 4) & 0x3f;
3649
3650 if (sc->pectl == 0) {
3651 pectl = pci_read_config(dev, reg + 0x8, 2);
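			/* bits 14:12 of the PCIe device control
			   register encode the max read request size
			   as 128 << value, so 5 selects 4096 bytes */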
3652 pectl = (pectl & ~0x7000) | (5 << 12);
3653 pci_write_config(dev, reg + 0x8, pectl, 2);
3654 sc->pectl = pectl;
3655 } else {
3656 /* restore saved pectl after watchdog reset */
3657 pci_write_config(dev, reg + 0x8, sc->pectl, 2);
3658 }
3659 }
3660
3661 /* Enable DMA and Memory space access */
3662 pci_enable_busmaster(dev);
3663 }
3664
3665 static uint32_t
3666 mxge_read_reboot(mxge_softc_t *sc)
3667 {
3668 device_t dev = sc->dev;
3669 uint32_t vs;
3670
3671 /* find the vendor specific offset */
3672 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) {
3673 device_printf(sc->dev,
3674 "could not find vendor specific offset\n");
3675 return (uint32_t)-1;
3676 }
3677 /* enable read32 mode */
3678 pci_write_config(dev, vs + 0x10, 0x3, 1);
3679 /* tell NIC which register to read */
3680 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3681 return (pci_read_config(dev, vs + 0x14, 4));
3682 }
3683
3684 static void
3685 mxge_watchdog_reset(mxge_softc_t *sc)
3686 {
3687 struct pci_devinfo *dinfo;
3688 struct mxge_slice_state *ss;
3689 int err, running, s, num_tx_slices = 1;
3690 uint32_t reboot;
3691 uint16_t cmd;
3692
3693 err = ENXIO;
3694
3695 device_printf(sc->dev, "Watchdog reset!\n");
3696
3697 /*
3698 * check to see if the NIC rebooted. If it did, then all of
3699 * PCI config space has been reset, and things like the
3700 * busmaster bit will be zero. If this is the case, then we
3701 * must restore PCI config space before the NIC can be used
3702 * again
3703 */
3704 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3705 if (cmd == 0xffff) {
3706 /*
3707 * maybe the watchdog caught the NIC rebooting; wait
3708 * up to 100ms for it to finish. If it does not come
3709 * back, then give up
3710 */
3711 DELAY(1000*100);
3712 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3713 if (cmd == 0xffff) {
3714 device_printf(sc->dev, "NIC disappeared!\n");
3715 }
3716 }
3717 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3718 /* print the reboot status */
3719 reboot = mxge_read_reboot(sc);
3720 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3721 reboot);
3722 running = if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING;
3723 if (running) {
3724 /*
3725 * quiesce NIC so that TX routines will not try to
3726 * xmit after restoration of BAR
3727 */
3728
3729 /* Mark the link as down */
3730 if (sc->link_state) {
3731 sc->link_state = 0;
3732 if_link_state_change(sc->ifp,
3733 LINK_STATE_DOWN);
3734 }
3735
3736 num_tx_slices = sc->num_slices;
3737
3738 /* grab all TX locks to ensure no tx */
3739 for (s = 0; s < num_tx_slices; s++) {
3740 ss = &sc->ss[s];
3741 mtx_lock(&ss->tx.mtx);
3742 }
3743 mxge_close(sc, 1);
3744 }
3745 /* restore PCI configuration space */
3746 dinfo = device_get_ivars(sc->dev);
3747 pci_cfg_restore(sc->dev, dinfo);
3748
3749 /* and redo any changes we made to our config space */
3750 mxge_setup_cfg_space(sc);
3751
3752 /* reload f/w */
3753 err = mxge_load_firmware(sc, 0);
3754 if (err) {
3755 device_printf(sc->dev,
3756 "Unable to re-load f/w\n");
3757 }
3758 if (running) {
3759 if (!err)
3760 err = mxge_open(sc);
3761 /* release all TX locks */
3762 for (s = 0; s < num_tx_slices; s++) {
3763 ss = &sc->ss[s];
3764 mxge_start_locked(ss);
3765 mtx_unlock(&ss->tx.mtx);
3766 }
3767 }
3768 sc->watchdog_resets++;
3769 } else {
3770 device_printf(sc->dev,
3771 "NIC did not reboot, not resetting\n");
3772 err = 0;
3773 }
3774 if (err) {
3775 device_printf(sc->dev, "watchdog reset failed\n");
3776 } else {
3777 if (sc->dying == 2)
3778 sc->dying = 0;
3779 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3780 }
3781 }
3782
3783 static void
3784 mxge_watchdog_task(void *arg, int pending)
3785 {
3786 mxge_softc_t *sc = arg;
3787
3788 mtx_lock(&sc->driver_mtx);
3789 mxge_watchdog_reset(sc);
3790 mtx_unlock(&sc->driver_mtx);
3791 }
3792
3793 static void
3794 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3795 {
3796 tx = &sc->ss[slice].tx;
3797 	device_printf(sc->dev, "slice %d stuck? ring state:\n", slice);
3798 device_printf(sc->dev,
3799 "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3800 tx->req, tx->done, tx->queue_active);
3801 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3802 tx->activate, tx->deactivate);
3803 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3804 tx->pkt_done,
3805 be32toh(sc->ss->fw_stats->send_done_count));
3806 }
3807
3808 static int
3809 mxge_watchdog(mxge_softc_t *sc)
3810 {
3811 mxge_tx_ring_t *tx;
3812 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
3813 int i, err = 0;
3814
3815 /* see if we have outstanding transmits, which
3816 have been pending for more than mxge_ticks */
3817 for (i = 0; (i < sc->num_slices) && (err == 0); i++) {
3818 tx = &sc->ss[i].tx;
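		/*
		 * a slice looks stuck when it has work outstanding
		 * now (req != done), already had work outstanding at
		 * the last pass (watchdog_req != watchdog_done, the
		 * snapshots taken at the bottom of this loop), yet
		 * has completed nothing since (done == watchdog_done)
		 */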
3819 if (tx->req != tx->done &&
3820 tx->watchdog_req != tx->watchdog_done &&
3821 tx->done == tx->watchdog_done) {
3822 /* check for pause blocking before resetting */
3823 if (tx->watchdog_rx_pause == rx_pause) {
3824 mxge_warn_stuck(sc, tx, i);
3825 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
3826 return (ENXIO);
3827 }
3828 else
3829 device_printf(sc->dev, "Flow control blocking "
3830 "xmits, check link partner\n");
3831 }
3832
3833 tx->watchdog_req = tx->req;
3834 tx->watchdog_done = tx->done;
3835 tx->watchdog_rx_pause = rx_pause;
3836 }
3837
3838 if (sc->need_media_probe)
3839 mxge_media_probe(sc);
3840 return (err);
3841 }
3842
3843 static uint64_t
3844 mxge_get_counter(if_t ifp, ift_counter cnt)
3845 {
3846 struct mxge_softc *sc;
3847 uint64_t rv;
3848
3849 sc = if_getsoftc(ifp);
3850 rv = 0;
3851
3852 switch (cnt) {
3853 case IFCOUNTER_IPACKETS:
3854 for (int s = 0; s < sc->num_slices; s++)
3855 rv += sc->ss[s].ipackets;
3856 return (rv);
3857 case IFCOUNTER_OPACKETS:
3858 for (int s = 0; s < sc->num_slices; s++)
3859 rv += sc->ss[s].opackets;
3860 return (rv);
3861 case IFCOUNTER_OERRORS:
3862 for (int s = 0; s < sc->num_slices; s++)
3863 rv += sc->ss[s].oerrors;
3864 return (rv);
3865 case IFCOUNTER_OBYTES:
3866 for (int s = 0; s < sc->num_slices; s++)
3867 rv += sc->ss[s].obytes;
3868 return (rv);
3869 case IFCOUNTER_OMCASTS:
3870 for (int s = 0; s < sc->num_slices; s++)
3871 rv += sc->ss[s].omcasts;
3872 return (rv);
3873 case IFCOUNTER_OQDROPS:
3874 for (int s = 0; s < sc->num_slices; s++)
3875 rv += sc->ss[s].tx.br->br_drops;
3876 return (rv);
3877 default:
3878 return (if_get_counter_default(ifp, cnt));
3879 }
3880 }
3881
3882 static void
3883 mxge_tick(void *arg)
3884 {
3885 mxge_softc_t *sc = arg;
3886 u_long pkts = 0;
3887 int err = 0;
3888 int running, ticks;
3889 uint16_t cmd;
3890
3891 ticks = mxge_ticks;
3892 running = if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING;
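	/*
	 * the countdown throttles the tx-stuck check, so
	 * mxge_watchdog() runs only on every 4th tick
	 */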
3893 if (running) {
3894 if (!sc->watchdog_countdown) {
3895 err = mxge_watchdog(sc);
3896 sc->watchdog_countdown = 4;
3897 }
3898 sc->watchdog_countdown--;
3899 }
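	/*
	 * pkts is never updated here (apparently left over from an
	 * older per-tick packet count), so the idle branch below
	 * always runs and the poll interval is always stretched
	 */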
3900 if (pkts == 0) {
3901 /* ensure NIC did not suffer h/w fault while idle */
3902 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3903 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3904 sc->dying = 2;
3905 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
3906 err = ENXIO;
3907 }
3908 /* look less often if NIC is idle */
3909 ticks *= 4;
3910 }
3911
3912 if (err == 0)
3913 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
3914
3915 }
3916
3917 static int
3918 mxge_media_change(if_t ifp)
3919 {
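	/* the media is fixed by the installed transceiver, so
	   manual media selection is rejected */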
3920 return EINVAL;
3921 }
3922
3923 static int
3924 mxge_change_mtu(mxge_softc_t *sc, int mtu)
3925 {
3926 if_t ifp = sc->ifp;
3927 int real_mtu, old_mtu;
3928 int err = 0;
3929
3930 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3931 if ((real_mtu > sc->max_mtu) || real_mtu < 60)
3932 return EINVAL;
3933 mtx_lock(&sc->driver_mtx);
3934 old_mtu = if_getmtu(ifp);
3935 if_setmtu(ifp, mtu);
3936 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
3937 mxge_close(sc, 0);
3938 err = mxge_open(sc);
3939 if (err != 0) {
3940 if_setmtu(ifp, old_mtu);
3941 mxge_close(sc, 0);
3942 (void) mxge_open(sc);
3943 }
3944 }
3945 mtx_unlock(&sc->driver_mtx);
3946 return err;
3947 }
3948
3949 static void
3950 mxge_media_status(if_t ifp, struct ifmediareq *ifmr)
3951 {
3952 mxge_softc_t *sc = if_getsoftc(ifp);
3953
3954 if (sc == NULL)
3955 return;
3956 ifmr->ifm_status = IFM_AVALID;
3957 ifmr->ifm_active = IFM_ETHER | IFM_FDX;
3958 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
3959 ifmr->ifm_active |= sc->current_media;
3960 }
3961
3962 static int
3963 mxge_fetch_i2c(mxge_softc_t *sc, struct ifi2creq *i2c)
3964 {
3965 mxge_cmd_t cmd;
3966 uint32_t i2c_args;
3967 int i, ms, err;
3968
3969 if (i2c->dev_addr != 0xA0 &&
3970 i2c->dev_addr != 0xA2)
3971 return (EINVAL);
3972 if (i2c->len > sizeof(i2c->data))
3973 return (EINVAL);
3974
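	/*
	 * the module EEPROM is read one byte at a time:
	 * MXGEFW_CMD_I2C_READ kicks off the i2c transaction, then
	 * MXGEFW_CMD_I2C_BYTE polls for the cached byte, retrying
	 * for up to ~50ms while the firmware returns EBUSY
	 */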
3975 for (i = 0; i < i2c->len; i++) {
3976 i2c_args = i2c->dev_addr << 0x8;
3977 i2c_args |= i2c->offset + i;
3978 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
3979 cmd.data1 = i2c_args;
3980 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
3981
3982 if (err != MXGEFW_CMD_OK)
3983 return (EIO);
3984 /* now we wait for the data to be cached */
3985 cmd.data0 = i2c_args & 0xff;
3986 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
3987 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
3988 cmd.data0 = i2c_args & 0xff;
3989 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
3990 if (err == EBUSY)
3991 DELAY(1000);
3992 }
3993 if (err != MXGEFW_CMD_OK)
3994 return (EIO);
3995 i2c->data[i] = cmd.data0;
3996 }
3997 return (0);
3998 }
3999
4000 static int
4001 mxge_ioctl(if_t ifp, u_long command, caddr_t data)
4002 {
4003 mxge_softc_t *sc = if_getsoftc(ifp);
4004 struct ifreq *ifr = (struct ifreq *)data;
4005 struct ifi2creq i2c;
4006 int err, mask;
4007
4008 err = 0;
4009 switch (command) {
4010 case SIOCSIFMTU:
4011 err = mxge_change_mtu(sc, ifr->ifr_mtu);
4012 break;
4013
4014 case SIOCSIFFLAGS:
4015 mtx_lock(&sc->driver_mtx);
4016 if (sc->dying) {
4017 mtx_unlock(&sc->driver_mtx);
4018 return EINVAL;
4019 }
4020 if (if_getflags(ifp) & IFF_UP) {
4021 if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) {
4022 err = mxge_open(sc);
4023 } else {
4024 				/* take care of promisc and allmulti
4025 				   flag changes */
4026 mxge_change_promisc(sc,
4027 if_getflags(ifp) & IFF_PROMISC);
4028 mxge_set_multicast_list(sc);
4029 }
4030 } else {
4031 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
4032 mxge_close(sc, 0);
4033 }
4034 }
4035 mtx_unlock(&sc->driver_mtx);
4036 break;
4037
4038 case SIOCADDMULTI:
4039 case SIOCDELMULTI:
4040 mtx_lock(&sc->driver_mtx);
4041 if (sc->dying) {
4042 mtx_unlock(&sc->driver_mtx);
4043 return (EINVAL);
4044 }
4045 mxge_set_multicast_list(sc);
4046 mtx_unlock(&sc->driver_mtx);
4047 break;
4048
4049 case SIOCSIFCAP:
4050 mtx_lock(&sc->driver_mtx);
4051 mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
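		/*
		 * mask holds just the capability bits the caller asked
		 * to toggle; TSO is tied to tx checksum offload below,
		 * since the card requires csum offload to do TSO (see
		 * the EINVAL cases)
		 */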
4052 if (mask & IFCAP_TXCSUM) {
4053 if (IFCAP_TXCSUM & if_getcapenable(ifp)) {
4054 mask &= ~IFCAP_TSO4;
4055 if_setcapenablebit(ifp, 0, (IFCAP_TXCSUM|IFCAP_TSO4));
4056 if_sethwassistbits(ifp, 0, (CSUM_TCP | CSUM_UDP));
4057 } else {
4058 if_setcapenablebit(ifp, IFCAP_TXCSUM, 0);
4059 if_sethwassistbits(ifp, (CSUM_TCP | CSUM_UDP), 0);
4060 }
4061 }
4062 if (mask & IFCAP_RXCSUM) {
4063 if (IFCAP_RXCSUM & if_getcapenable(ifp)) {
4064 if_setcapenablebit(ifp, 0, IFCAP_RXCSUM);
4065 } else {
4066 if_setcapenablebit(ifp, IFCAP_RXCSUM, 0);
4067 }
4068 }
4069 if (mask & IFCAP_TSO4) {
4070 if (IFCAP_TSO4 & if_getcapenable(ifp)) {
4071 if_setcapenablebit(ifp, 0, IFCAP_TSO4);
4072 } else if (IFCAP_TXCSUM & if_getcapenable(ifp)) {
4073 if_setcapenablebit(ifp, IFCAP_TSO4, 0);
4074 if_sethwassistbits(ifp, CSUM_TSO, 0);
4075 } else {
4076 printf("mxge requires tx checksum offload"
4077 " be enabled to use TSO\n");
4078 err = EINVAL;
4079 }
4080 }
4081 #if IFCAP_TSO6
4082 if (mask & IFCAP_TXCSUM_IPV6) {
4083 if (IFCAP_TXCSUM_IPV6 & if_getcapenable(ifp)) {
4084 mask &= ~IFCAP_TSO6;
4085 if_setcapenablebit(ifp, 0,
4086 IFCAP_TXCSUM_IPV6 | IFCAP_TSO6);
4087 if_sethwassistbits(ifp, 0,
4088 				    CSUM_TCP_IPV6 | CSUM_UDP_IPV6);
4089 } else {
4090 if_setcapenablebit(ifp, IFCAP_TXCSUM_IPV6, 0);
4091 if_sethwassistbits(ifp,
4092 CSUM_TCP_IPV6 | CSUM_UDP_IPV6, 0);
4093 }
4094 }
4095 if (mask & IFCAP_RXCSUM_IPV6) {
4096 if (IFCAP_RXCSUM_IPV6 & if_getcapenable(ifp)) {
4097 if_setcapenablebit(ifp, 0, IFCAP_RXCSUM_IPV6);
4098 } else {
4099 if_setcapenablebit(ifp, IFCAP_RXCSUM_IPV6, 0);
4100 }
4101 }
4102 if (mask & IFCAP_TSO6) {
4103 if (IFCAP_TSO6 & if_getcapenable(ifp)) {
4104 if_setcapenablebit(ifp, 0, IFCAP_TSO6);
4105 } else if (IFCAP_TXCSUM_IPV6 & if_getcapenable(ifp)) {
4106 if_setcapenablebit(ifp, IFCAP_TSO6, 0);
4107 if_sethwassistbits(ifp, CSUM_TSO, 0);
4108 } else {
4109 printf("mxge requires tx checksum offload"
4110 " be enabled to use TSO\n");
4111 err = EINVAL;
4112 }
4113 }
4114 #endif /*IFCAP_TSO6 */
4115
4116 if (mask & IFCAP_LRO)
4117 if_togglecapenable(ifp, IFCAP_LRO);
4118 if (mask & IFCAP_VLAN_HWTAGGING)
4119 if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
4120 if (mask & IFCAP_VLAN_HWTSO)
4121 if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
4122
4123 if (!(if_getcapabilities(ifp) & IFCAP_VLAN_HWTSO) ||
4124 !(if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING))
4125 if_setcapenablebit(ifp, 0, IFCAP_VLAN_HWTSO);
4126
4127 mtx_unlock(&sc->driver_mtx);
4128 VLAN_CAPABILITIES(ifp);
4129
4130 break;
4131
4132 case SIOCGIFMEDIA:
4133 mtx_lock(&sc->driver_mtx);
4134 if (sc->dying) {
4135 mtx_unlock(&sc->driver_mtx);
4136 return (EINVAL);
4137 }
4138 mxge_media_probe(sc);
4139 mtx_unlock(&sc->driver_mtx);
4140 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
4141 &sc->media, command);
4142 break;
4143
4144 case SIOCGI2C:
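		/* transceiver EEPROM read, used by ifconfig(8) to
		   dump the plugged SFP/XFP module's contents */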
4145 if (sc->connector != MXGE_XFP &&
4146 sc->connector != MXGE_SFP) {
4147 err = ENXIO;
4148 break;
4149 }
4150 err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
4151 if (err != 0)
4152 break;
4153 mtx_lock(&sc->driver_mtx);
4154 if (sc->dying) {
4155 mtx_unlock(&sc->driver_mtx);
4156 return (EINVAL);
4157 }
4158 err = mxge_fetch_i2c(sc, &i2c);
4159 mtx_unlock(&sc->driver_mtx);
4160 if (err == 0)
4161 err = copyout(&i2c, ifr_data_get_ptr(ifr),
4162 sizeof(i2c));
4163 break;
4164 default:
4165 err = ether_ioctl(ifp, command, data);
4166 break;
4167 }
4168 return err;
4169 }
4170
4171 static void
4172 mxge_fetch_tunables(mxge_softc_t *sc)
4173 {
4174
4175 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
4176 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
4177 &mxge_flow_control);
4178 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
4179 &mxge_intr_coal_delay);
4180 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
4181 &mxge_nvidia_ecrc_enable);
4182 TUNABLE_INT_FETCH("hw.mxge.force_firmware",
4183 &mxge_force_firmware);
4184 TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
4185 &mxge_deassert_wait);
4186 TUNABLE_INT_FETCH("hw.mxge.verbose",
4187 &mxge_verbose);
4188 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
4189 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
4190 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
4191 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
4192 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
4193 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);
4194
4195 if (bootverbose)
4196 mxge_verbose = 1;
4197 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
4198 mxge_intr_coal_delay = 30;
4199 if (mxge_ticks == 0)
4200 mxge_ticks = hz / 2;
4201 sc->pause = mxge_flow_control;
4202 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
4203 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
4204 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4205 }
4206 if (mxge_initial_mtu > ETHERMTU_JUMBO ||
4207 mxge_initial_mtu < ETHER_MIN_LEN)
4208 mxge_initial_mtu = ETHERMTU_JUMBO;
4209
4210 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
4211 mxge_throttle = MXGE_MAX_THROTTLE;
4212 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
4213 mxge_throttle = MXGE_MIN_THROTTLE;
4214 sc->throttle = mxge_throttle;
4215 }
4216
4217 static void
4218 mxge_free_slices(mxge_softc_t *sc)
4219 {
4220 struct mxge_slice_state *ss;
4221 int i;
4222
4223 if (sc->ss == NULL)
4224 return;
4225
4226 for (i = 0; i < sc->num_slices; i++) {
4227 ss = &sc->ss[i];
4228 if (ss->fw_stats != NULL) {
4229 mxge_dma_free(&ss->fw_stats_dma);
4230 ss->fw_stats = NULL;
4231 if (ss->tx.br != NULL) {
4232 drbr_free(ss->tx.br, M_DEVBUF);
4233 ss->tx.br = NULL;
4234 }
4235 mtx_destroy(&ss->tx.mtx);
4236 }
4237 if (ss->rx_done.entry != NULL) {
4238 mxge_dma_free(&ss->rx_done.dma);
4239 ss->rx_done.entry = NULL;
4240 }
4241 }
4242 free(sc->ss, M_DEVBUF);
4243 sc->ss = NULL;
4244 }
4245
4246 static int
4247 mxge_alloc_slices(mxge_softc_t *sc)
4248 {
4249 mxge_cmd_t cmd;
4250 struct mxge_slice_state *ss;
4251 size_t bytes;
4252 int err, i, max_intr_slots;
4253
4254 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4255 if (err != 0) {
4256 device_printf(sc->dev, "Cannot determine rx ring size\n");
4257 return err;
4258 }
4259 sc->rx_ring_size = cmd.data0;
4260 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
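	/*
	 * the ring size in bytes divided by the size of one receive
	 * descriptor gives the entries per rx ring; doubling that
	 * sizes the interrupt queue to cover, presumably, both the
	 * small and big receive rings
	 */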
4261
4262 bytes = sizeof (*sc->ss) * sc->num_slices;
4263 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
4264 if (sc->ss == NULL)
4265 return (ENOMEM);
4266 for (i = 0; i < sc->num_slices; i++) {
4267 ss = &sc->ss[i];
4268
4269 ss->sc = sc;
4270
4271 /* allocate per-slice rx interrupt queues */
4272
4273 bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
4274 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
4275 if (err != 0)
4276 goto abort;
4277 ss->rx_done.entry = ss->rx_done.dma.addr;
4278 bzero(ss->rx_done.entry, bytes);
4279
4280 /*
4281 * allocate the per-slice firmware stats; stats
4282 		 * (including tx) are used only on the first
4283 * slice for now
4284 */
4285
4286 bytes = sizeof (*ss->fw_stats);
4287 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
4288 sizeof (*ss->fw_stats), 64);
4289 if (err != 0)
4290 goto abort;
4291 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
4292 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
4293 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
4294 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
4295 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
4296 &ss->tx.mtx);
4297 }
4298
4299 return (0);
4300
4301 abort:
4302 mxge_free_slices(sc);
4303 return (ENOMEM);
4304 }
4305
4306 static void
4307 mxge_slice_probe(mxge_softc_t *sc)
4308 {
4309 mxge_cmd_t cmd;
4310 char *old_fw;
4311 int msix_cnt, status, max_intr_slots;
4312
4313 sc->num_slices = 1;
4314 /*
4315 	 * don't enable multiple slices if the tunable disables them,
4316 * or if this is not an SMP system
4317 */
4318
4319 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
4320 return;
4321
4322 /* see how many MSI-X interrupts are available */
4323 msix_cnt = pci_msix_count(sc->dev);
4324 if (msix_cnt < 2)
4325 return;
4326
4327 	/* now load the slice aware firmware and see what it supports */
4328 old_fw = sc->fw_name;
4329 if (old_fw == mxge_fw_aligned)
4330 sc->fw_name = mxge_fw_rss_aligned;
4331 else
4332 sc->fw_name = mxge_fw_rss_unaligned;
4333 status = mxge_load_firmware(sc, 0);
4334 if (status != 0) {
4335 device_printf(sc->dev, "Falling back to a single slice\n");
4336 return;
4337 }
4338
4339 /* try to send a reset command to the card to see if it
4340 is alive */
4341 memset(&cmd, 0, sizeof (cmd));
4342 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4343 if (status != 0) {
4344 device_printf(sc->dev, "failed reset\n");
4345 goto abort_with_fw;
4346 }
4347
4348 /* get rx ring size */
4349 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4350 if (status != 0) {
4351 device_printf(sc->dev, "Cannot determine rx ring size\n");
4352 goto abort_with_fw;
4353 }
4354 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
4355
4356 /* tell it the size of the interrupt queues */
4357 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
4358 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4359 if (status != 0) {
4360 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4361 goto abort_with_fw;
4362 }
4363
4364 	/* ask the firmware for the maximum number of slices it supports */
4365 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4366 if (status != 0) {
4367 device_printf(sc->dev,
4368 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4369 goto abort_with_fw;
4370 }
4371 sc->num_slices = cmd.data0;
4372 if (sc->num_slices > msix_cnt)
4373 sc->num_slices = msix_cnt;
4374
4375 if (mxge_max_slices == -1) {
4376 /* cap to number of CPUs in system */
4377 if (sc->num_slices > mp_ncpus)
4378 sc->num_slices = mp_ncpus;
4379 } else {
4380 if (sc->num_slices > mxge_max_slices)
4381 sc->num_slices = mxge_max_slices;
4382 }
4383 /* make sure it is a power of two */
4384 while (sc->num_slices & (sc->num_slices - 1))
4385 sc->num_slices--;
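	/* (n & (n - 1)) is zero only for powers of 2, so this
	   rounds num_slices down, e.g. 6 -> 5 -> 4 */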
4386
4387 if (mxge_verbose)
4388 device_printf(sc->dev, "using %d slices\n",
4389 sc->num_slices);
4390
4391 return;
4392
4393 abort_with_fw:
4394 sc->fw_name = old_fw;
4395 (void) mxge_load_firmware(sc, 0);
4396 }
4397
4398 static int
4399 mxge_add_msix_irqs(mxge_softc_t *sc)
4400 {
4401 size_t bytes;
4402 int count, err, i, rid;
4403
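	/*
	 * the MSI-X table lives behind BAR2 on this NIC; the BAR
	 * must be mapped before pci_alloc_msix() can program the
	 * vectors
	 */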
4404 rid = PCIR_BAR(2);
4405 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4406 &rid, RF_ACTIVE);
4407
4408 if (sc->msix_table_res == NULL) {
4409 device_printf(sc->dev, "couldn't alloc MSIX table res\n");
4410 return ENXIO;
4411 }
4412
4413 count = sc->num_slices;
4414 err = pci_alloc_msix(sc->dev, &count);
4415 if (err != 0) {
4416 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d"
4417 "err = %d \n", sc->num_slices, err);
4418 goto abort_with_msix_table;
4419 }
4420 if (count < sc->num_slices) {
4421 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
4422 		    sc->num_slices, count);
4423 device_printf(sc->dev,
4424 "Try setting hw.mxge.max_slices to %d\n",
4425 count);
4426 err = ENOSPC;
4427 goto abort_with_msix;
4428 }
4429 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
4430 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4431 if (sc->msix_irq_res == NULL) {
4432 err = ENOMEM;
4433 goto abort_with_msix;
4434 }
4435
4436 for (i = 0; i < sc->num_slices; i++) {
4437 rid = i + 1;
4438 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
4439 SYS_RES_IRQ,
4440 &rid, RF_ACTIVE);
4441 if (sc->msix_irq_res[i] == NULL) {
4442 device_printf(sc->dev, "couldn't allocate IRQ res"
4443 " for message %d\n", i);
4444 err = ENXIO;
4445 goto abort_with_res;
4446 }
4447 }
4448
4449 bytes = sizeof (*sc->msix_ih) * sc->num_slices;
4450 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4451
4452 for (i = 0; i < sc->num_slices; i++) {
4453 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
4454 INTR_TYPE_NET | INTR_MPSAFE, NULL,
4455 mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
4456 if (err != 0) {
4457 device_printf(sc->dev, "couldn't setup intr for "
4458 "message %d\n", i);
4459 goto abort_with_intr;
4460 }
4461 bus_describe_intr(sc->dev, sc->msix_irq_res[i],
4462 sc->msix_ih[i], "s%d", i);
4463 }
4464
4465 if (mxge_verbose) {
4466 device_printf(sc->dev, "using %d msix IRQs:",
4467 sc->num_slices);
4468 for (i = 0; i < sc->num_slices; i++)
4469 printf(" %jd", rman_get_start(sc->msix_irq_res[i]));
4470 printf("\n");
4471 }
4472 return (0);
4473
4474 abort_with_intr:
4475 for (i = 0; i < sc->num_slices; i++) {
4476 if (sc->msix_ih[i] != NULL) {
4477 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4478 sc->msix_ih[i]);
4479 sc->msix_ih[i] = NULL;
4480 }
4481 }
4482 free(sc->msix_ih, M_DEVBUF);
4483
4484 abort_with_res:
4485 for (i = 0; i < sc->num_slices; i++) {
4486 rid = i + 1;
4487 if (sc->msix_irq_res[i] != NULL)
4488 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4489 sc->msix_irq_res[i]);
4490 sc->msix_irq_res[i] = NULL;
4491 }
4492 free(sc->msix_irq_res, M_DEVBUF);
4493
4494 abort_with_msix:
4495 pci_release_msi(sc->dev);
4496
4497 abort_with_msix_table:
4498 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4499 sc->msix_table_res);
4500
4501 return err;
4502 }
4503
4504 static int
4505 mxge_add_single_irq(mxge_softc_t *sc)
4506 {
4507 int count, err, rid;
4508
4509 count = pci_msi_count(sc->dev);
4510 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
4511 rid = 1;
4512 } else {
4513 rid = 0;
4514 sc->legacy_irq = 1;
4515 }
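	/*
	 * rid 1 is the first MSI message; rid 0 is the shared legacy
	 * INTx line we fall back to when MSI is unavailable
	 */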
4516 sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
4517 RF_SHAREABLE | RF_ACTIVE);
4518 if (sc->irq_res == NULL) {
4519 device_printf(sc->dev, "could not alloc interrupt\n");
4520 return ENXIO;
4521 }
4522 if (mxge_verbose)
4523 device_printf(sc->dev, "using %s irq %jd\n",
4524 sc->legacy_irq ? "INTx" : "MSI",
4525 rman_get_start(sc->irq_res));
4526 err = bus_setup_intr(sc->dev, sc->irq_res,
4527 INTR_TYPE_NET | INTR_MPSAFE, NULL,
4528 mxge_intr, &sc->ss[0], &sc->ih);
4529 if (err != 0) {
4530 bus_release_resource(sc->dev, SYS_RES_IRQ,
4531 sc->legacy_irq ? 0 : 1, sc->irq_res);
4532 if (!sc->legacy_irq)
4533 pci_release_msi(sc->dev);
4534 }
4535 return err;
4536 }
4537
4538 static void
4539 mxge_rem_msix_irqs(mxge_softc_t *sc)
4540 {
4541 int i, rid;
4542
4543 for (i = 0; i < sc->num_slices; i++) {
4544 if (sc->msix_ih[i] != NULL) {
4545 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4546 sc->msix_ih[i]);
4547 sc->msix_ih[i] = NULL;
4548 }
4549 }
4550 free(sc->msix_ih, M_DEVBUF);
4551
4552 for (i = 0; i < sc->num_slices; i++) {
4553 rid = i + 1;
4554 if (sc->msix_irq_res[i] != NULL)
4555 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4556 sc->msix_irq_res[i]);
4557 sc->msix_irq_res[i] = NULL;
4558 }
4559 free(sc->msix_irq_res, M_DEVBUF);
4560
4561 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4562 sc->msix_table_res);
4563
4564 pci_release_msi(sc->dev);
4565 return;
4566 }
4567
4568 static void
4569 mxge_rem_single_irq(mxge_softc_t *sc)
4570 {
4571 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
4572 bus_release_resource(sc->dev, SYS_RES_IRQ,
4573 sc->legacy_irq ? 0 : 1, sc->irq_res);
4574 if (!sc->legacy_irq)
4575 pci_release_msi(sc->dev);
4576 }
4577
4578 static void
4579 mxge_rem_irq(mxge_softc_t *sc)
4580 {
4581 if (sc->num_slices > 1)
4582 mxge_rem_msix_irqs(sc);
4583 else
4584 mxge_rem_single_irq(sc);
4585 }
4586
4587 static int
4588 mxge_add_irq(mxge_softc_t *sc)
4589 {
4590 int err;
4591
4592 if (sc->num_slices > 1)
4593 err = mxge_add_msix_irqs(sc);
4594 else
4595 err = mxge_add_single_irq(sc);
4596
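	/* deliberately disabled by the leading 0; apparently a
	   retry path kept around for exercising MSI-X teardown
	   and setup */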
4597 if (0 && err == 0 && sc->num_slices > 1) {
4598 mxge_rem_msix_irqs(sc);
4599 err = mxge_add_msix_irqs(sc);
4600 }
4601 return err;
4602 }
4603
4604 static int
4605 mxge_attach(device_t dev)
4606 {
4607 mxge_cmd_t cmd;
4608 mxge_softc_t *sc = device_get_softc(dev);
4609 if_t ifp;
4610 int err, rid;
4611
4612 sc->dev = dev;
4613 mxge_fetch_tunables(sc);
4614
4615 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
4616 sc->tq = taskqueue_create("mxge_taskq", M_WAITOK,
4617 taskqueue_thread_enqueue, &sc->tq);
4618
4619 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4620 1, /* alignment */
4621 0, /* boundary */
4622 BUS_SPACE_MAXADDR, /* low */
4623 BUS_SPACE_MAXADDR, /* high */
4624 NULL, NULL, /* filter */
4625 65536 + 256, /* maxsize */
4626 MXGE_MAX_SEND_DESC, /* num segs */
4627 65536, /* maxsegsize */
4628 0, /* flags */
4629 NULL, NULL, /* lock */
4630 &sc->parent_dmat); /* tag */
4631
4632 if (err != 0) {
4633 device_printf(sc->dev, "Err %d allocating parent dmat\n",
4634 err);
4635 goto abort_with_tq;
4636 }
4637
4638 ifp = sc->ifp = if_alloc(IFT_ETHER);
4639 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
4640
4641 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
4642 device_get_nameunit(dev));
4643 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
4644 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
4645 "%s:drv", device_get_nameunit(dev));
4646 mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
4647 MTX_NETWORK_LOCK, MTX_DEF);
4648
4649 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);
4650
4651 mxge_setup_cfg_space(sc);
4652
4653 /* Map the board into the kernel */
4654 rid = PCIR_BARS;
4655 sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
4656 RF_ACTIVE);
4657 if (sc->mem_res == NULL) {
4658 device_printf(dev, "could not map memory\n");
4659 err = ENXIO;
4660 goto abort_with_lock;
4661 }
4662 sc->sram = rman_get_virtual(sc->mem_res);
4663 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4664 if (sc->sram_size > rman_get_size(sc->mem_res)) {
4665 device_printf(dev, "impossible memory region size %jd\n",
4666 rman_get_size(sc->mem_res));
4667 err = ENXIO;
4668 goto abort_with_mem_res;
4669 }
4670
4671 	/* make a NUL-terminated copy of the EEPROM strings section of
4672 lanai SRAM */
4673 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
4674 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
4675 rman_get_bushandle(sc->mem_res),
4676 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
4677 sc->eeprom_strings,
4678 MXGE_EEPROM_STRINGS_SIZE - 2);
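	/*
	 * the strings are a series of NUL-separated "key=value"
	 * pairs (MAC=..., plus typically PC= and SN=);
	 * mxge_parse_strings() extracts the ones the driver needs,
	 * such as the MAC address
	 */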
4679 err = mxge_parse_strings(sc);
4680 if (err != 0)
4681 goto abort_with_mem_res;
4682
4683 /* Enable write combining for efficient use of PCIe bus */
4684 mxge_enable_wc(sc);
4685
4686 /* Allocate the out of band dma memory */
4687 err = mxge_dma_alloc(sc, &sc->cmd_dma,
4688 sizeof (mxge_cmd_t), 64);
4689 if (err != 0)
4690 goto abort_with_mem_res;
4691 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
4692 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
4693 if (err != 0)
4694 goto abort_with_cmd_dma;
4695
4696 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
4697 if (err != 0)
4698 goto abort_with_zeropad_dma;
4699
4700 /* select & load the firmware */
4701 err = mxge_select_firmware(sc);
4702 if (err != 0)
4703 goto abort_with_dmabench;
4704 sc->intr_coal_delay = mxge_intr_coal_delay;
4705
4706 mxge_slice_probe(sc);
4707 err = mxge_alloc_slices(sc);
4708 if (err != 0)
4709 goto abort_with_dmabench;
4710
4711 err = mxge_reset(sc, 0);
4712 if (err != 0)
4713 goto abort_with_slices;
4714
4715 err = mxge_alloc_rings(sc);
4716 if (err != 0) {
4717 device_printf(sc->dev, "failed to allocate rings\n");
4718 goto abort_with_slices;
4719 }
4720
4721 err = mxge_add_irq(sc);
4722 if (err != 0) {
4723 device_printf(sc->dev, "failed to add irq\n");
4724 goto abort_with_rings;
4725 }
4726
4727 if_setbaudrate(ifp, IF_Gbps(10));
4728 if_setcapabilities(ifp, IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
4729 IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 |
4730 IFCAP_RXCSUM_IPV6);
4731 #if defined(INET) || defined(INET6)
4732 if_setcapabilitiesbit(ifp, IFCAP_LRO, 0);
4733 #endif
4734
4735 #ifdef MXGE_NEW_VLAN_API
4736 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM, 0);
4737
4738 /* Only FW 1.4.32 and newer can do TSO over vlans */
4739 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
4740 sc->fw_ver_tiny >= 32)
4741 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTSO, 0);
4742 #endif
4743 sc->max_mtu = mxge_max_mtu(sc);
4744 if (sc->max_mtu >= 9000)
4745 if_setcapabilitiesbit(ifp, IFCAP_JUMBO_MTU, 0);
4746 else
4747 device_printf(dev, "MTU limited to %d. Install "
4748 "latest firmware for 9000 byte jumbo support\n",
4749 sc->max_mtu - ETHER_HDR_LEN);
4750 if_sethwassist(ifp, CSUM_TCP | CSUM_UDP | CSUM_TSO);
4751 if_sethwassistbits(ifp, CSUM_TCP_IPV6 | CSUM_UDP_IPV6, 0);
4752 /* check to see if f/w supports TSO for IPv6 */
4753 if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) {
4754 if (CSUM_TCP_IPV6)
4755 if_setcapabilitiesbit(ifp, IFCAP_TSO6, 0);
4756 sc->max_tso6_hlen = min(cmd.data0,
4757 sizeof (sc->ss[0].scratch));
4758 }
4759 if_setcapenable(ifp, if_getcapabilities(ifp));
4760 if (sc->lro_cnt == 0)
4761 if_setcapenablebit(ifp, 0, IFCAP_LRO);
4762 if_setinitfn(ifp, mxge_init);
4763 if_setsoftc(ifp, sc);
4764 if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
4765 if_setioctlfn(ifp, mxge_ioctl);
4766 if_setstartfn(ifp, mxge_start);
4767 if_setgetcounterfn(ifp, mxge_get_counter);
4768 if_sethwtsomax(ifp, IP_MAXPACKET - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
4769 if_sethwtsomaxsegcount(ifp, sc->ss[0].tx.max_desc);
4770 if_sethwtsomaxsegsize(ifp, IP_MAXPACKET);
4771 /* Initialise the ifmedia structure */
4772 ifmedia_init(&sc->media, 0, mxge_media_change,
4773 mxge_media_status);
4774 mxge_media_init(sc);
4775 mxge_media_probe(sc);
4776 sc->dying = 0;
4777 ether_ifattach(ifp, sc->mac_addr);
4778 /* ether_ifattach sets mtu to ETHERMTU */
4779 if (mxge_initial_mtu != ETHERMTU)
4780 mxge_change_mtu(sc, mxge_initial_mtu);
4781
4782 mxge_add_sysctls(sc);
4783 if_settransmitfn(ifp, mxge_transmit);
4784 if_setqflushfn(ifp, mxge_qflush);
4785 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
4786 device_get_nameunit(sc->dev));
4787 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
4788 return 0;
4789
4790 abort_with_rings:
4791 mxge_free_rings(sc);
4792 abort_with_slices:
4793 mxge_free_slices(sc);
4794 abort_with_dmabench:
4795 mxge_dma_free(&sc->dmabench_dma);
4796 abort_with_zeropad_dma:
4797 mxge_dma_free(&sc->zeropad_dma);
4798 abort_with_cmd_dma:
4799 mxge_dma_free(&sc->cmd_dma);
4800 abort_with_mem_res:
4801 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4802 abort_with_lock:
4803 pci_disable_busmaster(dev);
4804 mtx_destroy(&sc->cmd_mtx);
4805 mtx_destroy(&sc->driver_mtx);
4806 if_free(ifp);
4807 bus_dma_tag_destroy(sc->parent_dmat);
4808 abort_with_tq:
4809 if (sc->tq != NULL) {
4810 taskqueue_drain(sc->tq, &sc->watchdog_task);
4811 taskqueue_free(sc->tq);
4812 sc->tq = NULL;
4813 }
4814 return err;
4815 }
4816
4817 static int
4818 mxge_detach(device_t dev)
4819 {
4820 mxge_softc_t *sc = device_get_softc(dev);
4821
4822 if (mxge_vlans_active(sc)) {
4823 device_printf(sc->dev,
4824 "Detach vlans before removing module\n");
4825 return EBUSY;
4826 }
4827 mtx_lock(&sc->driver_mtx);
4828 sc->dying = 1;
4829 if (if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING)
4830 mxge_close(sc, 0);
4831 mtx_unlock(&sc->driver_mtx);
4832 ether_ifdetach(sc->ifp);
4833 if (sc->tq != NULL) {
4834 taskqueue_drain(sc->tq, &sc->watchdog_task);
4835 taskqueue_free(sc->tq);
4836 sc->tq = NULL;
4837 }
4838 callout_drain(&sc->co_hdl);
4839 ifmedia_removeall(&sc->media);
4840 mxge_dummy_rdma(sc, 0);
4841 mxge_rem_sysctls(sc);
4842 mxge_rem_irq(sc);
4843 mxge_free_rings(sc);
4844 mxge_free_slices(sc);
4845 mxge_dma_free(&sc->dmabench_dma);
4846 mxge_dma_free(&sc->zeropad_dma);
4847 mxge_dma_free(&sc->cmd_dma);
4848 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4849 pci_disable_busmaster(dev);
4850 mtx_destroy(&sc->cmd_mtx);
4851 mtx_destroy(&sc->driver_mtx);
4852 if_free(sc->ifp);
4853 bus_dma_tag_destroy(sc->parent_dmat);
4854 return 0;
4855 }
4856
4857 static int
4858 mxge_shutdown(device_t dev)
4859 {
4860 return 0;
4861 }
4862
4863 /*
4864 This file uses Myri10GE driver indentation.
4865
4866 Local Variables:
4867 c-file-style:"linux"
4868 tab-width:8
4869 End:
4870 */
4871