xref: /freebsd/sys/dev/mxge/if_mxge.c (revision 0bb263df82e129f5f8c82da6deb55dfe10daa677)
1 /******************************************************************************
2 
3 Copyright (c) 2006, Myricom Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Redistributions in binary form must reproduce the above copyright
13     notice, this list of conditions and the following disclaimer in the
14     documentation and/or other materials provided with the distribution.
15 
16  3. Neither the name of the Myricom Inc, nor the names of its
17     contributors may be used to endorse or promote products derived from
18     this software without specific prior written permission.
19 
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31 
32 ***************************************************************************/
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/linker.h>
40 #include <sys/firmware.h>
41 #include <sys/endian.h>
42 #include <sys/sockio.h>
43 #include <sys/mbuf.h>
44 #include <sys/malloc.h>
45 #include <sys/kdb.h>
46 #include <sys/kernel.h>
47 #include <sys/lock.h>
48 #include <sys/module.h>
49 #include <sys/memrange.h>
50 #include <sys/socket.h>
51 #include <sys/sysctl.h>
52 #include <sys/sx.h>
53 
54 #include <net/if.h>
55 #include <net/if_arp.h>
56 #include <net/ethernet.h>
57 #include <net/if_dl.h>
58 #include <net/if_media.h>
59 
60 #include <net/bpf.h>
61 
62 #include <net/if_types.h>
63 #include <net/if_vlan_var.h>
64 #include <net/zlib.h>
65 
66 #include <netinet/in_systm.h>
67 #include <netinet/in.h>
68 #include <netinet/ip.h>
69 #include <netinet/tcp.h>
70 
71 #include <machine/bus.h>
72 #include <machine/in_cksum.h>
73 #include <machine/resource.h>
74 #include <sys/bus.h>
75 #include <sys/rman.h>
76 
77 #include <dev/pci/pcireg.h>
78 #include <dev/pci/pcivar.h>
79 
80 #include <vm/vm.h>		/* for pmap_mapdev() */
81 #include <vm/pmap.h>
82 
83 #if defined(__i386) || defined(__amd64)
84 #include <machine/specialreg.h>
85 #endif
86 
87 #include <dev/mxge/mxge_mcp.h>
88 #include <dev/mxge/mcp_gen_header.h>
89 #include <dev/mxge/if_mxge_var.h>
90 
91 /* tunable params */
92 static int mxge_nvidia_ecrc_enable = 1;
93 static int mxge_force_firmware = 0;
94 static int mxge_intr_coal_delay = 30;
95 static int mxge_deassert_wait = 1;
96 static int mxge_flow_control = 1;
97 static int mxge_verbose = 0;
98 static int mxge_lro_cnt = 8;
99 static int mxge_ticks;
100 static char *mxge_fw_unaligned = "mxge_ethp_z8e";
101 static char *mxge_fw_aligned = "mxge_eth_z8e";
102 
103 static int mxge_probe(device_t dev);
104 static int mxge_attach(device_t dev);
105 static int mxge_detach(device_t dev);
106 static int mxge_shutdown(device_t dev);
107 static void mxge_intr(void *arg);
108 
109 static device_method_t mxge_methods[] =
110 {
111   /* Device interface */
112   DEVMETHOD(device_probe, mxge_probe),
113   DEVMETHOD(device_attach, mxge_attach),
114   DEVMETHOD(device_detach, mxge_detach),
115   DEVMETHOD(device_shutdown, mxge_shutdown),
116   {0, 0}
117 };
118 
119 static driver_t mxge_driver =
120 {
121   "mxge",
122   mxge_methods,
123   sizeof(mxge_softc_t),
124 };
125 
126 static devclass_t mxge_devclass;
127 
128 /* Declare ourselves to be a child of the PCI bus.*/
129 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
130 MODULE_DEPEND(mxge, firmware, 1, 1, 1);
131 
132 static int mxge_load_firmware(mxge_softc_t *sc);
133 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
134 static int mxge_close(mxge_softc_t *sc);
135 static int mxge_open(mxge_softc_t *sc);
136 static void mxge_tick(void *arg);
137 
138 static int
139 mxge_probe(device_t dev)
140 {
141   if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
142       (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E)) {
143 	  device_set_desc(dev, "Myri10G-PCIE-8A");
144 	  return 0;
145   }
146   return ENXIO;
147 }
148 
/*
 * Enable write combining on the NIC's memory BAR (the SRAM aperture)
 * so PIO bursts to the card are faster.  On i386/amd64 this first
 * tries the PAT via pmap_change_attr(); if that fails (or on other
 * platforms) it falls back to programming a write-combining memory
 * range (MTRR) over the BAR.
 *
 * NOTE(review): sc->wc is set only on the mem_range fallback path,
 * not when pmap_change_attr() succeeds -- confirm whether sc->wc
 * gates MTRR cleanup at detach (then this is correct) or gates
 * WC-dependent PIO (then the PAT path should set it too).
 */
static void
mxge_enable_wc(mxge_softc_t *sc)
{
	struct mem_range_desc mrdesc;
	vm_paddr_t pa;
	vm_offset_t len;
	int err, action;

	len = rman_get_size(sc->mem_res);
#if defined(__i386) || defined(__amd64)
	/* preferred path: mark the mapped SRAM write-combining via PAT */
	err = pmap_change_attr((vm_offset_t) sc->sram,
			       len, PAT_WRITE_COMBINING);
	if (err == 0)
		return;
	else
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
			      err);
#endif
	/* fallback: ask the memory-range (MTRR) driver to cover the BAR */
	pa = rman_get_start(sc->mem_res);
	mrdesc.mr_base = pa;
	mrdesc.mr_len = len;
	mrdesc.mr_flags = MDF_WRITECOMBINE;
	action = MEMRANGE_SET_UPDATE;
	strcpy((char *)&mrdesc.mr_owner, "mxge");
	err = mem_range_attr_set(&mrdesc, &action);
	if (err != 0) {
		device_printf(sc->dev,
			      "w/c failed for pa 0x%lx, len 0x%lx, err = %d\n",
			      (unsigned long)pa, (unsigned long)len, err);
	} else {
		sc->wc = 1;
	}
}
182 
183 
184 /* callback to get our DMA address */
185 static void
186 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
187 			 int error)
188 {
189 	if (error == 0) {
190 		*(bus_addr_t *) arg = segs->ds_addr;
191 	}
192 }
193 
/*
 * Allocate `bytes' bytes of DMA-visible, coherent, zeroed memory with
 * the requested alignment (single segment, 4KB boundary/segment cap)
 * and record the tag, kva, map and bus address in *dma.  Returns 0 or
 * a bus_dma errno; on failure any partially created resources are
 * released via the goto-cleanup chain.
 */
static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
		   bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 4096,			/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO),  &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory; mxge_dmamap_callback() captures the single
	   segment's bus address into dma->bus_addr */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}
244 
245 
/*
 * Release a DMA area created by mxge_dma_alloc(): unload the map,
 * free the memory, then destroy the tag -- reverse order of creation.
 */
static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}
253 
254 /*
255  * The eeprom strings on the lanaiX have the format
256  * SN=x\0
257  * MAC=x:x:x:x:x:x\0
258  * PC=text\0
259  */
260 
261 static int
262 mxge_parse_strings(mxge_softc_t *sc)
263 {
264 #define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)
265 
266 	char *ptr, *limit;
267 	int i, found_mac;
268 
269 	ptr = sc->eeprom_strings;
270 	limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
271 	found_mac = 0;
272 	while (ptr < limit && *ptr != '\0') {
273 		if (memcmp(ptr, "MAC=", 4) == 0) {
274 			ptr += 1;
275 			sc->mac_addr_string = ptr;
276 			for (i = 0; i < 6; i++) {
277 				ptr += 3;
278 				if ((ptr + 2) > limit)
279 					goto abort;
280 				sc->mac_addr[i] = strtoul(ptr, NULL, 16);
281 				found_mac = 1;
282 			}
283 		} else if (memcmp(ptr, "PC=", 3) == 0) {
284 			ptr += 3;
285 			strncpy(sc->product_code_string, ptr,
286 				sizeof (sc->product_code_string) - 1);
287 		} else if (memcmp(ptr, "SN=", 3) == 0) {
288 			ptr += 3;
289 			strncpy(sc->serial_number_string, ptr,
290 				sizeof (sc->serial_number_string) - 1);
291 		}
292 		MXGE_NEXT_STRING(ptr);
293 	}
294 
295 	if (found_mac)
296 		return 0;
297 
298  abort:
299 	device_printf(sc->dev, "failed to parse eeprom_strings\n");
300 
301 	return ENXIO;
302 }
303 
304 #if #cpu(i386) || defined __i386 || defined i386 || defined __i386__ || #cpu(x86_64) || defined __x86_64__
/*
 * Enable ECRC generation on the upstream Nvidia (CK804/MCP55) PCIe
 * bridge, which forces 8-byte-aligned completions (see the comment
 * block above mxge_firmware_probe()).  The ECRC control register
 * lives in extended (>0xff) PCI config space, which this kernel
 * cannot reach with normal config cycles, so the chipset's
 * memory-mapped config window is located and mapped by hand with
 * pmap_mapdev().
 */
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;


	if (!mxge_nvidia_ecrc_enable)
		return;

	/* the bridge is the grandparent: mxge -> pciN -> bridge */
	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	/* only Nvidia bridges are handled here */
	if (vendor_id != 0x10de)
		return;

	/* base of the chipset's memory-mapped config window */
	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function.  Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves.  This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	/* recover the bridge's geographic address and IDs from its parent */
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	/* offset of this bridge's config space in the window:
	   1MB per bus, 4KB per (slot, function) */
	off =  base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);


	if (va == NULL) {
		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	/* extended config register 0x178; bit 0x40 is the ECRC enable */
	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
428 #else
429 static void
430 mxge_enable_nvidia_ecrc(mxge_softc_t *sc, device_t pdev)
431 {
432 	device_printf(sc->dev,
433 		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
434 	return;
435 }
436 #endif
437 
438 
/*
 * Ask the firmware to run DMA read, write, and read+write benchmarks
 * (or, with MXGEFW_CMD_UNALIGNED_TEST, an alignment probe that aborts
 * on the first unaligned completion) against the dmabench buffer, and
 * record the measured bandwidths in the softc.  Returns the last
 * mxge_send_cmd() status.
 */
static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";


	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */

	len = sc->tx.boundary;

	/* read test (multiplier 0x10000) */
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	/* bytes per 0.5us tick * 2 == MB/s.
	   NOTE(review): assumes the tick count (low 16 bits of data0)
	   is non-zero on success -- confirm; zero would divide by 0 */
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	/* write test (multiplier 0x1) */
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	/* read+write test (multiplier 0x10001); each transfer moves
	   twice the data, hence the extra factor of 2 */
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	/* the unaligned probe is expected to fail; stay quiet for it */
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}
499 
500 /*
501  * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
502  * when the PCI-E Completion packets are aligned on an 8-byte
503  * boundary.  Some PCI-E chip sets always align Completion packets; on
504  * the ones that do not, the alignment can be enforced by enabling
505  * ECRC generation (if supported).
506  *
507  * When PCI-E Completion packets are not aligned, it is actually more
508  * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
509  *
510  * If the driver can neither enable ECRC nor verify that it has
511  * already been enabled, then it must use a firmware image which works
512  * around unaligned completion packets (ethp_z8e.dat), and it should
513  * also ensure that it never gives the device a Read-DMA which is
514  * larger than 2KB by setting the tx.boundary to 2KB.  If ECRC is
515  * enabled, then the driver should use the aligned (eth_z8e.dat)
516  * firmware image, and set tx.boundary to 4KB.
517  */
518 
519 static int
520 mxge_firmware_probe(mxge_softc_t *sc)
521 {
522 	device_t dev = sc->dev;
523 	int reg, status;
524 	uint16_t pectl;
525 
526 	sc->tx.boundary = 4096;
527 	/*
528 	 * Verify the max read request size was set to 4KB
529 	 * before trying the test with 4KB.
530 	 */
531 	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
532 		pectl = pci_read_config(dev, reg + 0x8, 2);
533 		if ((pectl & (5 << 12)) != (5 << 12)) {
534 			device_printf(dev, "Max Read Req. size != 4k (0x%x\n",
535 				      pectl);
536 			sc->tx.boundary = 2048;
537 		}
538 	}
539 
540 	/*
541 	 * load the optimized firmware (which assumes aligned PCIe
542 	 * completions) in order to see if it works on this host.
543 	 */
544 	sc->fw_name = mxge_fw_aligned;
545 	status = mxge_load_firmware(sc);
546 	if (status != 0) {
547 		return status;
548 	}
549 
550 	/*
551 	 * Enable ECRC if possible
552 	 */
553 	mxge_enable_nvidia_ecrc(sc);
554 
555 	/*
556 	 * Run a DMA test which watches for unaligned completions and
557 	 * aborts on the first one seen.
558 	 */
559 
560 	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
561 	if (status == 0)
562 		return 0; /* keep the aligned firmware */
563 
564 	if (status != E2BIG)
565 		device_printf(dev, "DMA test failed: %d\n", status);
566 	if (status == ENOSYS)
567 		device_printf(dev, "Falling back to ethp! "
568 			      "Please install up to date fw\n");
569 	return status;
570 }
571 
572 static int
573 mxge_select_firmware(mxge_softc_t *sc)
574 {
575 	int aligned = 0;
576 
577 
578 	if (mxge_force_firmware != 0) {
579 		if (mxge_force_firmware == 1)
580 			aligned = 1;
581 		else
582 			aligned = 0;
583 		if (mxge_verbose)
584 			device_printf(sc->dev,
585 				      "Assuming %s completions (forced)\n",
586 				      aligned ? "aligned" : "unaligned");
587 		goto abort;
588 	}
589 
590 	/* if the PCIe link width is 4 or less, we can use the aligned
591 	   firmware and skip any checks */
592 	if (sc->link_width != 0 && sc->link_width <= 4) {
593 		device_printf(sc->dev,
594 			      "PCIe x%d Link, expect reduced performance\n",
595 			      sc->link_width);
596 		aligned = 1;
597 		goto abort;
598 	}
599 
600 	if (0 == mxge_firmware_probe(sc))
601 		return 0;
602 
603 abort:
604 	if (aligned) {
605 		sc->fw_name = mxge_fw_aligned;
606 		sc->tx.boundary = 4096;
607 	} else {
608 		sc->fw_name = mxge_fw_unaligned;
609 		sc->tx.boundary = 2048;
610 	}
611 	return (mxge_load_firmware(sc));
612 }
613 
/*
 * Used to strip const from the firmware(9) image pointer:
 * mxge_load_firmware_helper() receives const data but passes it to
 * mxge_pio_copy(), which takes a non-const source.
 */
union qualhack
{
        const char *ro_char;
        char *rw_char;
};
619 
620 static int
621 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
622 {
623 
624 
625 	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
626 		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
627 			      be32toh(hdr->mcp_type));
628 		return EIO;
629 	}
630 
631 	/* save firmware version for sysctl */
632 	strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
633 	if (mxge_verbose)
634 		device_printf(sc->dev, "firmware id: %s\n", hdr->version);
635 
636 	sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
637 	       &sc->fw_ver_minor, &sc->fw_ver_tiny);
638 
639 	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
640 	      && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
641 		device_printf(sc->dev, "Found firmware version %s\n",
642 			      sc->fw_version);
643 		device_printf(sc->dev, "Driver needs %d.%d\n",
644 			      MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
645 		return EINVAL;
646 	}
647 	return 0;
648 
649 }
650 
/*
 * Fetch the firmware image named by sc->fw_name via firmware(9),
 * validate its embedded generation header, and copy it into NIC SRAM
 * at MXGE_FW_OFFSET in 256-byte PIO bursts.  On entry *limit holds
 * the maximum acceptable image size (the SRAM size); on success it is
 * updated to the actual image size for the caller's handoff command.
 * Returns 0 or an errno; the firmware reference is always released.
 */
static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
	const struct firmware *fw;
	const mcp_gen_header_t *hdr;
	unsigned hdr_offset;
	const char *fw_data;
	union qualhack hack;
	int status;
	unsigned int i;
	char dummy;


	fw = firmware_get(sc->fw_name);

	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n",
			      sc->fw_name);
		return ENOENT;
	}
	if (fw->datasize > *limit ||
	    fw->datasize < MCP_HEADER_PTR_OFFSET + 4) {
		device_printf(sc->dev, "Firmware image %s too large (%d/%d)\n",
			      sc->fw_name, (int)fw->datasize, (int) *limit);
		status = ENOSPC;
		goto abort_with_fw;
	}
	*limit = fw->datasize;

	/* check id; the header pointer is stored big-endian
	   (htobe32() doubles as be32toh() -- same byte swap) */
	fw_data = (const char *)fw->data;
	hdr_offset = htobe32(*(const uint32_t *)
			     (fw_data + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw->datasize) {
		device_printf(sc->dev, "Bad firmware file");
		status = EIO;
		goto abort_with_fw;
	}
	hdr = (const void*)(fw_data + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_fw;

	/* qualhack strips const: mxge_pio_copy() wants non-const src */
	hack.ro_char = fw_data;
	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < *limit; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
			      hack.rw_char + i,
			      min(256U, (unsigned)(*limit - i)));
		mb();
		/* read back one byte to flush the posted PIO writes */
		dummy = *sc->sram;
		mb();
	}

	status = 0;
abort_with_fw:
	firmware_put(fw, FIRMWARE_UNLOAD);
	return status;
}
711 
712 /*
713  * Enable or disable periodic RDMAs from the host to make certain
714  * chipsets resend dropped PCIe messages
715  */
716 
/* See the comment above: tells the firmware to start/stop periodic
 * dummy RDMAs that keep certain chipsets resending dropped PCIe
 * messages.  Builds a 64-byte boot command and polls the command
 * page for the firmware's acknowledgement. */
static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	/* align the request buffer to an 8-byte boundary */
	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	mb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);		/* confirm addr MSW */
	buf[1] = htobe32(dma_low);		/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);		/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high); 		/* dummy addr MSW */
	buf[4] = htobe32(dma_low); 		/* dummy addr LSW */
	buf[5] = htobe32(enable);			/* enable? */


	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	mb();
	DELAY(1000);
	mb();
	/* poll up to ~20ms for the firmware's -1 acknowledgement */
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}
768 
769 static int
770 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
771 {
772 	mcp_cmd_t *buf;
773 	char buf_bytes[sizeof(*buf) + 8];
774 	volatile mcp_cmd_response_t *response = sc->cmd;
775 	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
776 	uint32_t dma_low, dma_high;
777 	int err, sleep_total = 0;
778 
779 	/* ensure buf is aligned to 8 bytes */
780 	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
781 
782 	buf->data0 = htobe32(data->data0);
783 	buf->data1 = htobe32(data->data1);
784 	buf->data2 = htobe32(data->data2);
785 	buf->cmd = htobe32(cmd);
786 	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
787 	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
788 
789 	buf->response_addr.low = htobe32(dma_low);
790 	buf->response_addr.high = htobe32(dma_high);
791 	mtx_lock(&sc->cmd_mtx);
792 	response->result = 0xffffffff;
793 	mb();
794 	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
795 
796 	/* wait up to 20ms */
797 	err = EAGAIN;
798 	for (sleep_total = 0; sleep_total <  20; sleep_total++) {
799 		bus_dmamap_sync(sc->cmd_dma.dmat,
800 				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
801 		mb();
802 		switch (be32toh(response->result)) {
803 		case 0:
804 			data->data0 = be32toh(response->data);
805 			err = 0;
806 			break;
807 		case 0xffffffff:
808 			DELAY(1000);
809 			break;
810 		case MXGEFW_CMD_UNKNOWN:
811 			err = ENOSYS;
812 			break;
813 		case MXGEFW_CMD_ERROR_UNALIGNED:
814 			err = E2BIG;
815 			break;
816 		default:
817 			device_printf(sc->dev,
818 				      "mxge: command %d "
819 				      "failed, result = %d\n",
820 				      cmd, be32toh(response->result));
821 			err = ENXIO;
822 			break;
823 		}
824 		if (err != EAGAIN)
825 			break;
826 	}
827 	if (err == EAGAIN)
828 		device_printf(sc->dev, "mxge: command %d timed out"
829 			      "result = %d\n",
830 			      cmd, be32toh(response->result));
831 	mtx_unlock(&sc->cmd_mtx);
832 	return err;
833 }
834 
/*
 * When no image can be loaded, try to keep the firmware that is
 * already running on the NIC: copy its generation header out of SRAM
 * and run it through mxge_validate_firmware().  Also flags adopted
 * revisions 1.4.4 - 1.4.11, whose rx filter drops broadcasts unless
 * the NIC stays in ALLMULTI mode.  Returns the validation status.
 */
static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header (stored big-endian in SRAM;
	   htobe32() doubles as be32toh() -- same byte swap) */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not malloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	free(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}

	return status;
}
883 
884 
/*
 * Load firmware into the NIC: push the image into SRAM via
 * mxge_load_firmware_helper(), then hand it off to the bootstrap MCP
 * and poll for the firmware's acknowledgement.  If no image can be
 * pushed, fall back to adopting whatever firmware is already running
 * (forcing the conservative 2KB tx boundary).  Returns 0 or an errno.
 */
static int
mxge_load_firmware(mxge_softc_t *sc)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	/* align the handoff request buffer to an 8-byte boundary */
	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx.boundary == 4096) {
			device_printf(sc->dev,
				"Using firmware currently running on NIC"
				 ".  For optimal\n");
			device_printf(sc->dev,
				 "performance consider loading optimized "
				 "firmware\n");
		}
		/* adopted firmware's alignment is unknown; be safe */
		sc->fw_name = mxge_fw_unaligned;
		sc->tx.boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	mb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
					/* where the code starts*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8); 	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	mb();
	DELAY(1000);
	mb();
	/* poll up to ~200ms for the firmware's -1 acknowledgement */
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev,"handoff failed (%p = 0x%x)",
			confirm, *confirm);

		return ENXIO;
	}
	return 0;
}
967 
968 static int
969 mxge_update_mac_address(mxge_softc_t *sc)
970 {
971 	mxge_cmd_t cmd;
972 	uint8_t *addr = sc->mac_addr;
973 	int status;
974 
975 
976 	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
977 		     | (addr[2] << 8) | addr[3]);
978 
979 	cmd.data1 = ((addr[4] << 8) | (addr[5]));
980 
981 	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
982 	return status;
983 }
984 
985 static int
986 mxge_change_pause(mxge_softc_t *sc, int pause)
987 {
988 	mxge_cmd_t cmd;
989 	int status;
990 
991 	if (pause)
992 		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
993 				       &cmd);
994 	else
995 		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
996 				       &cmd);
997 
998 	if (status) {
999 		device_printf(sc->dev, "Failed to set flow control mode\n");
1000 		return ENXIO;
1001 	}
1002 	sc->pause = pause;
1003 	return 0;
1004 }
1005 
1006 static void
1007 mxge_change_promisc(mxge_softc_t *sc, int promisc)
1008 {
1009 	mxge_cmd_t cmd;
1010 	int status;
1011 
1012 	if (promisc)
1013 		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
1014 				       &cmd);
1015 	else
1016 		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
1017 				       &cmd);
1018 
1019 	if (status) {
1020 		device_printf(sc->dev, "Failed to set promisc mode\n");
1021 	}
1022 }
1023 
/*
 * Sync the firmware's multicast filter with the interface's multicast
 * list: switch to ALLMULTI while editing, flush the old filters, join
 * each AF_LINK group address, then re-enable filtering.  Bails out
 * (leaving ALLMULTI on) for non-multicast-capable firmware, the
 * adopted-firmware rx filter bug, IFF_ALLMULTI, or any command error.
 */
static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
		       " error status: %d\n", err);
		return;
	}

	/* buggy adopted firmware must stay in ALLMULTI (see
	   mxge_adopt_running_firmware()) */
	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */

	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */

	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		/* 6-byte link-level address split across data0/data1,
		   converted to the firmware's big-endian layout */
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		      &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		      &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			/* NOTE(review): the "%d\t" below looks like it was
			   meant to be "%d\n" -- message text left as-is */
			device_printf(sc->dev, "Failed "
			       "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
			       "%d\t", err);
			/* abort, leaving multicast filtering off */
			IF_ADDR_UNLOCK(ifp);
			return;
		}
	}
	IF_ADDR_UNLOCK(ifp);
	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
		       ", error status: %d\n", err);
	}
}
1091 
1092 static int
1093 mxge_max_mtu(mxge_softc_t *sc)
1094 {
1095 	mxge_cmd_t cmd;
1096 	int status;
1097 
1098 	if (MJUMPAGESIZE - MXGEFW_PAD >  MXGEFW_MAX_MTU)
1099 		return  MXGEFW_MAX_MTU - MXGEFW_PAD;
1100 
1101 	/* try to set nbufs to see if it we can
1102 	   use virtually contiguous jumbos */
1103 	cmd.data0 = 0;
1104 	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
1105 			       &cmd);
1106 	if (status == 0)
1107 		return  MXGEFW_MAX_MTU - MXGEFW_PAD;
1108 
1109 	/* otherwise, we're limited to MJUMPAGESIZE */
1110 	return MJUMPAGESIZE - MXGEFW_PAD;
1111 }
1112 
/*
 * Reset the NIC and bring the firmware back to a known state:
 * issue MXGEFW_CMD_RESET, re-arm the dummy rdma engine, optionally
 * tell the firmware where the receive-completion (intr) queue lives,
 * look up the SRAM offsets for interrupt coalescing/claim/deassert,
 * run a DMA benchmark, zero all driver/firmware shared counters and
 * re-apply MAC address, promisc, pause and multicast settings.
 *
 * Returns 0 on success or an errno/firmware status on failure.
 */
static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{

	mxge_cmd_t cmd;
	size_t bytes;
	int status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	if (interrupts_setup) {
		/* Now exchange information about interrupts  */
		bytes = (sc->rx_done.mask + 1) * sizeof (*sc->rx_done.entry);
		memset(sc->rx_done.entry, 0, bytes);
		cmd.data0 = (uint32_t)bytes;
		status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
		/* bus address of the completion ring, split into
		   32-bit halves for the firmware */
		cmd.data0 = MXGE_LOWPART_TO_U32(sc->rx_done.dma.bus_addr);
		cmd.data1 = MXGE_HIGHPART_TO_U32(sc->rx_done.dma.bus_addr);
		status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA, &cmd);
	}

	/* each GET_*_OFFSET command returns an SRAM offset in
	   cmd.data0; errors are OR-ed together and checked once below */
	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);


	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	sc->irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);


	status |= mxge_send_cmd(sc,  MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}


	/* program the current coalescing delay (register is big-endian) */
	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);


	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	/* reset mcp/driver shared state back to 0 */
	sc->rx_done.idx = 0;
	sc->rx_done.cnt = 0;
	sc->tx.req = 0;
	sc->tx.done = 0;
	sc->tx.pkt_done = 0;
	sc->tx.wake = 0;
	sc->tx_defrag = 0;
	sc->tx.stall = 0;
	sc->rx_big.cnt = 0;
	sc->rx_small.cnt = 0;
	sc->rdma_tags_available = 15;
	sc->fw_stats->valid = 0;
	sc->fw_stats->send_done_count = 0;
	sc->lro_bad_csum = 0;
	sc->lro_queued = 0;
	sc->lro_flushed = 0;
	/* re-apply host-side settings to the freshly reset firmware */
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, 0);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	return status;
}
1191 
1192 static int
1193 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
1194 {
1195         mxge_softc_t *sc;
1196         unsigned int intr_coal_delay;
1197         int err;
1198 
1199         sc = arg1;
1200         intr_coal_delay = sc->intr_coal_delay;
1201         err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
1202         if (err != 0) {
1203                 return err;
1204         }
1205         if (intr_coal_delay == sc->intr_coal_delay)
1206                 return 0;
1207 
1208         if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
1209                 return EINVAL;
1210 
1211 	mtx_lock(&sc->driver_mtx);
1212 	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
1213 	sc->intr_coal_delay = intr_coal_delay;
1214 
1215 	mtx_unlock(&sc->driver_mtx);
1216         return err;
1217 }
1218 
1219 static int
1220 mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
1221 {
1222         mxge_softc_t *sc;
1223         unsigned int enabled;
1224         int err;
1225 
1226         sc = arg1;
1227         enabled = sc->pause;
1228         err = sysctl_handle_int(oidp, &enabled, arg2, req);
1229         if (err != 0) {
1230                 return err;
1231         }
1232         if (enabled == sc->pause)
1233                 return 0;
1234 
1235 	mtx_lock(&sc->driver_mtx);
1236 	err = mxge_change_pause(sc, enabled);
1237 	mtx_unlock(&sc->driver_mtx);
1238         return err;
1239 }
1240 
1241 static int
1242 mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
1243 {
1244 	struct ifnet *ifp;
1245 	int err;
1246 
1247 	ifp = sc->ifp;
1248 	if (lro_cnt == 0)
1249 		ifp->if_capenable &= ~IFCAP_LRO;
1250 	else
1251 		ifp->if_capenable |= IFCAP_LRO;
1252 	sc->lro_cnt = lro_cnt;
1253 	callout_stop(&sc->co_hdl);
1254 	mxge_close(sc);
1255 	err = mxge_open(sc);
1256 	if (err == 0)
1257 		callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
1258 	return err;
1259 }
1260 
1261 static int
1262 mxge_change_lro(SYSCTL_HANDLER_ARGS)
1263 {
1264 	mxge_softc_t *sc;
1265 	unsigned int lro_cnt;
1266 	int err;
1267 
1268 	sc = arg1;
1269 	lro_cnt = sc->lro_cnt;
1270 	err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
1271 	if (err != 0)
1272 		return err;
1273 
1274 	if (lro_cnt == sc->lro_cnt)
1275 		return 0;
1276 
1277 	if (lro_cnt > 128)
1278 		return EINVAL;
1279 
1280 	mtx_lock(&sc->driver_mtx);
1281 	err = mxge_change_lro_locked(sc, lro_cnt);
1282 	mtx_unlock(&sc->driver_mtx);
1283 	return err;
1284 }
1285 
1286 static int
1287 mxge_handle_be32(SYSCTL_HANDLER_ARGS)
1288 {
1289         int err;
1290 
1291         if (arg1 == NULL)
1292                 return EFAULT;
1293         arg2 = be32toh(*(int *)arg1);
1294         arg1 = NULL;
1295         err = sysctl_handle_int(oidp, arg1, arg2, req);
1296 
1297         return err;
1298 }
1299 
1300 static void
1301 mxge_add_sysctls(mxge_softc_t *sc)
1302 {
1303 	struct sysctl_ctx_list *ctx;
1304 	struct sysctl_oid_list *children;
1305 	mcp_irq_data_t *fw;
1306 
1307 	ctx = device_get_sysctl_ctx(sc->dev);
1308 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
1309 	fw = sc->fw_stats;
1310 
1311 	/* random information */
1312 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1313 		       "firmware_version",
1314 		       CTLFLAG_RD, &sc->fw_version,
1315 		       0, "firmware version");
1316 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1317 		       "serial_number",
1318 		       CTLFLAG_RD, &sc->serial_number_string,
1319 		       0, "serial number");
1320 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1321 		       "product_code",
1322 		       CTLFLAG_RD, &sc->product_code_string,
1323 		       0, "product_code");
1324 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1325 		       "pcie_link_width",
1326 		       CTLFLAG_RD, &sc->link_width,
1327 		       0, "tx_boundary");
1328 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1329 		       "tx_boundary",
1330 		       CTLFLAG_RD, &sc->tx.boundary,
1331 		       0, "tx_boundary");
1332 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1333 		       "write_combine",
1334 		       CTLFLAG_RD, &sc->wc,
1335 		       0, "write combining PIO?");
1336 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1337 		       "read_dma_MBs",
1338 		       CTLFLAG_RD, &sc->read_dma,
1339 		       0, "DMA Read speed in MB/s");
1340 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1341 		       "write_dma_MBs",
1342 		       CTLFLAG_RD, &sc->write_dma,
1343 		       0, "DMA Write speed in MB/s");
1344 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1345 		       "read_write_dma_MBs",
1346 		       CTLFLAG_RD, &sc->read_write_dma,
1347 		       0, "DMA concurrent Read/Write speed in MB/s");
1348 
1349 
1350 	/* performance related tunables */
1351 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1352 			"intr_coal_delay",
1353 			CTLTYPE_INT|CTLFLAG_RW, sc,
1354 			0, mxge_change_intr_coal,
1355 			"I", "interrupt coalescing delay in usecs");
1356 
1357 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1358 			"flow_control_enabled",
1359 			CTLTYPE_INT|CTLFLAG_RW, sc,
1360 			0, mxge_change_flow_control,
1361 			"I", "interrupt coalescing delay in usecs");
1362 
1363 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1364 		       "deassert_wait",
1365 		       CTLFLAG_RW, &mxge_deassert_wait,
1366 		       0, "Wait for IRQ line to go low in ihandler");
1367 
1368 	/* stats block from firmware is in network byte order.
1369 	   Need to swap it */
1370 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1371 			"link_up",
1372 			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
1373 			0, mxge_handle_be32,
1374 			"I", "link up");
1375 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1376 			"rdma_tags_available",
1377 			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
1378 			0, mxge_handle_be32,
1379 			"I", "rdma_tags_available");
1380 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1381 			"dropped_bad_crc32",
1382 			CTLTYPE_INT|CTLFLAG_RD,
1383 			&fw->dropped_bad_crc32,
1384 			0, mxge_handle_be32,
1385 			"I", "dropped_bad_crc32");
1386 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1387 			"dropped_bad_phy",
1388 			CTLTYPE_INT|CTLFLAG_RD,
1389 			&fw->dropped_bad_phy,
1390 			0, mxge_handle_be32,
1391 			"I", "dropped_bad_phy");
1392 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1393 			"dropped_link_error_or_filtered",
1394 			CTLTYPE_INT|CTLFLAG_RD,
1395 			&fw->dropped_link_error_or_filtered,
1396 			0, mxge_handle_be32,
1397 			"I", "dropped_link_error_or_filtered");
1398 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1399 			"dropped_link_overflow",
1400 			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
1401 			0, mxge_handle_be32,
1402 			"I", "dropped_link_overflow");
1403 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1404 			"dropped_multicast_filtered",
1405 			CTLTYPE_INT|CTLFLAG_RD,
1406 			&fw->dropped_multicast_filtered,
1407 			0, mxge_handle_be32,
1408 			"I", "dropped_multicast_filtered");
1409 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1410 			"dropped_no_big_buffer",
1411 			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
1412 			0, mxge_handle_be32,
1413 			"I", "dropped_no_big_buffer");
1414 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1415 			"dropped_no_small_buffer",
1416 			CTLTYPE_INT|CTLFLAG_RD,
1417 			&fw->dropped_no_small_buffer,
1418 			0, mxge_handle_be32,
1419 			"I", "dropped_no_small_buffer");
1420 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1421 			"dropped_overrun",
1422 			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
1423 			0, mxge_handle_be32,
1424 			"I", "dropped_overrun");
1425 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1426 			"dropped_pause",
1427 			CTLTYPE_INT|CTLFLAG_RD,
1428 			&fw->dropped_pause,
1429 			0, mxge_handle_be32,
1430 			"I", "dropped_pause");
1431 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1432 			"dropped_runt",
1433 			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
1434 			0, mxge_handle_be32,
1435 			"I", "dropped_runt");
1436 
1437 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1438 			"dropped_unicast_filtered",
1439 			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
1440 			0, mxge_handle_be32,
1441 			"I", "dropped_unicast_filtered");
1442 
1443 	/* host counters exported for debugging */
1444 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1445 		       "rx_small_cnt",
1446 		       CTLFLAG_RD, &sc->rx_small.cnt,
1447 		       0, "rx_small_cnt");
1448 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1449 		       "rx_big_cnt",
1450 		       CTLFLAG_RD, &sc->rx_big.cnt,
1451 		       0, "rx_small_cnt");
1452 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1453 		       "tx_req",
1454 		       CTLFLAG_RD, &sc->tx.req,
1455 		       0, "tx_req");
1456 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1457 		       "tx_done",
1458 		       CTLFLAG_RD, &sc->tx.done,
1459 		       0, "tx_done");
1460 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1461 		       "tx_pkt_done",
1462 		       CTLFLAG_RD, &sc->tx.pkt_done,
1463 		       0, "tx_done");
1464 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1465 		       "tx_stall",
1466 		       CTLFLAG_RD, &sc->tx.stall,
1467 		       0, "tx_stall");
1468 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1469 		       "tx_wake",
1470 		       CTLFLAG_RD, &sc->tx.wake,
1471 		       0, "tx_wake");
1472 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1473 		       "tx_defrag",
1474 		       CTLFLAG_RD, &sc->tx_defrag,
1475 		       0, "tx_defrag");
1476 
1477 	/* verbose printing? */
1478 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1479 		       "verbose",
1480 		       CTLFLAG_RW, &mxge_verbose,
1481 		       0, "verbose printing");
1482 
1483 	/* lro */
1484 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1485 			"lro_cnt",
1486 			CTLTYPE_INT|CTLFLAG_RW, sc,
1487 			0, mxge_change_lro,
1488 			"I", "number of lro merge queues");
1489 
1490 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1491 		       "lro_flushed", CTLFLAG_RD, &sc->lro_flushed,
1492 		       0, "number of lro merge queues flushed");
1493 
1494 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1495 		       "lro_queued", CTLFLAG_RD, &sc->lro_queued,
1496 		       0, "number of frames appended to lro merge queues");
1497 
1498 }
1499 
1500 /* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
1501    backwards one at a time and handle ring wraps */
1502 
1503 static inline void
1504 mxge_submit_req_backwards(mxge_tx_buf_t *tx,
1505 			    mcp_kreq_ether_send_t *src, int cnt)
1506 {
1507         int idx, starting_slot;
1508         starting_slot = tx->req;
1509         while (cnt > 1) {
1510                 cnt--;
1511                 idx = (starting_slot + cnt) & tx->mask;
1512                 mxge_pio_copy(&tx->lanai[idx],
1513 			      &src[cnt], sizeof(*src));
1514                 mb();
1515         }
1516 }
1517 
1518 /*
1519  * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
1520  * at most 32 bytes at a time, so as to avoid involving the software
1521  * pio handler in the nic.   We re-write the first segment's flags
1522  * to mark them valid only after writing the entire chain
1523  */
1524 
1525 static inline void
1526 mxge_submit_req(mxge_tx_buf_t *tx, mcp_kreq_ether_send_t *src,
1527                   int cnt)
1528 {
1529         int idx, i;
1530         uint32_t *src_ints;
1531 	volatile uint32_t *dst_ints;
1532         mcp_kreq_ether_send_t *srcp;
1533 	volatile mcp_kreq_ether_send_t *dstp, *dst;
1534 	uint8_t last_flags;
1535 
1536         idx = tx->req & tx->mask;
1537 
1538 	last_flags = src->flags;
1539 	src->flags = 0;
1540         mb();
1541         dst = dstp = &tx->lanai[idx];
1542         srcp = src;
1543 
1544         if ((idx + cnt) < tx->mask) {
1545                 for (i = 0; i < (cnt - 1); i += 2) {
1546                         mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
1547                         mb(); /* force write every 32 bytes */
1548                         srcp += 2;
1549                         dstp += 2;
1550                 }
1551         } else {
1552                 /* submit all but the first request, and ensure
1553                    that it is submitted below */
1554                 mxge_submit_req_backwards(tx, src, cnt);
1555                 i = 0;
1556         }
1557         if (i < cnt) {
1558                 /* submit the first request */
1559                 mxge_pio_copy(dstp, srcp, sizeof(*src));
1560                 mb(); /* barrier before setting valid flag */
1561         }
1562 
1563         /* re-write the last 32-bits with the valid flags */
1564         src->flags = last_flags;
1565         src_ints = (uint32_t *)src;
1566         src_ints+=3;
1567         dst_ints = (volatile uint32_t *)dst;
1568         dst_ints+=3;
1569         *dst_ints =  *src_ints;
1570         tx->req += cnt;
1571         mb();
1572 }
1573 
1574 static void
1575 mxge_encap_tso(mxge_softc_t *sc, struct mbuf *m, int busdma_seg_cnt,
1576 	       int ip_off)
1577 {
1578 	mxge_tx_buf_t *tx;
1579 	mcp_kreq_ether_send_t *req;
1580 	bus_dma_segment_t *seg;
1581 	struct ip *ip;
1582 	struct tcphdr *tcp;
1583 	uint32_t low, high_swapped;
1584 	int len, seglen, cum_len, cum_len_next;
1585 	int next_is_first, chop, cnt, rdma_count, small;
1586 	uint16_t pseudo_hdr_offset, cksum_offset, mss;
1587 	uint8_t flags, flags_next;
1588 	static int once;
1589 
1590 	mss = m->m_pkthdr.tso_segsz;
1591 
1592 	/* negative cum_len signifies to the
1593 	 * send loop that we are still in the
1594 	 * header portion of the TSO packet.
1595 	 */
1596 
1597 	/* ensure we have the ethernet, IP and TCP
1598 	   header together in the first mbuf, copy
1599 	   it to a scratch buffer if not */
1600 	if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
1601 		m_copydata(m, 0, ip_off + sizeof (*ip),
1602 			   sc->scratch);
1603 		ip = (struct ip *)(sc->scratch + ip_off);
1604 	} else {
1605 		ip = (struct ip *)(mtod(m, char *) + ip_off);
1606 	}
1607 	if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2)
1608 			    + sizeof (*tcp))) {
1609 		m_copydata(m, 0, ip_off + (ip->ip_hl << 2)
1610 			   + sizeof (*tcp),  sc->scratch);
1611 		ip = (struct ip *)(mtod(m, char *) + ip_off);
1612 	}
1613 
1614 	tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
1615 	cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2));
1616 
1617 	/* TSO implies checksum offload on this hardware */
1618 	cksum_offset = ip_off + (ip->ip_hl << 2);
1619 	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;
1620 
1621 
1622 	/* for TSO, pseudo_hdr_offset holds mss.
1623 	 * The firmware figures out where to put
1624 	 * the checksum by parsing the header. */
1625 	pseudo_hdr_offset = htobe16(mss);
1626 
1627 	tx = &sc->tx;
1628 	req = tx->req_list;
1629 	seg = tx->seg_list;
1630 	cnt = 0;
1631 	rdma_count = 0;
1632 	/* "rdma_count" is the number of RDMAs belonging to the
1633 	 * current packet BEFORE the current send request. For
1634 	 * non-TSO packets, this is equal to "count".
1635 	 * For TSO packets, rdma_count needs to be reset
1636 	 * to 0 after a segment cut.
1637 	 *
1638 	 * The rdma_count field of the send request is
1639 	 * the number of RDMAs of the packet starting at
1640 	 * that request. For TSO send requests with one ore more cuts
1641 	 * in the middle, this is the number of RDMAs starting
1642 	 * after the last cut in the request. All previous
1643 	 * segments before the last cut implicitly have 1 RDMA.
1644 	 *
1645 	 * Since the number of RDMAs is not known beforehand,
1646 	 * it must be filled-in retroactively - after each
1647 	 * segmentation cut or at the end of the entire packet.
1648 	 */
1649 
1650 	while (busdma_seg_cnt) {
1651 		/* Break the busdma segment up into pieces*/
1652 		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
1653 		high_swapped = 	htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
1654 		len = seg->ds_len;
1655 
1656 		while (len) {
1657 			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
1658 			seglen = len;
1659 			cum_len_next = cum_len + seglen;
1660 			(req-rdma_count)->rdma_count = rdma_count + 1;
1661 			if (__predict_true(cum_len >= 0)) {
1662 				/* payload */
1663 				chop = (cum_len_next > mss);
1664 				cum_len_next = cum_len_next % mss;
1665 				next_is_first = (cum_len_next == 0);
1666 				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
1667 				flags_next |= next_is_first *
1668 					MXGEFW_FLAGS_FIRST;
1669 				rdma_count |= -(chop | next_is_first);
1670 				rdma_count += chop & !next_is_first;
1671 			} else if (cum_len_next >= 0) {
1672 				/* header ends */
1673 				rdma_count = -1;
1674 				cum_len_next = 0;
1675 				seglen = -cum_len;
1676 				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
1677 				flags_next = MXGEFW_FLAGS_TSO_PLD |
1678 					MXGEFW_FLAGS_FIRST |
1679 					(small * MXGEFW_FLAGS_SMALL);
1680 			    }
1681 
1682 			req->addr_high = high_swapped;
1683 			req->addr_low = htobe32(low);
1684 			req->pseudo_hdr_offset = pseudo_hdr_offset;
1685 			req->pad = 0;
1686 			req->rdma_count = 1;
1687 			req->length = htobe16(seglen);
1688 			req->cksum_offset = cksum_offset;
1689 			req->flags = flags | ((cum_len & 1) *
1690 					      MXGEFW_FLAGS_ALIGN_ODD);
1691 			low += seglen;
1692 			len -= seglen;
1693 			cum_len = cum_len_next;
1694 			flags = flags_next;
1695 			req++;
1696 			cnt++;
1697 			rdma_count++;
1698 			if (__predict_false(cksum_offset > seglen))
1699 				cksum_offset -= seglen;
1700 			else
1701 				cksum_offset = 0;
1702 			if (__predict_false(cnt > tx->max_desc))
1703 				goto drop;
1704 		}
1705 		busdma_seg_cnt--;
1706 		seg++;
1707 	}
1708 	(req-rdma_count)->rdma_count = rdma_count;
1709 
1710 	do {
1711 		req--;
1712 		req->flags |= MXGEFW_FLAGS_TSO_LAST;
1713 	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));
1714 
1715 	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
1716 	mxge_submit_req(tx, tx->req_list, cnt);
1717 	return;
1718 
1719 drop:
1720 	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
1721 	m_freem(m);
1722 	sc->ifp->if_oerrors++;
1723 	if (!once) {
1724 		printf("tx->max_desc exceeded via TSO!\n");
1725 		printf("mss = %d, %ld, %d!\n", mss,
1726 		       (long)seg - (long)tx->seg_list, tx->max_desc);
1727 		once = 1;
1728 	}
1729 	return;
1730 
1731 }
1732 
1733 /*
1734  * We reproduce the software vlan tag insertion from
1735  * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
1736  * vlan tag insertion. We need to advertise this in order to have the
1737  * vlan interface respect our csum offload flags.
1738  */
1739 static struct mbuf *
1740 mxge_vlan_tag_insert(struct mbuf *m)
1741 {
1742 	struct ether_vlan_header *evl;
1743 
1744 	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT);
1745 	if (__predict_false(m == NULL))
1746 		return NULL;
1747 	if (m->m_len < sizeof(*evl)) {
1748 		m = m_pullup(m, sizeof(*evl));
1749 		if (__predict_false(m == NULL))
1750 			return NULL;
1751 	}
1752 	/*
1753 	 * Transform the Ethernet header into an Ethernet header
1754 	 * with 802.1Q encapsulation.
1755 	 */
1756 	evl = mtod(m, struct ether_vlan_header *);
1757 	bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
1758 	      (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
1759 	evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
1760 	evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
1761 	m->m_flags &= ~M_VLANTAG;
1762 	return m;
1763 }
1764 
/*
 * Map a frame for DMA and hand it to the NIC as a chain of send
 * descriptors.  Handles software VLAN tag insertion, defragmenting
 * an over-long mbuf chain, checksum offload setup, and padding of
 * runt frames to the 60-byte minimum.  TSO frames are delegated to
 * mxge_encap_tso().  On failure the mbuf is freed and if_oerrors is
 * incremented.  Caller holds the tx lock.
 */
static void
mxge_encap(mxge_softc_t *sc, struct mbuf *m)
{
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct mbuf *m_tmp;
	struct ifnet *ifp;
	mxge_tx_buf_t *tx;
	struct ip *ip;
	int cnt, cum_len, err, i, idx, odd_flag, ip_off;
	uint16_t pseudo_hdr_offset;
        uint8_t flags, cksum_offset;



	ifp = sc->ifp;
	tx = &sc->tx;

	ip_off = sizeof (struct ether_header);
	if (m->m_flags & M_VLANTAG) {
		/* perform software VLAN tag insertion; the IP header
		   moves back by the encapsulation length */
		m = mxge_vlan_tag_insert(m);
		if (__predict_false(m == NULL))
			goto drop;
		ip_off += ETHER_VLAN_ENCAP_LEN;
	}

	/* (try to) map the frame for DMA */
	idx = tx->req & tx->mask;
	err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
				      m, tx->seg_list, &cnt,
				      BUS_DMA_NOWAIT);
	if (__predict_false(err == EFBIG)) {
		/* Too many segments in the chain.  Try
		   to defrag */
		m_tmp = m_defrag(m, M_NOWAIT);
		if (m_tmp == NULL) {
			goto drop;
		}
		sc->tx_defrag++;
		m = m_tmp;
		err = bus_dmamap_load_mbuf_sg(tx->dmat,
					      tx->info[idx].map,
					      m, tx->seg_list, &cnt,
					      BUS_DMA_NOWAIT);
	}
	if (__predict_false(err != 0)) {
		device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
			      " packet len = %d\n", err, m->m_pkthdr.len);
		goto drop;
	}
	bus_dmamap_sync(tx->dmat, tx->info[idx].map,
			BUS_DMASYNC_PREWRITE);
	tx->info[idx].m = m;


	/* TSO is different enough, we handle it in another routine */
	if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
		mxge_encap_tso(sc, m, cnt, ip_off);
		return;
	}

	req = tx->req_list;
	cksum_offset = 0;
	pseudo_hdr_offset = 0;
	flags = MXGEFW_FLAGS_NO_TSO;

	/* checksum offloading? */
	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
		/* ensure ip header is in first mbuf, copy
		   it to a scratch buffer if not */
		if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
			m_copydata(m, 0, ip_off + sizeof (*ip),
				   sc->scratch);
			ip = (struct ip *)(sc->scratch + ip_off);
		} else {
			ip = (struct ip *)(mtod(m, char *) + ip_off);
		}
		cksum_offset = ip_off + (ip->ip_hl << 2);
		pseudo_hdr_offset = cksum_offset +  m->m_pkthdr.csum_data;
		pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
		req->cksum_offset = cksum_offset;
		flags |= MXGEFW_FLAGS_CKSUM;
		odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
	} else {
		odd_flag = 0;
	}
	if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
		flags |= MXGEFW_FLAGS_SMALL;

	/* convert segments into a request list */
	cum_len = 0;
	seg = tx->seg_list;
	req->flags = MXGEFW_FLAGS_FIRST;
	for (i = 0; i < cnt; i++) {
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		req->length = htobe16(seg->ds_len);
		req->cksum_offset = cksum_offset;
		/* the checksum offset only applies to the descriptor
		   that contains it; zero once it has been passed */
		if (cksum_offset > seg->ds_len)
			cksum_offset -= seg->ds_len;
		else
			cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cum_len += seg->ds_len;
		seg++;
		req++;
		req->flags = 0;
	}
	req--;
	/* pad runts to 60 bytes */
	if (cum_len < 60) {
		/* append one extra descriptor pointing at the
		   pre-allocated zero pad buffer */
		req++;
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->length = htobe16(60 - cum_len);
		req->cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cnt++;
	}

	tx->req_list[0].rdma_count = cnt;
#if 0
	/* print what the firmware will see */
	for (i = 0; i < cnt; i++) {
		printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
		    "cso:%d, flags:0x%x, rdma:%d\n",
		    i, (int)ntohl(tx->req_list[i].addr_high),
		    (int)ntohl(tx->req_list[i].addr_low),
		    (int)ntohs(tx->req_list[i].length),
		    (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
		    tx->req_list[i].cksum_offset, tx->req_list[i].flags,
		    tx->req_list[i].rdma_count);
	}
	printf("--------------\n");
#endif
	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
	return;

drop:
	m_freem(m);
	ifp->if_oerrors++;
	return;
}
1919 
1920 
1921 
1922 
1923 static inline void
1924 mxge_start_locked(mxge_softc_t *sc)
1925 {
1926 	struct mbuf *m;
1927 	struct ifnet *ifp;
1928 	mxge_tx_buf_t *tx;
1929 
1930 	ifp = sc->ifp;
1931 	tx = &sc->tx;
1932 	while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
1933 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
1934 		if (m == NULL) {
1935 			return;
1936 		}
1937 		/* let BPF see it */
1938 		BPF_MTAP(ifp, m);
1939 
1940 		/* give it to the nic */
1941 		mxge_encap(sc, m);
1942 	}
1943 	/* ran out of transmit slots */
1944 	if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
1945 		sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1946 		tx->stall++;
1947 	}
1948 }
1949 
1950 static void
1951 mxge_start(struct ifnet *ifp)
1952 {
1953 	mxge_softc_t *sc = ifp->if_softc;
1954 
1955 
1956 	mtx_lock(&sc->tx_mtx);
1957 	mxge_start_locked(sc);
1958 	mtx_unlock(&sc->tx_mtx);
1959 }
1960 
1961 /*
1962  * copy an array of mcp_kreq_ether_recv_t's to the mcp.  Copy
1963  * at most 32 bytes at a time, so as to avoid involving the software
1964  * pio handler in the nic.   We re-write the first segment's low
1965  * DMA address to mark it valid only after we write the entire chunk
1966  * in a burst
1967  */
static inline void
mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
		mcp_kreq_ether_recv_t *src)
{
	uint32_t low;

	/* poison the first descriptor's low address so the NIC does
	   not consume the group before the burst is complete */
	low = src->addr_low;
	src->addr_low = 0xffffffff;
	/* two 32-byte PIO bursts cover all 8 descriptors */
	mxge_pio_copy(dst, src, 4 * sizeof (*src));
	mb();
	mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
	mb();
	/* restore the real low address last to validate the group */
	src->addr_low = low;
	dst->addr_low = low;
	mb();
}
1984 
1985 static int
1986 mxge_get_buf_small(mxge_softc_t *sc, bus_dmamap_t map, int idx)
1987 {
1988 	bus_dma_segment_t seg;
1989 	struct mbuf *m;
1990 	mxge_rx_buf_t *rx = &sc->rx_small;
1991 	int cnt, err;
1992 
1993 	m = m_gethdr(M_DONTWAIT, MT_DATA);
1994 	if (m == NULL) {
1995 		rx->alloc_fail++;
1996 		err = ENOBUFS;
1997 		goto done;
1998 	}
1999 	m->m_len = MHLEN;
2000 	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2001 				      &seg, &cnt, BUS_DMA_NOWAIT);
2002 	if (err != 0) {
2003 		m_free(m);
2004 		goto done;
2005 	}
2006 	rx->info[idx].m = m;
2007 	rx->shadow[idx].addr_low =
2008 		htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2009 	rx->shadow[idx].addr_high =
2010 		htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2011 
2012 done:
2013 	if ((idx & 7) == 7)
2014 		mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2015 	return err;
2016 }
2017 
/*
 * Allocate and DMA-map a big (cluster-sized) receive mbuf for ring
 * slot idx.  The cluster may map to several busdma segments; each
 * segment occupies one shadow descriptor starting at idx.  The done
 * loop flushes any group of 8 descriptors completed by the rx->nbufs
 * slots this buffer spans.  Returns 0 or an errno.
 */
static int
mxge_get_buf_big(mxge_softc_t *sc, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg[3];
	struct mbuf *m;
	mxge_rx_buf_t *rx = &sc->rx_big;
	int cnt, err, i;

	/* plain 2KB clusters come from the common zone; larger ones
	   from the jumbo cluster allocator */
	if (rx->cl_size == MCLBYTES)
		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
	else
		m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
	if (m == NULL) {
		rx->alloc_fail++;
		err = ENOBUFS;
		goto done;
	}
	m->m_len = rx->cl_size;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      seg, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_free(m);
		goto done;
	}
	rx->info[idx].m = m;

	/* one shadow descriptor per busdma segment */
	for (i = 0; i < cnt; i++) {
		rx->shadow[idx + i].addr_low =
			htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
		rx->shadow[idx + i].addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
       }


done:
       for (i = 0; i < rx->nbufs; i++) {
		if ((idx & 7) == 7) {
			mxge_submit_8rx(&rx->lanai[idx - 7],
					&rx->shadow[idx - 7]);
		}
		idx++;
	}
	return err;
}
2062 
2063 /*
2064  *  Myri10GE hardware checksums are not valid if the sender
2065  *  padded the frame with non-zero padding.  This is because
2066  *  the firmware just does a simple 16-bit 1s complement
2067  *  checksum across the entire frame, excluding the first 14
 *  bytes.  It is best to simply check the checksum and
2069  *  tell the stack about it only if the checksum is good
2070  */
2071 
2072 static inline uint16_t
2073 mxge_rx_csum(struct mbuf *m, int csum)
2074 {
2075 	struct ether_header *eh;
2076 	struct ip *ip;
2077 	uint16_t c;
2078 
2079 	eh = mtod(m, struct ether_header *);
2080 
2081 	/* only deal with IPv4 TCP & UDP for now */
2082 	if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
2083 		return 1;
2084 	ip = (struct ip *)(eh + 1);
2085 	if (__predict_false(ip->ip_p != IPPROTO_TCP &&
2086 			    ip->ip_p != IPPROTO_UDP))
2087 		return 1;
2088 
2089 	c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2090 		      htonl(ntohs(csum) + ntohs(ip->ip_len) +
2091 			    - (ip->ip_hl << 2) + ip->ip_p));
2092 	c ^= 0xffff;
2093 	return (c);
2094 }
2095 
2096 static void
2097 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2098 {
2099 	struct ether_vlan_header *evl;
2100 	struct ether_header *eh;
2101 	uint32_t partial;
2102 
2103 	evl = mtod(m, struct ether_vlan_header *);
2104 	eh = mtod(m, struct ether_header *);
2105 
2106 	/*
2107 	 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
2108 	 * after what the firmware thought was the end of the ethernet
2109 	 * header.
2110 	 */
2111 
2112 	/* put checksum into host byte order */
2113 	*csum = ntohs(*csum);
2114 	partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2115 	(*csum) += ~partial;
2116 	(*csum) +=  ((*csum) < ~partial);
2117 	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2118 	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2119 
2120 	/* restore checksum to network byte order;
2121 	   later consumers expect this */
2122 	*csum = htons(*csum);
2123 
2124 	/* save the tag */
2125 	m->m_flags |= M_VLANTAG;
2126 	m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
2127 
2128 	/*
2129 	 * Remove the 802.1q header by copying the Ethernet
2130 	 * addresses over it and adjusting the beginning of
2131 	 * the data in the mbuf.  The encapsulated Ethernet
2132 	 * type field is already in place.
2133 	 */
2134 	bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
2135 	      ETHER_HDR_LEN - ETHER_TYPE_LEN);
2136 	m_adj(m, ETHER_VLAN_ENCAP_LEN);
2137 }
2138 
2139 
/*
 * Handle one completed receive of "len" bytes from the big ring:
 * replenish the ring slot, unmap the filled buffer, strip any VLAN
 * tag, validate the firmware's partial checksum "csum", and pass the
 * frame up (via LRO when possible).  On allocation failure the frame
 * is dropped and the old mbuf stays in the ring.
 */
static inline void
mxge_rx_done_big(mxge_softc_t *sc, uint32_t len, uint32_t csum)
{
	struct ifnet *ifp;
	struct mbuf *m;
	struct ether_header *eh;
	mxge_rx_buf_t *rx;
	bus_dmamap_t old_map;
	int idx;
	uint16_t tcpudp_csum;

	ifp = sc->ifp;
	rx = &sc->rx_big;
	idx = rx->cnt & rx->mask;
	rx->cnt += rx->nbufs;	/* a big receive consumes nbufs slots */
	/* save a pointer to the received mbuf */
	m = rx->info[idx].m;
	/* try to replace the received mbuf */
	if (mxge_get_buf_big(sc, rx->extra_map, idx)) {
		/* drop the frame -- the old mbuf is re-cycled */
		ifp->if_ierrors++;
		return;
	}

	/* unmap the received buffer */
	old_map = rx->info[idx].map;
	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(rx->dmat, old_map);

	/* swap the bus_dmamap_t's */
	rx->info[idx].map = rx->extra_map;
	rx->extra_map = old_map;

	/* mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned */
	m->m_data += MXGEFW_PAD;

	m->m_pkthdr.rcvif = ifp;
	m->m_len = m->m_pkthdr.len = len;
	ifp->if_ipackets++;
	eh = mtod(m, struct ether_header *);
	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
		/* mxge_vlan_tag_remove also adjusts csum */
		mxge_vlan_tag_remove(m, &csum);
	}
	/* if the checksum is valid, mark it in the mbuf header */
	if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
		/* hand a verified frame to LRO when enabled */
		if (sc->lro_cnt && (0 == mxge_lro_rx(sc, m, csum)))
			return;
		/* otherwise, it was a UDP frame, or a TCP frame which
		   we could not do LRO on.  Tell the stack that the
		   checksum is good */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
	}
	/* pass the frame up the stack */
	(*ifp->if_input)(ifp, m);
}
2197 
/*
 * Handle one completed receive of "len" bytes from the small ring.
 * Mirrors mxge_rx_done_big() but consumes exactly one ring slot:
 * replenish the slot, unmap the filled buffer, strip any VLAN tag,
 * validate the firmware's partial checksum "csum", and pass the
 * frame up (via LRO when possible).
 */
static inline void
mxge_rx_done_small(mxge_softc_t *sc, uint32_t len, uint32_t csum)
{
	struct ifnet *ifp;
	struct ether_header *eh;
	struct mbuf *m;
	mxge_rx_buf_t *rx;
	bus_dmamap_t old_map;
	int idx;
	uint16_t tcpudp_csum;

	ifp = sc->ifp;
	rx = &sc->rx_small;
	idx = rx->cnt & rx->mask;
	rx->cnt++;
	/* save a pointer to the received mbuf */
	m = rx->info[idx].m;
	/* try to replace the received mbuf */
	if (mxge_get_buf_small(sc, rx->extra_map, idx)) {
		/* drop the frame -- the old mbuf is re-cycled */
		ifp->if_ierrors++;
		return;
	}

	/* unmap the received buffer */
	old_map = rx->info[idx].map;
	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(rx->dmat, old_map);

	/* swap the bus_dmamap_t's */
	rx->info[idx].map = rx->extra_map;
	rx->extra_map = old_map;

	/* mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned */
	m->m_data += MXGEFW_PAD;

	m->m_pkthdr.rcvif = ifp;
	m->m_len = m->m_pkthdr.len = len;
	ifp->if_ipackets++;
	eh = mtod(m, struct ether_header *);
	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
		/* mxge_vlan_tag_remove also adjusts csum */
		mxge_vlan_tag_remove(m, &csum);
	}
	/* if the checksum is valid, mark it in the mbuf header */
	if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
		/* hand a verified frame to LRO when enabled */
		if (sc->lro_cnt && (0 == mxge_lro_rx(sc, m, csum)))
			return;
		/* otherwise, it was a UDP frame, or a TCP frame which
		   we could not do LRO on.  Tell the stack that the
		   checksum is good */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
	}

	/* pass the frame up the stack */
	(*ifp->if_input)(ifp, m);
}
2256 
/*
 * Drain the receive completion ring: dispatch each completed
 * receive to the small or big handler based on its length, then
 * flush anything LRO is still aggregating.
 */
static inline void
mxge_clean_rx_done(mxge_softc_t *sc)
{
	mxge_rx_done_t *rx_done = &sc->rx_done;
	struct lro_entry *lro;
	int limit = 0;
	uint16_t length;
	uint16_t checksum;


	/* a non-zero length marks a valid completion entry */
	while (rx_done->entry[rx_done->idx].length != 0) {
		length = ntohs(rx_done->entry[rx_done->idx].length);
		/* zero the length to return the slot to the NIC */
		rx_done->entry[rx_done->idx].length = 0;
		checksum = rx_done->entry[rx_done->idx].checksum;
		if (length <= (MHLEN - MXGEFW_PAD))
			mxge_rx_done_small(sc, length, checksum);
		else
			mxge_rx_done_big(sc, length, checksum);
		rx_done->cnt++;
		rx_done->idx = rx_done->cnt & rx_done->mask;

		/* limit potential for livelock */
		if (__predict_false(++limit > 2 * rx_done->mask))
			break;
	}
	/* push out all partially-merged LRO segments */
	while(!SLIST_EMPTY(&sc->lro_active)) {
		lro = SLIST_FIRST(&sc->lro_active);
		SLIST_REMOVE_HEAD(&sc->lro_active, next);
		mxge_lro_flush(sc, lro);
	}
}
2288 
2289 
/*
 * Reclaim transmit ring slots up to the firmware's completion index
 * "mcp_idx", freeing mbufs and unloading DMA maps, then restart the
 * transmit path if it was flow-controlled and at least 3/4 of the
 * ring is now free.
 */
static inline void
mxge_tx_done(mxge_softc_t *sc, uint32_t mcp_idx)
{
	struct ifnet *ifp;
	mxge_tx_buf_t *tx;
	struct mbuf *m;
	bus_dmamap_t map;
	int idx, limit;

	limit = 0;
	tx = &sc->tx;
	ifp = sc->ifp;
	while (tx->pkt_done != mcp_idx) {
		idx = tx->done & tx->mask;
		tx->done++;
		m = tx->info[idx].m;
		/* mbuf and DMA map only attached to the first
		   segment per-mbuf */
		if (m != NULL) {
			ifp->if_opackets++;
			tx->info[idx].m = NULL;
			map = tx->info[idx].map;
			bus_dmamap_unload(tx->dmat, map);
			m_freem(m);
		}
		/* each flagged slot accounts for one completed packet */
		if (tx->info[idx].flag) {
			tx->info[idx].flag = 0;
			tx->pkt_done++;
		}
		/* limit potential for livelock by only handling
		   2 full tx rings per call */
		if (__predict_false(++limit >  2 * tx->mask))
			break;
	}

	/* If we have space, clear IFF_OACTIVE to tell the stack that
           its OK to send packets */

	if (ifp->if_drv_flags & IFF_DRV_OACTIVE &&
	    tx->req - tx->done < (tx->mask + 1)/4) {
		mtx_lock(&sc->tx_mtx);
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
		sc->tx.wake++;
		mxge_start_locked(sc);
		mtx_unlock(&sc->tx_mtx);
	}
}
2337 
/*
 * Interrupt handler.  Deasserts a legacy IRQ if needed, drains
 * transmit and receive completions until the firmware posts no more
 * work, processes any statistics update (link state changes, RDMA
 * tag exhaustion, link-down counts), and finally returns the IRQ
 * credits to the NIC.
 */
static void
mxge_intr(void *arg)
{
	mxge_softc_t *sc = arg;
	mcp_irq_data_t *stats = sc->fw_stats;
	mxge_tx_buf_t *tx = &sc->tx;
	mxge_rx_done_t *rx_done = &sc->rx_done;
	uint32_t send_done_count;
	uint8_t valid;


	/* make sure the DMA has finished */
	if (!stats->valid) {
		return;
	}
	valid = stats->valid;

	if (!sc->msi_enabled) {
		/* lower legacy IRQ  */
		*sc->irq_deassert = 0;
		if (!mxge_deassert_wait)
			/* don't wait for conf. that irq is low */
			stats->valid = 0;
	} else {
		stats->valid = 0;
	}

	/* loop while waiting for legacy irq deassertion */
	do {
		/* check for transmit completes and receives */
		send_done_count = be32toh(stats->send_done_count);
		while ((send_done_count != tx->pkt_done) ||
		       (rx_done->entry[rx_done->idx].length != 0)) {
			mxge_tx_done(sc, (int)send_done_count);
			mxge_clean_rx_done(sc);
			send_done_count = be32toh(stats->send_done_count);
		}
		/* volatile re-read: firmware clears valid when the
		   legacy irq has actually been deasserted */
	} while (*((volatile uint8_t *) &stats->valid));

	if (__predict_false(stats->stats_updated)) {
		/* propagate link state changes to the stack */
		if (sc->link_state != stats->link_up) {
			sc->link_state = stats->link_up;
			if (sc->link_state) {
				if_link_state_change(sc->ifp, LINK_STATE_UP);
				if (mxge_verbose)
					device_printf(sc->dev, "link up\n");
			} else {
				if_link_state_change(sc->ifp, LINK_STATE_DOWN);
				if (mxge_verbose)
					device_printf(sc->dev, "link down\n");
			}
		}
		if (sc->rdma_tags_available !=
		    be32toh(sc->fw_stats->rdma_tags_available)) {
			sc->rdma_tags_available =
				be32toh(sc->fw_stats->rdma_tags_available);
			device_printf(sc->dev, "RDMA timed out! %d tags "
				      "left\n", sc->rdma_tags_available);
		}
		/* mxge_close() watches down_cnt for the down irq */
		sc->down_cnt += stats->link_down;
	}

	/* check to see if we have rx token to pass back */
	if (valid & 0x1)
	    *sc->irq_claim = be32toh(3);
	*(sc->irq_claim + 1) = be32toh(3);
}
2405 
/*
 * Intentionally a no-op.  The interface is actually brought up via
 * mxge_open() from the SIOCSIFFLAGS ioctl path (see mxge_ioctl());
 * presumably this stub only satisfies the if_init hook — confirm at
 * the attach/ifattach site.
 */
static void
mxge_init(void *arg)
{
}
2410 
2411 
2412 
2413 static void
2414 mxge_free_mbufs(mxge_softc_t *sc)
2415 {
2416 	int i;
2417 
2418 	for (i = 0; i <= sc->rx_big.mask; i++) {
2419 		if (sc->rx_big.info[i].m == NULL)
2420 			continue;
2421 		bus_dmamap_unload(sc->rx_big.dmat,
2422 				  sc->rx_big.info[i].map);
2423 		m_freem(sc->rx_big.info[i].m);
2424 		sc->rx_big.info[i].m = NULL;
2425 	}
2426 
2427 	for (i = 0; i <= sc->rx_small.mask; i++) {
2428 		if (sc->rx_small.info[i].m == NULL)
2429 			continue;
2430 		bus_dmamap_unload(sc->rx_small.dmat,
2431 				  sc->rx_small.info[i].map);
2432 		m_freem(sc->rx_small.info[i].m);
2433 		sc->rx_small.info[i].m = NULL;
2434 	}
2435 
2436 	for (i = 0; i <= sc->tx.mask; i++) {
2437 		sc->tx.info[i].flag = 0;
2438 		if (sc->tx.info[i].m == NULL)
2439 			continue;
2440 		bus_dmamap_unload(sc->tx.dmat,
2441 				  sc->tx.info[i].map);
2442 		m_freem(sc->tx.info[i].m);
2443 		sc->tx.info[i].m = NULL;
2444 	}
2445 }
2446 
/*
 * Tear down everything mxge_alloc_rings() built: the completion ring
 * DMA block, the host-side copy/shadow/info arrays, the per-slot DMA
 * maps and finally the busdma tags.  Safe to call on a partially
 * initialized softc; every resource is NULL-checked, and maps are
 * destroyed before their owning tag.
 */
static void
mxge_free_rings(mxge_softc_t *sc)
{
	int i;

	if (sc->rx_done.entry != NULL)
		mxge_dma_free(&sc->rx_done.dma);
	sc->rx_done.entry = NULL;
	if (sc->tx.req_bytes != NULL)
		free(sc->tx.req_bytes, M_DEVBUF);
	if (sc->tx.seg_list != NULL)
		free(sc->tx.seg_list, M_DEVBUF);
	if (sc->rx_small.shadow != NULL)
		free(sc->rx_small.shadow, M_DEVBUF);
	if (sc->rx_big.shadow != NULL)
		free(sc->rx_big.shadow, M_DEVBUF);
	if (sc->tx.info != NULL) {
		if (sc->tx.dmat != NULL) {
			for (i = 0; i <= sc->tx.mask; i++) {
				bus_dmamap_destroy(sc->tx.dmat,
						   sc->tx.info[i].map);
			}
			bus_dma_tag_destroy(sc->tx.dmat);
		}
		free(sc->tx.info, M_DEVBUF);
	}
	if (sc->rx_small.info != NULL) {
		if (sc->rx_small.dmat != NULL) {
			for (i = 0; i <= sc->rx_small.mask; i++) {
				bus_dmamap_destroy(sc->rx_small.dmat,
						   sc->rx_small.info[i].map);
			}
			/* the spare map used for replenish swaps */
			bus_dmamap_destroy(sc->rx_small.dmat,
					   sc->rx_small.extra_map);
			bus_dma_tag_destroy(sc->rx_small.dmat);
		}
		free(sc->rx_small.info, M_DEVBUF);
	}
	if (sc->rx_big.info != NULL) {
		if (sc->rx_big.dmat != NULL) {
			for (i = 0; i <= sc->rx_big.mask; i++) {
				bus_dmamap_destroy(sc->rx_big.dmat,
						   sc->rx_big.info[i].map);
			}
			/* the spare map used for replenish swaps */
			bus_dmamap_destroy(sc->rx_big.dmat,
					   sc->rx_big.extra_map);
			bus_dma_tag_destroy(sc->rx_big.dmat);
		}
		free(sc->rx_big.info, M_DEVBUF);
	}
}
2498 
/*
 * Query the firmware for its ring sizes, then allocate all
 * host-side ring state: the completion ring (DMA-able), the tx
 * request staging block and segment list, the rx shadow rings, the
 * per-slot info arrays, the busdma tags and one dmamap per slot
 * (plus a spare per rx ring).  On any failure everything allocated
 * so far is released via mxge_free_rings().  Returns 0 or an errno.
 */
static int
mxge_alloc_rings(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int tx_ring_size, rx_ring_size;
	int tx_ring_entries, rx_ring_entries;
	int i, err;
	unsigned long bytes;

	/* get ring sizes */
	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
	tx_ring_size = cmd.data0;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine ring sizes\n");
		goto abort_with_nothing;
	}

	rx_ring_size = cmd.data0;

	/* ring sizes are reported in bytes; convert to entry counts */
	tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
	rx_ring_entries = rx_ring_size / sizeof (mcp_dma_addr_t);
	IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
	sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
	IFQ_SET_READY(&sc->ifp->if_snd);

	sc->tx.mask = tx_ring_entries - 1;
	sc->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
	sc->rx_small.mask = sc->rx_big.mask = rx_ring_entries - 1;
	sc->rx_done.mask = (2 * rx_ring_entries) - 1;

	err = ENOMEM;

	/* allocate interrupt queues */
	bytes = (sc->rx_done.mask + 1) * sizeof (*sc->rx_done.entry);
	err = mxge_dma_alloc(sc, &sc->rx_done.dma, bytes, 4096);
	if (err != 0)
		goto abort_with_nothing;
	sc->rx_done.entry = sc->rx_done.dma.addr;
	bzero(sc->rx_done.entry, bytes);

	/* allocate the tx request copy block */
	bytes = 8 +
		sizeof (*sc->tx.req_list) * (sc->tx.max_desc + 4);
	sc->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
	if (sc->tx.req_bytes == NULL)
		goto abort_with_alloc;
	/* ensure req_list entries are aligned to 8 bytes */
	sc->tx.req_list = (mcp_kreq_ether_send_t *)
		((unsigned long)(sc->tx.req_bytes + 7) & ~7UL);

	/* allocate the tx busdma segment list */
	bytes = sizeof (*sc->tx.seg_list) * sc->tx.max_desc;
	sc->tx.seg_list = (bus_dma_segment_t *)
		malloc(bytes, M_DEVBUF, M_WAITOK);
	if (sc->tx.seg_list == NULL)
		goto abort_with_alloc;

	/* allocate the rx shadow rings */
	bytes = rx_ring_entries * sizeof (*sc->rx_small.shadow);
	sc->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_small.shadow == NULL)
		goto abort_with_alloc;

	bytes = rx_ring_entries * sizeof (*sc->rx_big.shadow);
	sc->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_big.shadow == NULL)
		goto abort_with_alloc;

	/* allocate the host info rings */
	bytes = tx_ring_entries * sizeof (*sc->tx.info);
	sc->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->tx.info == NULL)
		goto abort_with_alloc;

	bytes = rx_ring_entries * sizeof (*sc->rx_small.info);
	sc->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_small.info == NULL)
		goto abort_with_alloc;

	bytes = rx_ring_entries * sizeof (*sc->rx_big.info);
	sc->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_big.info == NULL)
		goto abort_with_alloc;

	/* allocate the busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 sc->tx.boundary,	/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 sc->tx.max_desc - 2,	/* num segs */
				 sc->tx.boundary,	/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &sc->tx.dmat);		/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating tx dmat\n",
			      err);
		goto abort_with_alloc;
	}

	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 MHLEN,			/* maxsize */
				 1,			/* num segs */
				 MHLEN,			/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &sc->rx_small.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
			      err);
		goto abort_with_alloc;
	}

	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 3*4096,		/* maxsize */
				 3,			/* num segs */
				 4096,			/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &sc->rx_big.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
			      err);
		goto abort_with_alloc;
	}

	/* now use these tags to setup dmamaps for each slot
	   in each ring */
	for (i = 0; i <= sc->tx.mask; i++) {
		err = bus_dmamap_create(sc->tx.dmat, 0,
					&sc->tx.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d  tx dmamap\n",
			      err);
			goto abort_with_alloc;
		}
	}
	for (i = 0; i <= sc->rx_small.mask; i++) {
		err = bus_dmamap_create(sc->rx_small.dmat, 0,
					&sc->rx_small.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d  rx_small dmamap\n",
				      err);
			goto abort_with_alloc;
		}
	}
	/* spare map, swapped in when a slot is replenished */
	err = bus_dmamap_create(sc->rx_small.dmat, 0,
				&sc->rx_small.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
			      err);
			goto abort_with_alloc;
	}

	for (i = 0; i <= sc->rx_big.mask; i++) {
		err = bus_dmamap_create(sc->rx_big.dmat, 0,
					&sc->rx_big.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d  rx_big dmamap\n",
			      err);
			goto abort_with_alloc;
		}
	}
	/* spare map, swapped in when a slot is replenished */
	err = bus_dmamap_create(sc->rx_big.dmat, 0,
				&sc->rx_big.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
			      err);
			goto abort_with_alloc;
	}
	return 0;

abort_with_alloc:
	mxge_free_rings(sc);

abort_with_nothing:
	return err;
}
2692 
2693 static void
2694 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
2695 {
2696 	int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
2697 
2698 	if (bufsize < MCLBYTES) {
2699 		/* easy, everything fits in a single buffer */
2700 		*big_buf_size = MCLBYTES;
2701 		*cl_size = MCLBYTES;
2702 		*nbufs = 1;
2703 		return;
2704 	}
2705 
2706 	if (bufsize < MJUMPAGESIZE) {
2707 		/* still easy, everything still fits in a single buffer */
2708 		*big_buf_size = MJUMPAGESIZE;
2709 		*cl_size = MJUMPAGESIZE;
2710 		*nbufs = 1;
2711 		return;
2712 	}
2713 	/* now we need to use virtually contiguous buffers */
2714 	*cl_size = MJUM9BYTES;
2715 	*big_buf_size = 4096;
2716 	*nbufs = mtu / 4096 + 1;
2717 	/* needs to be a power of two, so round up */
2718 	if (*nbufs == 3)
2719 		*nbufs = 4;
2720 }
2721 
/*
 * Bring the interface up: allocate LRO bookkeeping, reset the NIC,
 * choose receive buffer geometry for the current MTU, fetch the
 * firmware's ring locations, stock both receive rings, program the
 * MTU/buffer sizes and statistics DMA address, and finally start the
 * firmware.  Returns 0 or an errno; on failure all stocked mbufs are
 * released.
 */
static int
mxge_open(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int i, err, big_bytes;
	bus_dmamap_t map;
	bus_addr_t bus;
	struct lro_entry *lro_entry;

	SLIST_INIT(&sc->lro_free);
	SLIST_INIT(&sc->lro_active);

	/* pre-allocate the LRO entry pool; on failure just run with
	   however many we got */
	for (i = 0; i < sc->lro_cnt; i++) {
		lro_entry = (struct lro_entry *)
			malloc(sizeof (*lro_entry), M_DEVBUF, M_NOWAIT | M_ZERO);
		if (lro_entry == NULL) {
			sc->lro_cnt = i;
			break;
		}
		SLIST_INSERT_HEAD(&sc->lro_free, lro_entry, next);
	}

	/* Copy the MAC address in case it was overridden */
	bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);

	err = mxge_reset(sc, 1);
	if (err != 0) {
		device_printf(sc->dev, "failed to reset\n");
		return EIO;
	}

	mxge_choose_params(sc->ifp->if_mtu, &big_bytes,
			   &sc->rx_big.cl_size, &sc->rx_big.nbufs);

	cmd.data0 = sc->rx_big.nbufs;
	err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			    &cmd);
	/* error is only meaningful if we're trying to set
	   MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
	if (err && sc->rx_big.nbufs > 1) {
		device_printf(sc->dev,
			      "Failed to set alway-use-n to %d\n",
			      sc->rx_big.nbufs);
		return EIO;
	}
	/* get the lanai pointers to the send and receive rings */

	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
	sc->tx.lanai =
		(volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
	err |= mxge_send_cmd(sc,
				 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
	sc->rx_small.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
	err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
	sc->rx_big.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);

	if (err != 0) {
		device_printf(sc->dev,
			      "failed to get ring sizes or locations\n");
		return EIO;
	}

	/* stock receive rings */
	for (i = 0; i <= sc->rx_small.mask; i++) {
		map = sc->rx_small.info[i].map;
		err = mxge_get_buf_small(sc, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d smalls\n",
				      i, sc->rx_small.mask + 1);
			goto abort;
		}
	}
	/* pre-poison the big shadow ring so unfilled slots are
	   recognizable */
	for (i = 0; i <= sc->rx_big.mask; i++) {
		sc->rx_big.shadow[i].addr_low = 0xffffffff;
		sc->rx_big.shadow[i].addr_high = 0xffffffff;
	}
	/* big receives are stocked nbufs slots at a time */
	for (i = 0; i <= sc->rx_big.mask; i += sc->rx_big.nbufs) {
		map = sc->rx_big.info[i].map;
		err = mxge_get_buf_big(sc, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d bigs\n",
				      i, sc->rx_big.mask + 1);
			goto abort;
		}
	}

	/* Give the firmware the mtu and the big and small buffer
	   sizes.  The firmware wants the big buf size to be a power
	   of two. Luckily, FreeBSD's clusters are powers of two */
	cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
	cmd.data0 = MHLEN - MXGEFW_PAD;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
			     &cmd);
	cmd.data0 = big_bytes;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	/* Now give him the pointer to the stats block */
	cmd.data0 = MXGE_LOWPART_TO_U32(sc->fw_stats_dma.bus_addr);
	cmd.data1 = MXGE_HIGHPART_TO_U32(sc->fw_stats_dma.bus_addr);
	cmd.data2 = sizeof(struct mcp_irq_data);
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);

	if (err != 0) {
		/* older firmware: fall back to the obsolete interface,
		   pointing it at just the send_done_count field */
		bus = sc->fw_stats_dma.bus_addr;
		bus += offsetof(struct mcp_irq_data, send_done_count);
		cmd.data0 = MXGE_LOWPART_TO_U32(bus);
		cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
		err = mxge_send_cmd(sc,
				    MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
				    &cmd);
		/* Firmware cannot support multicast without STATS_DMA_V2 */
		sc->fw_multicast_support = 0;
	} else {
		sc->fw_multicast_support = 1;
	}

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	/* Finally, start the firmware running */
	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
	if (err) {
		device_printf(sc->dev, "Couldn't bring up link\n");
		goto abort;
	}
	sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
	sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	return 0;


abort:
	mxge_free_mbufs(sc);

	return err;
}
2868 
2869 static int
2870 mxge_close(mxge_softc_t *sc)
2871 {
2872 	struct lro_entry *lro_entry;
2873 	mxge_cmd_t cmd;
2874 	int err, old_down_cnt;
2875 
2876 	sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2877 	old_down_cnt = sc->down_cnt;
2878 	mb();
2879 	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
2880 	if (err) {
2881 		device_printf(sc->dev, "Couldn't bring down link\n");
2882 	}
2883 	if (old_down_cnt == sc->down_cnt) {
2884 		/* wait for down irq */
2885 		DELAY(10 * sc->intr_coal_delay);
2886 	}
2887 	if (old_down_cnt == sc->down_cnt) {
2888 		device_printf(sc->dev, "never got down irq\n");
2889 	}
2890 
2891 	mxge_free_mbufs(sc);
2892 
2893 	while (!SLIST_EMPTY(&sc->lro_free)) {
2894 		lro_entry = SLIST_FIRST(&sc->lro_free);
2895 		SLIST_REMOVE_HEAD(&sc->lro_free, next);
2896 	}
2897 	return 0;
2898 }
2899 
/*
 * Program the PCI(e) config space settings the driver depends on:
 * record the negotiated PCIe link width, raise the maximum read
 * request size to 4KB, and enable bus mastering and memory space
 * decoding.  Called at attach and again after a NIC reboot.
 */
static void
mxge_setup_cfg_space(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg;
	uint16_t cmd, lnk, pectl;

	/* find the PCIe link width and set max read request to 4KB*/
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		/* Link Status register (cap + 0x12): negotiated
		   link width is in bits 9:4 */
		lnk = pci_read_config(dev, reg + 0x12, 2);
		sc->link_width = (lnk >> 4) & 0x3f;

		/* Device Control register (cap + 0x8): set the
		   Max_Read_Request_Size field (bits 14:12) to
		   101b = 4096 bytes */
		pectl = pci_read_config(dev, reg + 0x8, 2);
		pectl = (pectl & ~0x7000) | (5 << 12);
		pci_write_config(dev, reg + 0x8, pectl, 2);
	}

	/* Enable DMA and Memory space access */
	pci_enable_busmaster(dev);
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	cmd |= PCIM_CMD_MEMEN;
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
}
2923 
/*
 * Read the NIC's reboot status register indirectly through PCI
 * config space (usable even when memory-space access is broken after
 * a NIC reboot).  Uses device-specific registers inside the
 * vendor-specific capability; returns (uint32_t)-1 if that
 * capability cannot be found.
 */
static uint32_t
mxge_read_reboot(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	uint32_t vs;

	/* find the vendor specific offset */
	if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
		device_printf(sc->dev,
			      "could not find vendor specific offset\n");
		return (uint32_t)-1;
	}
	/* enable read32 mode */
	pci_write_config(dev, vs + 0x10, 0x3, 1);
	/* tell NIC which register to read */
	pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
	return (pci_read_config(dev, vs + 0x14, 4));
}
2942 
/*
 * Recover from a transmit watchdog timeout.  Determines whether the
 * NIC rebooted (PCI config space wiped) or merely wedged, prints
 * diagnostic state, and restarts the interface if it was running.
 * If recovery is impossible the watchdog callout is stopped so the
 * console is not spammed.
 */
static void
mxge_watchdog_reset(mxge_softc_t *sc)
{
	int err;
	uint32_t reboot;
	uint16_t cmd;

	err = ENXIO;

	device_printf(sc->dev, "Watchdog reset!\n");

	/*
	 * check to see if the NIC rebooted.  If it did, then all of
	 * PCI config space has been reset, and things like the
	 * busmaster bit will be zero.  If this is the case, then we
	 * must restore PCI config space before the NIC can be used
	 * again
	 */
	cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
	if (cmd == 0xffff) {
		/*
		 * maybe the watchdog caught the NIC rebooting; wait
		 * up to 100ms for it to finish.  If it does not come
		 * back, then give up
		 */
		DELAY(1000*100);
		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
		if (cmd == 0xffff) {
			device_printf(sc->dev, "NIC disappeared!\n");
			goto abort;
		}
	}
	if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
		/* print the reboot status */
		reboot = mxge_read_reboot(sc);
		device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
			      reboot);
		/* restore PCI configuration space */

		/* XXXX waiting for pci_cfg_restore() to be exported */
		goto abort; /* just abort for now */

		/* NB: unreachable until the goto above is removed;
		 * kept as a placeholder for the restore path */
		/* and redo any changes we made to our config space */
		mxge_setup_cfg_space(sc);
	} else {
		device_printf(sc->dev, "NIC did not reboot, ring state:\n");
		device_printf(sc->dev, "tx.req=%d tx.done=%d\n",
			      sc->tx.req, sc->tx.done);
		device_printf(sc->dev, "pkt_done=%d fw=%d\n",
			      sc->tx.pkt_done,
			      be32toh(sc->fw_stats->send_done_count));
	}

	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) {
		mxge_close(sc);
		err = mxge_open(sc);
	}

abort:
	/*
	 * stop the watchdog if the nic is dead, to avoid spamming the
	 * console
	 */
	if (err != 0) {
		callout_stop(&sc->co_hdl);
	}
}
3010 
3011 static void
3012 mxge_watchdog(mxge_softc_t *sc)
3013 {
3014 	mxge_tx_buf_t *tx = &sc->tx;
3015 
3016 	/* see if we have outstanding transmits, which
3017 	   have been pending for more than mxge_ticks */
3018 	if (tx->req != tx->done &&
3019 	    tx->watchdog_req != tx->watchdog_done &&
3020 	    tx->done == tx->watchdog_done)
3021 		mxge_watchdog_reset(sc);
3022 
3023 	tx->watchdog_req = tx->req;
3024 	tx->watchdog_done = tx->done;
3025 }
3026 
3027 static void
3028 mxge_tick(void *arg)
3029 {
3030 	mxge_softc_t *sc = arg;
3031 
3032 
3033 	/* Synchronize with possible callout reset/stop. */
3034 	if (callout_pending(&sc->co_hdl) ||
3035 	    !callout_active(&sc->co_hdl)) {
3036 		mtx_unlock(&sc->driver_mtx);
3037 		return;
3038 	}
3039 
3040 	callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3041 	mxge_watchdog(sc);
3042 }
3043 
3044 static int
3045 mxge_media_change(struct ifnet *ifp)
3046 {
3047 	return EINVAL;
3048 }
3049 
/*
 * Change the interface MTU.  Validates the resulting frame size
 * against the NIC's limit, then, if the interface is running,
 * restarts it with the new MTU.  If the restart fails the old MTU is
 * restored and the interface re-opened with it.  Returns 0 or an
 * errno.
 */
static int
mxge_change_mtu(mxge_softc_t *sc, int mtu)
{
	struct ifnet *ifp = sc->ifp;
	int real_mtu, old_mtu;
	int err = 0;


	real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	if ((real_mtu > sc->max_mtu) || real_mtu < 60)
		return EINVAL;
	mtx_lock(&sc->driver_mtx);
	old_mtu = ifp->if_mtu;
	ifp->if_mtu = mtu;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		callout_stop(&sc->co_hdl);
		mxge_close(sc);
		err = mxge_open(sc);
		if (err != 0) {
			/* could not come up with the new MTU;
			   fall back to the old one */
			ifp->if_mtu = old_mtu;
			mxge_close(sc);
			(void) mxge_open(sc);
		}
		callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	}
	mtx_unlock(&sc->driver_mtx);
	return err;
}
3078 
3079 static void
3080 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3081 {
3082 	mxge_softc_t *sc = ifp->if_softc;
3083 
3084 
3085 	if (sc == NULL)
3086 		return;
3087 	ifmr->ifm_status = IFM_AVALID;
3088 	ifmr->ifm_status |= sc->fw_stats->link_up ? IFM_ACTIVE : 0;
3089 	ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
3090 	ifmr->ifm_active |= sc->fw_stats->link_up ? IFM_FDX : 0;
3091 }
3092 
/*
 * Interface ioctl handler.  State-changing requests are serialized
 * against open/close and the watchdog via sc->driver_mtx.
 */
static int
mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int err, mask;

	err = 0;
	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		/* Address handling is done by the common ethernet code. */
		err = ether_ioctl(ifp, command, data);
		break;

	case SIOCSIFMTU:
		err = mxge_change_mtu(sc, ifr->ifr_mtu);
		break;

	case SIOCSIFFLAGS:
		mtx_lock(&sc->driver_mtx);
		if (ifp->if_flags & IFF_UP) {
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				/* Bring the interface up and start the
				   watchdog callout. */
				err = mxge_open(sc);
				callout_reset(&sc->co_hdl, mxge_ticks,
					      mxge_tick, sc);
			} else {
				/* take care of promisc and allmulti
				   flag changes */
				mxge_change_promisc(sc,
						    ifp->if_flags & IFF_PROMISC);
				mxge_set_multicast_list(sc);
			}
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				/* Interface going down: stop it and the
				   watchdog callout. */
				mxge_close(sc);
				callout_stop(&sc->co_hdl);
			}
		}
		mtx_unlock(&sc->driver_mtx);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		mtx_lock(&sc->driver_mtx);
		mxge_set_multicast_list(sc);
		mtx_unlock(&sc->driver_mtx);
		break;

	case SIOCSIFCAP:
		mtx_lock(&sc->driver_mtx);
		/* mask holds the capability bits being toggled. */
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & ifp->if_capenable) {
				/* Disabling tx csum also disables TSO,
				   which depends on it. */
				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
						      | CSUM_TSO);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM;
				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
			}
		} else if (mask & IFCAP_RXCSUM) {
			/* csum_flag tells the rx path whether to trust
			   hardware checksums. */
			if (IFCAP_RXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_RXCSUM;
				sc->csum_flag = 0;
			} else {
				ifp->if_capenable |= IFCAP_RXCSUM;
				sc->csum_flag = 1;
			}
		}
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				ifp->if_hwassist &= ~CSUM_TSO;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				/* TSO requires tx checksum offload. */
				printf("mxge requires tx checksum offload"
				       " be enabled to use TSO\n");
				err = EINVAL;
			}
		}
		if (mask & IFCAP_LRO) {
			if (IFCAP_LRO & ifp->if_capenable)
				err = mxge_change_lro_locked(sc, 0);
			else
				err = mxge_change_lro_locked(sc, mxge_lro_cnt);
		}
		if (mask & IFCAP_VLAN_HWTAGGING)
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
		mtx_unlock(&sc->driver_mtx);
		/* Propagate capability changes to vlan pseudo-interfaces. */
		VLAN_CAPABILITIES(ifp);

		break;

	case SIOCGIFMEDIA:
		err = ifmedia_ioctl(ifp, (struct ifreq *)data,
				    &sc->media, command);
                break;

	default:
		err = ENOTTY;
        }
	return err;
}
3198 
3199 static void
3200 mxge_fetch_tunables(mxge_softc_t *sc)
3201 {
3202 
3203 	TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
3204 			  &mxge_flow_control);
3205 	TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
3206 			  &mxge_intr_coal_delay);
3207 	TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
3208 			  &mxge_nvidia_ecrc_enable);
3209 	TUNABLE_INT_FETCH("hw.mxge.force_firmware",
3210 			  &mxge_force_firmware);
3211 	TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
3212 			  &mxge_deassert_wait);
3213 	TUNABLE_INT_FETCH("hw.mxge.verbose",
3214 			  &mxge_verbose);
3215 	TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
3216 	TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);
3217 	printf("%d %d\n", sc->lro_cnt, mxge_lro_cnt);
3218 	if (sc->lro_cnt != 0)
3219 		mxge_lro_cnt = sc->lro_cnt;
3220 
3221 	if (bootverbose)
3222 		mxge_verbose = 1;
3223 	if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
3224 		mxge_intr_coal_delay = 30;
3225 	if (mxge_ticks == 0)
3226 		mxge_ticks = hz;
3227 	sc->pause = mxge_flow_control;
3228 
3229 }
3230 
3231 static int
3232 mxge_attach(device_t dev)
3233 {
3234 	mxge_softc_t *sc = device_get_softc(dev);
3235 	struct ifnet *ifp;
3236 	int count, rid, err;
3237 
3238 	sc->dev = dev;
3239 	mxge_fetch_tunables(sc);
3240 
3241 	err = bus_dma_tag_create(NULL,			/* parent */
3242 				 1,			/* alignment */
3243 				 4096,			/* boundary */
3244 				 BUS_SPACE_MAXADDR,	/* low */
3245 				 BUS_SPACE_MAXADDR,	/* high */
3246 				 NULL, NULL,		/* filter */
3247 				 65536 + 256,		/* maxsize */
3248 				 MXGE_MAX_SEND_DESC, 	/* num segs */
3249 				 4096,			/* maxsegsize */
3250 				 0,			/* flags */
3251 				 NULL, NULL,		/* lock */
3252 				 &sc->parent_dmat);	/* tag */
3253 
3254 	if (err != 0) {
3255 		device_printf(sc->dev, "Err %d allocating parent dmat\n",
3256 			      err);
3257 		goto abort_with_nothing;
3258 	}
3259 
3260 	ifp = sc->ifp = if_alloc(IFT_ETHER);
3261 	if (ifp == NULL) {
3262 		device_printf(dev, "can not if_alloc()\n");
3263 		err = ENOSPC;
3264 		goto abort_with_parent_dmat;
3265 	}
3266 	snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
3267 		 device_get_nameunit(dev));
3268 	mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
3269 	snprintf(sc->tx_mtx_name, sizeof(sc->tx_mtx_name), "%s:tx",
3270 		 device_get_nameunit(dev));
3271 	mtx_init(&sc->tx_mtx, sc->tx_mtx_name, NULL, MTX_DEF);
3272 	snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
3273 		 "%s:drv", device_get_nameunit(dev));
3274 	mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
3275 		 MTX_NETWORK_LOCK, MTX_DEF);
3276 
3277 	callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);
3278 
3279 	mxge_setup_cfg_space(sc);
3280 
3281 	/* Map the board into the kernel */
3282 	rid = PCIR_BARS;
3283 	sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
3284 					 ~0, 1, RF_ACTIVE);
3285 	if (sc->mem_res == NULL) {
3286 		device_printf(dev, "could not map memory\n");
3287 		err = ENXIO;
3288 		goto abort_with_lock;
3289 	}
3290 	sc->sram = rman_get_virtual(sc->mem_res);
3291 	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
3292 	if (sc->sram_size > rman_get_size(sc->mem_res)) {
3293 		device_printf(dev, "impossible memory region size %ld\n",
3294 			      rman_get_size(sc->mem_res));
3295 		err = ENXIO;
3296 		goto abort_with_mem_res;
3297 	}
3298 
3299 	/* make NULL terminated copy of the EEPROM strings section of
3300 	   lanai SRAM */
3301 	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
3302 	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
3303 				rman_get_bushandle(sc->mem_res),
3304 				sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
3305 				sc->eeprom_strings,
3306 				MXGE_EEPROM_STRINGS_SIZE - 2);
3307 	err = mxge_parse_strings(sc);
3308 	if (err != 0)
3309 		goto abort_with_mem_res;
3310 
3311 	/* Enable write combining for efficient use of PCIe bus */
3312 	mxge_enable_wc(sc);
3313 
3314 	/* Allocate the out of band dma memory */
3315 	err = mxge_dma_alloc(sc, &sc->cmd_dma,
3316 			     sizeof (mxge_cmd_t), 64);
3317 	if (err != 0)
3318 		goto abort_with_mem_res;
3319 	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
3320 	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
3321 	if (err != 0)
3322 		goto abort_with_cmd_dma;
3323 
3324 	err = mxge_dma_alloc(sc, &sc->fw_stats_dma,
3325 			     sizeof (*sc->fw_stats), 64);
3326 	if (err != 0)
3327 		goto abort_with_zeropad_dma;
3328 	sc->fw_stats = (mcp_irq_data_t *)sc->fw_stats_dma.addr;
3329 
3330 	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
3331 	if (err != 0)
3332 		goto abort_with_fw_stats;
3333 
3334 	/* Add our ithread  */
3335 	count = pci_msi_count(dev);
3336 	if (count == 1 && pci_alloc_msi(dev, &count) == 0) {
3337 		rid = 1;
3338 		sc->msi_enabled = 1;
3339 	} else {
3340 		rid = 0;
3341 	}
3342 	sc->irq_res = bus_alloc_resource(dev, SYS_RES_IRQ, &rid, 0, ~0,
3343 					 1, RF_SHAREABLE | RF_ACTIVE);
3344 	if (sc->irq_res == NULL) {
3345 		device_printf(dev, "could not alloc interrupt\n");
3346 		goto abort_with_dmabench;
3347 	}
3348 	if (mxge_verbose)
3349 		device_printf(dev, "using %s irq %ld\n",
3350 			      sc->msi_enabled ? "MSI" : "INTx",
3351 			      rman_get_start(sc->irq_res));
3352 	/* select & load the firmware */
3353 	err = mxge_select_firmware(sc);
3354 	if (err != 0)
3355 		goto abort_with_irq_res;
3356 	sc->intr_coal_delay = mxge_intr_coal_delay;
3357 	err = mxge_reset(sc, 0);
3358 	if (err != 0)
3359 		goto abort_with_irq_res;
3360 
3361 	err = mxge_alloc_rings(sc);
3362 	if (err != 0) {
3363 		device_printf(sc->dev, "failed to allocate rings\n");
3364 		goto abort_with_irq_res;
3365 	}
3366 
3367 	err = bus_setup_intr(sc->dev, sc->irq_res,
3368 			     INTR_TYPE_NET | INTR_MPSAFE,
3369 			     NULL, mxge_intr, sc, &sc->ih);
3370 	if (err != 0) {
3371 		goto abort_with_rings;
3372 	}
3373 	/* hook into the network stack */
3374 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3375 	ifp->if_baudrate = 100000000;
3376 	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
3377 		IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
3378 		IFCAP_VLAN_HWCSUM | IFCAP_LRO;
3379 
3380 	sc->max_mtu = mxge_max_mtu(sc);
3381 	if (sc->max_mtu >= 9000)
3382 		ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3383 	else
3384 		device_printf(dev, "MTU limited to %d.  Install "
3385 			      "latest firmware for 9000 byte jumbo support\n",
3386 			      sc->max_mtu - ETHER_HDR_LEN);
3387 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
3388 	ifp->if_capenable = ifp->if_capabilities;
3389 	if (sc->lro_cnt == 0)
3390 		ifp->if_capenable &= ~IFCAP_LRO;
3391 	sc->csum_flag = 1;
3392         ifp->if_init = mxge_init;
3393         ifp->if_softc = sc;
3394         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3395         ifp->if_ioctl = mxge_ioctl;
3396         ifp->if_start = mxge_start;
3397 	ether_ifattach(ifp, sc->mac_addr);
3398 	/* ether_ifattach sets mtu to 1500 */
3399 	if (ifp->if_capabilities & IFCAP_JUMBO_MTU)
3400 		ifp->if_mtu = 9000;
3401 
3402 	/* Initialise the ifmedia structure */
3403 	ifmedia_init(&sc->media, 0, mxge_media_change,
3404 		     mxge_media_status);
3405 	ifmedia_add(&sc->media, IFM_ETHER|IFM_AUTO, 0, NULL);
3406 	mxge_add_sysctls(sc);
3407 	return 0;
3408 
3409 abort_with_rings:
3410 	mxge_free_rings(sc);
3411 abort_with_irq_res:
3412 	bus_release_resource(dev, SYS_RES_IRQ,
3413 			     sc->msi_enabled ? 1 : 0, sc->irq_res);
3414 	if (sc->msi_enabled)
3415 		pci_release_msi(dev);
3416 abort_with_dmabench:
3417 	mxge_dma_free(&sc->dmabench_dma);
3418 abort_with_fw_stats:
3419 	mxge_dma_free(&sc->fw_stats_dma);
3420 abort_with_zeropad_dma:
3421 	mxge_dma_free(&sc->zeropad_dma);
3422 abort_with_cmd_dma:
3423 	mxge_dma_free(&sc->cmd_dma);
3424 abort_with_mem_res:
3425 	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
3426 abort_with_lock:
3427 	pci_disable_busmaster(dev);
3428 	mtx_destroy(&sc->cmd_mtx);
3429 	mtx_destroy(&sc->tx_mtx);
3430 	mtx_destroy(&sc->driver_mtx);
3431 	if_free(ifp);
3432 abort_with_parent_dmat:
3433 	bus_dma_tag_destroy(sc->parent_dmat);
3434 
3435 abort_with_nothing:
3436 	return err;
3437 }
3438 
/*
 * Device detach: stop the interface, unhook from the network stack and
 * release all resources acquired in mxge_attach (in roughly reverse
 * order of acquisition).
 */
static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	/* Refuse to detach while vlan interfaces are stacked on top. */
	if (sc->ifp->if_vlantrunk != NULL) {
		device_printf(sc->dev,
			      "Detach vlans before removing module\n");
		return EBUSY;
	}
	/* Stop the interface and watchdog callout under the driver lock. */
	mtx_lock(&sc->driver_mtx);
	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
		mxge_close(sc);
	callout_stop(&sc->co_hdl);
	mtx_unlock(&sc->driver_mtx);
	/* Unhook from the network stack before freeing resources. */
	ether_ifdetach(sc->ifp);
	ifmedia_removeall(&sc->media);
	mxge_dummy_rdma(sc, 0);
	bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
	mxge_free_rings(sc);
	bus_release_resource(dev, SYS_RES_IRQ,
			     sc->msi_enabled ? 1 : 0, sc->irq_res);
	if (sc->msi_enabled)
		pci_release_msi(dev);

	sc->rx_done.entry = NULL;
	/* Free the out of band DMA memory allocated at attach time. */
	mxge_dma_free(&sc->fw_stats_dma);
	mxge_dma_free(&sc->dmabench_dma);
	mxge_dma_free(&sc->zeropad_dma);
	mxge_dma_free(&sc->cmd_dma);
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->tx_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(sc->ifp);
	bus_dma_tag_destroy(sc->parent_dmat);
	return 0;
}
3478 
3479 static int
3480 mxge_shutdown(device_t dev)
3481 {
3482 	return 0;
3483 }
3484 
3485 /*
3486   This file uses Myri10GE driver indentation.
3487 
3488   Local Variables:
3489   c-file-style:"linux"
3490   tab-width:8
3491   End:
3492 */
3493