xref: /freebsd/sys/dev/mxge/if_mxge.c (revision d056fa046c6a91b90cd98165face0e42a33a5173)
1 /******************************************************************************
2 
3 Copyright (c) 2006, Myricom Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Redistributions in binary form must reproduce the above copyright
13     notice, this list of conditions and the following disclaimer in the
14     documentation and/or other materials provided with the distribution.
15 
16  3. Neither the name of the Myricom Inc, nor the names of its
17     contributors may be used to endorse or promote products derived from
18     this software without specific prior written permission.
19 
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31 
32 ***************************************************************************/
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/linker.h>
40 #include <sys/firmware.h>
41 #include <sys/endian.h>
42 #include <sys/sockio.h>
43 #include <sys/mbuf.h>
44 #include <sys/malloc.h>
45 #include <sys/kdb.h>
46 #include <sys/kernel.h>
47 #include <sys/module.h>
48 #include <sys/memrange.h>
49 #include <sys/socket.h>
50 #include <sys/sysctl.h>
51 #include <sys/sx.h>
52 
53 #include <net/if.h>
54 #include <net/if_arp.h>
55 #include <net/ethernet.h>
56 #include <net/if_dl.h>
57 #include <net/if_media.h>
58 
59 #include <net/bpf.h>
60 
61 #include <net/if_types.h>
62 #include <net/if_vlan_var.h>
63 #include <net/zlib.h>
64 
65 #include <netinet/in_systm.h>
66 #include <netinet/in.h>
67 #include <netinet/ip.h>
68 
69 #include <machine/bus.h>
70 #include <machine/resource.h>
71 #include <sys/bus.h>
72 #include <sys/rman.h>
73 
74 #include <dev/pci/pcireg.h>
75 #include <dev/pci/pcivar.h>
76 
77 #include <vm/vm.h>		/* for pmap_mapdev() */
78 #include <vm/pmap.h>
79 
80 #include <dev/mxge/mxge_mcp.h>
81 #include <dev/mxge/mcp_gen_header.h>
82 #include <dev/mxge/if_mxge_var.h>
83 
/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;	/* try to enable ECRC on Nvidia bridges */
static int mxge_max_intr_slots = 1024;	/* entries in the rx_done interrupt queue */
static int mxge_intr_coal_delay = 30;	/* interrupt coalescing delay in usecs */
static int mxge_deassert_wait = 1;	/* wait for IRQ line to go low in ihandler */
static int mxge_flow_control = 1;	/* default flow-control (pause) setting */
static int mxge_verbose = 0;		/* verbose diagnostic printing */
/* firmware image names: unaligned-completion workaround vs. aligned */
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";

/* newbus device interface implementations */
static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
  /* Device interface */
  DEVMETHOD(device_probe, mxge_probe),
  DEVMETHOD(device_attach, mxge_attach),
  DEVMETHOD(device_detach, mxge_detach),
  DEVMETHOD(device_shutdown, mxge_shutdown),
  {0, 0}
};

static driver_t mxge_driver =
{
  "mxge",
  mxge_methods,
  sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
/* firmware images are loaded through firmware(9) */
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
122 
123 static int
124 mxge_probe(device_t dev)
125 {
126   if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
127       (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E)) {
128 	  device_set_desc(dev, "Myri10G-PCIE-8A");
129 	  return 0;
130   }
131   return ENXIO;
132 }
133 
134 static void
135 mxge_enable_wc(mxge_softc_t *sc)
136 {
137 	struct mem_range_desc mrdesc;
138 	vm_paddr_t pa;
139 	vm_offset_t len;
140 	int err, action;
141 
142 	pa = rman_get_start(sc->mem_res);
143 	len = rman_get_size(sc->mem_res);
144 	mrdesc.mr_base = pa;
145 	mrdesc.mr_len = len;
146 	mrdesc.mr_flags = MDF_WRITECOMBINE;
147 	action = MEMRANGE_SET_UPDATE;
148 	strcpy((char *)&mrdesc.mr_owner, "mxge");
149 	err = mem_range_attr_set(&mrdesc, &action);
150 	if (err != 0) {
151 		device_printf(sc->dev,
152 			      "w/c failed for pa 0x%lx, len 0x%lx, err = %d\n",
153 			      (unsigned long)pa, (unsigned long)len, err);
154 	} else {
155 		sc->wc = 1;
156 	}
157 }
158 
159 
160 /* callback to get our DMA address */
161 static void
162 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
163 			 int error)
164 {
165 	if (error == 0) {
166 		*(bus_addr_t *) arg = segs->ds_addr;
167 	}
168 }
169 
/*
 * Allocate "bytes" of zeroed, coherent DMA memory with the given
 * alignment (single segment, 4KB boundary and maxsegsize) and load
 * it, filling in dma->dmat, dma->addr, dma->map and dma->bus_addr.
 * Returns 0 or a bus_dma errno; on failure all partially-acquired
 * resources are released before returning.
 */
static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
		   bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 4096,			/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO),  &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory; the callback stores the bus address */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

	/* unwind in reverse order of acquisition */
abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}
220 
221 
/*
 * Release a buffer set up by mxge_dma_alloc(): unload the map, free
 * the memory, then destroy the tag (reverse order of acquisition).
 */
static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}
229 
230 /*
231  * The eeprom strings on the lanaiX have the format
232  * SN=x\0
233  * MAC=x:x:x:x:x:x\0
234  * PC=text\0
235  */
236 
/*
 * Walk the NUL-separated EEPROM id strings (format documented above)
 * and extract the MAC address, product code and serial number into
 * the softc.  Returns 0 if a MAC address was found, ENXIO otherwise.
 */
static int
mxge_parse_strings(mxge_softc_t *sc)
{
/* advance ptr past the current NUL-terminated string */
#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)

	char *ptr, *limit;
	int i, found_mac;

	ptr = sc->eeprom_strings;
	limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
	found_mac = 0;
	while (ptr < limit && *ptr != '\0') {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			/* NOTE(review): ptr += 1 here plus ptr += 3 on the
			   first loop pass lands just past "MAC=", so the
			   hex-byte parsing below is correct; but
			   mac_addr_string is left pointing at "AC=..." --
			   confirm that consumers expect this. */
			ptr += 1;
			sc->mac_addr_string = ptr;
			/* parse six "xx:"-style hex bytes */
			for (i = 0; i < 6; i++) {
				ptr += 3;
				if ((ptr + 2) > limit)
					goto abort;
				sc->mac_addr[i] = strtoul(ptr, NULL, 16);
				found_mac = 1;
			}
		} else if (memcmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			/* strncpy does not NUL-terminate; assumes the
			   softc buffer was zeroed at allocation */
			strncpy(sc->product_code_string, ptr,
				sizeof (sc->product_code_string) - 1);
		} else if (memcmp(ptr, "SN=", 3) == 0) {
			ptr += 3;
			strncpy(sc->serial_number_string, ptr,
				sizeof (sc->serial_number_string) - 1);
		}
		MXGE_NEXT_STRING(ptr);
	}

	if (found_mac)
		return 0;

 abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}
279 
#if #cpu(i386) || defined __i386 || defined i386 || defined __i386__ || #cpu(x86_64) || defined __x86_64__
/*
 * Enable ECRC generation on an upstream Nvidia bridge by setting bit
 * 0x40 in the dword at extended config offset 0x178.  The extended
 * config space is reached by mapping the chipset's 0xe0000000 window
 * directly (see the XXXX comment below for why).  Returns 0 on
 * success, EIO if the mapping cannot be validated.  x86/amd64 only.
 */
static int
mxge_enable_nvidia_ecrc(mxge_softc_t *sc, device_t pdev)
{
	uint32_t val;
	unsigned long off;
	char *va, *cfgptr;
	uint16_t vendor_id, device_id;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function.  Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return 0;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves.  This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	/* look up the bridge's bus/slot/function and its vendor and
	   device ids so the mapping can be computed and verified */
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	/* physical address of this function's extended config space */
	off =  0xe0000000UL
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);


	if (va == NULL) {
		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
		return EIO;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return EIO;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return EIO;
	}
	/* set the ECRC generation enable bit */
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return 0;
}
#else
/* Stub for non-x86 platforms: the nForce4 workaround is x86/amd64-only. */
static int
mxge_enable_nvidia_ecrc(mxge_softc_t *sc, device_t pdev)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return ENXIO;
}
#endif
379 /*
380  * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
381  * when the PCI-E Completion packets are aligned on an 8-byte
382  * boundary.  Some PCI-E chip sets always align Completion packets; on
383  * the ones that do not, the alignment can be enforced by enabling
384  * ECRC generation (if supported).
385  *
386  * When PCI-E Completion packets are not aligned, it is actually more
387  * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
388  *
389  * If the driver can neither enable ECRC nor verify that it has
390  * already been enabled, then it must use a firmware image which works
391  * around unaligned completion packets (ethp_z8e.dat), and it should
392  * also ensure that it never gives the device a Read-DMA which is
393  * larger than 2KB by setting the tx.boundary to 2KB.  If ECRC is
394  * enabled, then the driver should use the aligned (eth_z8e.dat)
395  * firmware image, and set tx.boundary to 4KB.
396  */
397 
398 static void
399 mxge_select_firmware(mxge_softc_t *sc)
400 {
401 	int err, aligned = 0;
402 	device_t pdev;
403 	uint16_t pvend, pdid;
404 
405 	pdev = device_get_parent(device_get_parent(sc->dev));
406 	if (pdev == NULL) {
407 		device_printf(sc->dev, "could not find parent?\n");
408 		goto abort;
409 	}
410 	pvend = pci_read_config(pdev, PCIR_VENDOR, 2);
411 	pdid = pci_read_config(pdev, PCIR_DEVICE, 2);
412 
413 	/* see if we can enable ECRC's on an upstream
414 	   Nvidia bridge */
415 	if (mxge_nvidia_ecrc_enable &&
416 	    (pvend == 0x10de && pdid == 0x005d)) {
417 		err = mxge_enable_nvidia_ecrc(sc, pdev);
418 		if (err == 0) {
419 			aligned = 1;
420 			if (mxge_verbose)
421 				device_printf(sc->dev,
422 					      "Assuming aligned completions"
423 					      " (ECRC)\n");
424 		}
425 	}
426 	/* see if the upstream bridge is known to
427 	   provided aligned completions */
428 	if (/* HT2000  */ (pvend == 0x1166 && pdid == 0x0132) ||
429 	    /* Ontario */ (pvend == 0x10b5 && pdid == 0x8532)) {
430 		if (mxge_verbose)
431 			device_printf(sc->dev,
432 				      "Assuming aligned completions "
433 				      "(0x%x:0x%x)\n", pvend, pdid);
434 	}
435 
436 abort:
437 	if (aligned) {
438 		sc->fw_name = mxge_fw_aligned;
439 		sc->tx.boundary = 4096;
440 	} else {
441 		sc->fw_name = mxge_fw_unaligned;
442 		sc->tx.boundary = 2048;
443 	}
444 }
445 
/*
 * Used to cast away const on the firmware image pointer, since
 * mxge_pio_copy() takes a non-const source pointer (see
 * mxge_load_firmware_helper()).
 */
union qualhack
{
        const char *ro_char;
        char *rw_char;
};
451 
452 
453 static int
454 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
455 {
456 	struct firmware *fw;
457 	const mcp_gen_header_t *hdr;
458 	unsigned hdr_offset;
459 	const char *fw_data;
460 	union qualhack hack;
461 	int status;
462 
463 
464 	fw = firmware_get(sc->fw_name);
465 
466 	if (fw == NULL) {
467 		device_printf(sc->dev, "Could not find firmware image %s\n",
468 			      sc->fw_name);
469 		return ENOENT;
470 	}
471 	if (fw->datasize > *limit ||
472 	    fw->datasize < MCP_HEADER_PTR_OFFSET + 4) {
473 		device_printf(sc->dev, "Firmware image %s too large (%d/%d)\n",
474 			      sc->fw_name, (int)fw->datasize, (int) *limit);
475 		status = ENOSPC;
476 		goto abort_with_fw;
477 	}
478 	*limit = fw->datasize;
479 
480 	/* check id */
481 	fw_data = (const char *)fw->data;
482 	hdr_offset = htobe32(*(const uint32_t *)
483 			     (fw_data + MCP_HEADER_PTR_OFFSET));
484 	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw->datasize) {
485 		device_printf(sc->dev, "Bad firmware file");
486 		status = EIO;
487 		goto abort_with_fw;
488 	}
489 	hdr = (const void*)(fw_data + hdr_offset);
490 	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
491 		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
492 			      be32toh(hdr->mcp_type));
493 		status = EIO;
494 		goto abort_with_fw;
495 	}
496 
497 	/* save firmware version for sysctl */
498 	strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
499 	if (mxge_verbose)
500 		device_printf(sc->dev, "firmware id: %s\n", hdr->version);
501 
502 	hack.ro_char = fw_data;
503 	/* Copy the inflated firmware to NIC SRAM. */
504 	mxge_pio_copy(&sc->sram[MXGE_FW_OFFSET], hack.rw_char,  *limit);
505 
506 	status = 0;
507 abort_with_fw:
508 	firmware_put(fw, FIRMWARE_UNLOAD);
509 	return status;
510 }
511 
512 /*
513  * Enable or disable periodic RDMAs from the host to make certain
514  * chipsets resend dropped PCIe messages
515  */
516 
517 static void
518 mxge_dummy_rdma(mxge_softc_t *sc, int enable)
519 {
520 	char buf_bytes[72];
521 	volatile uint32_t *confirm;
522 	volatile char *submit;
523 	uint32_t *buf, dma_low, dma_high;
524 	int i;
525 
526 	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
527 
528 	/* clear confirmation addr */
529 	confirm = (volatile uint32_t *)sc->cmd;
530 	*confirm = 0;
531 	mb();
532 
533 	/* send an rdma command to the PCIe engine, and wait for the
534 	   response in the confirmation address.  The firmware should
535 	   write a -1 there to indicate it is alive and well
536 	*/
537 
538 	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
539 	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
540 	buf[0] = htobe32(dma_high);		/* confirm addr MSW */
541 	buf[1] = htobe32(dma_low);		/* confirm addr LSW */
542 	buf[2] = htobe32(0xffffffff);		/* confirm data */
543 	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
544 	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
545 	buf[3] = htobe32(dma_high); 		/* dummy addr MSW */
546 	buf[4] = htobe32(dma_low); 		/* dummy addr LSW */
547 	buf[5] = htobe32(enable);			/* enable? */
548 
549 
550 	submit = (volatile char *)(sc->sram + 0xfc01c0);
551 
552 	mxge_pio_copy(submit, buf, 64);
553 	mb();
554 	DELAY(1000);
555 	mb();
556 	i = 0;
557 	while (*confirm != 0xffffffff && i < 20) {
558 		DELAY(1000);
559 		i++;
560 	}
561 	if (*confirm != 0xffffffff) {
562 		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
563 			      (enable ? "enable" : "disable"), confirm,
564 			      *confirm);
565 	}
566 	return;
567 }
568 
569 static int
570 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
571 {
572 	mcp_cmd_t *buf;
573 	char buf_bytes[sizeof(*buf) + 8];
574 	volatile mcp_cmd_response_t *response = sc->cmd;
575 	volatile char *cmd_addr = sc->sram + MXGEFW_CMD_OFFSET;
576 	uint32_t dma_low, dma_high;
577 	int sleep_total = 0;
578 
579 	/* ensure buf is aligned to 8 bytes */
580 	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
581 
582 	buf->data0 = htobe32(data->data0);
583 	buf->data1 = htobe32(data->data1);
584 	buf->data2 = htobe32(data->data2);
585 	buf->cmd = htobe32(cmd);
586 	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
587 	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
588 
589 	buf->response_addr.low = htobe32(dma_low);
590 	buf->response_addr.high = htobe32(dma_high);
591 	mtx_lock(&sc->cmd_lock);
592 	response->result = 0xffffffff;
593 	mb();
594 	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
595 
596 	/* wait up to 20ms */
597 	for (sleep_total = 0; sleep_total <  20; sleep_total++) {
598 		bus_dmamap_sync(sc->cmd_dma.dmat,
599 				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
600 		mb();
601 		if (response->result != 0xffffffff) {
602 			if (response->result == 0) {
603 				data->data0 = be32toh(response->data);
604 				mtx_unlock(&sc->cmd_lock);
605 				return 0;
606 			} else {
607 				device_printf(sc->dev,
608 					      "mxge: command %d "
609 					      "failed, result = %d\n",
610 					      cmd, be32toh(response->result));
611 				mtx_unlock(&sc->cmd_lock);
612 				return ENXIO;
613 			}
614 		}
615 		DELAY(1000);
616 	}
617 	mtx_unlock(&sc->cmd_lock);
618 	device_printf(sc->dev, "mxge: command %d timed out"
619 		      "result = %d\n",
620 		      cmd, be32toh(response->result));
621 	return EAGAIN;
622 }
623 
624 
625 static int
626 mxge_load_firmware(mxge_softc_t *sc)
627 {
628 	volatile uint32_t *confirm;
629 	volatile char *submit;
630 	char buf_bytes[72];
631 	uint32_t *buf, size, dma_low, dma_high;
632 	int status, i;
633 
634 	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
635 
636 	size = sc->sram_size;
637 	status = mxge_load_firmware_helper(sc, &size);
638 	if (status) {
639 		device_printf(sc->dev, "firmware loading failed\n");
640 		return status;
641 	}
642 	/* clear confirmation addr */
643 	confirm = (volatile uint32_t *)sc->cmd;
644 	*confirm = 0;
645 	mb();
646 	/* send a reload command to the bootstrap MCP, and wait for the
647 	   response in the confirmation address.  The firmware should
648 	   write a -1 there to indicate it is alive and well
649 	*/
650 
651 	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
652 	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
653 
654 	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
655 	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
656 	buf[2] = htobe32(0xffffffff);	/* confirm data */
657 
658 	/* FIX: All newest firmware should un-protect the bottom of
659 	   the sram before handoff. However, the very first interfaces
660 	   do not. Therefore the handoff copy must skip the first 8 bytes
661 	*/
662 					/* where the code starts*/
663 	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
664 	buf[4] = htobe32(size - 8); 	/* length of code */
665 	buf[5] = htobe32(8);		/* where to copy to */
666 	buf[6] = htobe32(0);		/* where to jump to */
667 
668 	submit = (volatile char *)(sc->sram + 0xfc0000);
669 	mxge_pio_copy(submit, buf, 64);
670 	mb();
671 	DELAY(1000);
672 	mb();
673 	i = 0;
674 	while (*confirm != 0xffffffff && i < 20) {
675 		DELAY(1000*10);
676 		i++;
677 		bus_dmamap_sync(sc->cmd_dma.dmat,
678 				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
679 	}
680 	if (*confirm != 0xffffffff) {
681 		device_printf(sc->dev,"handoff failed (%p = 0x%x)",
682 			confirm, *confirm);
683 
684 		return ENXIO;
685 	}
686 	mxge_dummy_rdma(sc, 1);
687 	return 0;
688 }
689 
690 static int
691 mxge_update_mac_address(mxge_softc_t *sc)
692 {
693 	mxge_cmd_t cmd;
694 	uint8_t *addr = sc->mac_addr;
695 	int status;
696 
697 
698 	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
699 		     | (addr[2] << 8) | addr[3]);
700 
701 	cmd.data1 = ((addr[4] << 8) | (addr[5]));
702 
703 	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
704 	return status;
705 }
706 
707 static int
708 mxge_change_pause(mxge_softc_t *sc, int pause)
709 {
710 	mxge_cmd_t cmd;
711 	int status;
712 
713 	if (pause)
714 		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
715 				       &cmd);
716 	else
717 		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
718 				       &cmd);
719 
720 	if (status) {
721 		device_printf(sc->dev, "Failed to set flow control mode\n");
722 		return ENXIO;
723 	}
724 	sc->pause = pause;
725 	return 0;
726 }
727 
728 static void
729 mxge_change_promisc(mxge_softc_t *sc, int promisc)
730 {
731 	mxge_cmd_t cmd;
732 	int status;
733 
734 	if (promisc)
735 		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
736 				       &cmd);
737 	else
738 		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
739 				       &cmd);
740 
741 	if (status) {
742 		device_printf(sc->dev, "Failed to set promisc mode\n");
743 	}
744 }
745 
746 static int
747 mxge_reset(mxge_softc_t *sc)
748 {
749 
750 	mxge_cmd_t cmd;
751 	mxge_dma_t dmabench_dma;
752 	size_t bytes;
753 	int status;
754 
755 	/* try to send a reset command to the card to see if it
756 	   is alive */
757 	memset(&cmd, 0, sizeof (cmd));
758 	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
759 	if (status != 0) {
760 		device_printf(sc->dev, "failed reset\n");
761 		return ENXIO;
762 	}
763 
764 	/* Now exchange information about interrupts  */
765 	bytes = mxge_max_intr_slots * sizeof (*sc->rx_done.entry);\
766 	memset(sc->rx_done.entry, 0, bytes);
767 	cmd.data0 = (uint32_t)bytes;
768 	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
769 	cmd.data0 = MXGE_LOWPART_TO_U32(sc->rx_done.dma.bus_addr);
770 	cmd.data1 = MXGE_HIGHPART_TO_U32(sc->rx_done.dma.bus_addr);
771 	status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA, &cmd);
772 
773 	status |= mxge_send_cmd(sc,
774 				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
775 
776 
777 	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);
778 
779 	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
780 	sc->irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
781 
782 
783 	status |= mxge_send_cmd(sc,  MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
784 				&cmd);
785 	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
786 	if (status != 0) {
787 		device_printf(sc->dev, "failed set interrupt parameters\n");
788 		return status;
789 	}
790 
791 
792 	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);
793 
794 
795 	/* run a DMA benchmark */
796 	sc->read_dma = sc->write_dma = sc->read_write_dma = 0;
797 	status = mxge_dma_alloc(sc, &dmabench_dma, 4096, 4096);
798 	if (status)
799 		goto dmabench_fail;
800 
801 	/* Read DMA */
802 	cmd.data0 = MXGE_LOWPART_TO_U32(dmabench_dma.bus_addr);
803 	cmd.data1 = MXGE_HIGHPART_TO_U32(dmabench_dma.bus_addr);
804 	cmd.data2 = sc->tx.boundary * 0x10000;
805 
806 	status = mxge_send_cmd(sc, MXGEFW_DMA_TEST, &cmd);
807 	if (status != 0)
808 		device_printf(sc->dev, "read dma benchmark failed\n");
809 	else
810 		sc->read_dma = ((cmd.data0>>16) * sc->tx.boundary * 2) /
811 			(cmd.data0 & 0xffff);
812 
813 	/* Write DMA */
814 	cmd.data0 = MXGE_LOWPART_TO_U32(dmabench_dma.bus_addr);
815 	cmd.data1 = MXGE_HIGHPART_TO_U32(dmabench_dma.bus_addr);
816 	cmd.data2 = sc->tx.boundary * 0x1;
817 	status = mxge_send_cmd(sc, MXGEFW_DMA_TEST, &cmd);
818 	if (status != 0)
819 		device_printf(sc->dev, "write dma benchmark failed\n");
820 	else
821 		sc->write_dma = ((cmd.data0>>16) * sc->tx.boundary * 2) /
822 			(cmd.data0 & 0xffff);
823 	/* Read/Write DMA */
824 	cmd.data0 = MXGE_LOWPART_TO_U32(dmabench_dma.bus_addr);
825 	cmd.data1 = MXGE_HIGHPART_TO_U32(dmabench_dma.bus_addr);
826 	cmd.data2 = sc->tx.boundary * 0x10001;
827 	status = mxge_send_cmd(sc, MXGEFW_DMA_TEST, &cmd);
828 	if (status != 0)
829 		device_printf(sc->dev, "read/write dma benchmark failed\n");
830 	else
831 		sc->read_write_dma =
832 			((cmd.data0>>16) * sc->tx.boundary * 2 * 2) /
833 			(cmd.data0 & 0xffff);
834 
835 	mxge_dma_free(&dmabench_dma);
836 
837 dmabench_fail:
838 	/* reset mcp/driver shared state back to 0 */
839 	bzero(sc->rx_done.entry, bytes);
840 	sc->rx_done.idx = 0;
841 	sc->rx_done.cnt = 0;
842 	sc->tx.req = 0;
843 	sc->tx.done = 0;
844 	sc->tx.pkt_done = 0;
845 	sc->rx_big.cnt = 0;
846 	sc->rx_small.cnt = 0;
847 	sc->rdma_tags_available = 15;
848 	status = mxge_update_mac_address(sc);
849 	mxge_change_promisc(sc, 0);
850 	mxge_change_pause(sc, sc->pause);
851 	return status;
852 }
853 
854 static int
855 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
856 {
857         mxge_softc_t *sc;
858         unsigned int intr_coal_delay;
859         int err;
860 
861         sc = arg1;
862         intr_coal_delay = sc->intr_coal_delay;
863         err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
864         if (err != 0) {
865                 return err;
866         }
867         if (intr_coal_delay == sc->intr_coal_delay)
868                 return 0;
869 
870         if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
871                 return EINVAL;
872 
873 	sx_xlock(&sc->driver_lock);
874 	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
875 	sc->intr_coal_delay = intr_coal_delay;
876 
877 	sx_xunlock(&sc->driver_lock);
878         return err;
879 }
880 
881 static int
882 mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
883 {
884         mxge_softc_t *sc;
885         unsigned int enabled;
886         int err;
887 
888         sc = arg1;
889         enabled = sc->pause;
890         err = sysctl_handle_int(oidp, &enabled, arg2, req);
891         if (err != 0) {
892                 return err;
893         }
894         if (enabled == sc->pause)
895                 return 0;
896 
897 	sx_xlock(&sc->driver_lock);
898 	err = mxge_change_pause(sc, enabled);
899 	sx_xunlock(&sc->driver_lock);
900         return err;
901 }
902 
/*
 * Sysctl handler for read-only firmware counters, which the NIC
 * stores in network (big-endian) byte order: byte-swap the counter
 * and hand it to sysctl_handle_int as an immediate value (arg1 set
 * to NULL, value passed in arg2).
 */
static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
        int err;

        if (arg1 == NULL)
                return EFAULT;
        arg2 = be32toh(*(int *)arg1);
        arg1 = NULL;
        err = sysctl_handle_int(oidp, arg1, arg2, req);

        return err;
}
916 
917 static void
918 mxge_add_sysctls(mxge_softc_t *sc)
919 {
920 	struct sysctl_ctx_list *ctx;
921 	struct sysctl_oid_list *children;
922 	mcp_irq_data_t *fw;
923 
924 	ctx = device_get_sysctl_ctx(sc->dev);
925 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
926 	fw = sc->fw_stats;
927 
928 	/* random information */
929 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
930 		       "firmware_version",
931 		       CTLFLAG_RD, &sc->fw_version,
932 		       0, "firmware version");
933 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
934 		       "serial_number",
935 		       CTLFLAG_RD, &sc->serial_number_string,
936 		       0, "serial number");
937 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
938 		       "product_code",
939 		       CTLFLAG_RD, &sc->product_code_string,
940 		       0, "product_code");
941 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
942 		       "tx_boundary",
943 		       CTLFLAG_RD, &sc->tx.boundary,
944 		       0, "tx_boundary");
945 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
946 		       "read_dma_MBs",
947 		       CTLFLAG_RD, &sc->read_dma,
948 		       0, "DMA Read speed in MB/s");
949 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
950 		       "write_dma_MBs",
951 		       CTLFLAG_RD, &sc->write_dma,
952 		       0, "DMA Write speed in MB/s");
953 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
954 		       "read_write_dma_MBs",
955 		       CTLFLAG_RD, &sc->read_write_dma,
956 		       0, "DMA concurrent Read/Write speed in MB/s");
957 
958 
959 	/* performance related tunables */
960 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
961 			"intr_coal_delay",
962 			CTLTYPE_INT|CTLFLAG_RW, sc,
963 			0, mxge_change_intr_coal,
964 			"I", "interrupt coalescing delay in usecs");
965 
966 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
967 			"flow_control_enabled",
968 			CTLTYPE_INT|CTLFLAG_RW, sc,
969 			0, mxge_change_flow_control,
970 			"I", "interrupt coalescing delay in usecs");
971 
972 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
973 		       "deassert_wait",
974 		       CTLFLAG_RW, &mxge_deassert_wait,
975 		       0, "Wait for IRQ line to go low in ihandler");
976 
977 	/* stats block from firmware is in network byte order.
978 	   Need to swap it */
979 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
980 			"link_up",
981 			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
982 			0, mxge_handle_be32,
983 			"I", "link up");
984 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
985 			"rdma_tags_available",
986 			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
987 			0, mxge_handle_be32,
988 			"I", "rdma_tags_available");
989 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
990 			"dropped_link_overflow",
991 			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
992 			0, mxge_handle_be32,
993 			"I", "dropped_link_overflow");
994 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
995 			"dropped_link_error_or_filtered",
996 			CTLTYPE_INT|CTLFLAG_RD,
997 			&fw->dropped_link_error_or_filtered,
998 			0, mxge_handle_be32,
999 			"I", "dropped_link_error_or_filtered");
1000 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1001 			"dropped_runt",
1002 			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
1003 			0, mxge_handle_be32,
1004 			"I", "dropped_runt");
1005 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1006 			"dropped_overrun",
1007 			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
1008 			0, mxge_handle_be32,
1009 			"I", "dropped_overrun");
1010 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1011 			"dropped_no_small_buffer",
1012 			CTLTYPE_INT|CTLFLAG_RD,
1013 			&fw->dropped_no_small_buffer,
1014 			0, mxge_handle_be32,
1015 			"I", "dropped_no_small_buffer");
1016 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1017 			"dropped_no_big_buffer",
1018 			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
1019 			0, mxge_handle_be32,
1020 			"I", "dropped_no_big_buffer");
1021 
1022 	/* host counters exported for debugging */
1023 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1024 		       "rx_small_cnt",
1025 		       CTLFLAG_RD, &sc->rx_small.cnt,
1026 		       0, "rx_small_cnt");
1027 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1028 		       "rx_big_cnt",
1029 		       CTLFLAG_RD, &sc->rx_big.cnt,
1030 		       0, "rx_small_cnt");
1031 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1032 		       "tx_req",
1033 		       CTLFLAG_RD, &sc->tx.req,
1034 		       0, "tx_req");
1035 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1036 		       "tx_done",
1037 		       CTLFLAG_RD, &sc->tx.done,
1038 		       0, "tx_done");
1039 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1040 		       "tx_pkt_done",
1041 		       CTLFLAG_RD, &sc->tx.pkt_done,
1042 		       0, "tx_done");
1043 
1044 	/* verbose printing? */
1045 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1046 		       "verbose",
1047 		       CTLFLAG_RW, &mxge_verbose,
1048 		       0, "verbose printing");
1049 
1050 }
1051 
/* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
   backwards one at a time and handle ring wraps */

/*
 * Write descriptors src[1] .. src[cnt-1] into the NIC's send ring,
 * last-first, wrapping via tx->mask.  src[0] is deliberately NOT
 * written here: the caller (mxge_submit_req) writes it afterwards so
 * the whole chain becomes visible to the firmware atomically.
 */
static inline void
mxge_submit_req_backwards(mxge_tx_buf_t *tx,
			    mcp_kreq_ether_send_t *src, int cnt)
{
        int idx, starting_slot;
        starting_slot = tx->req;
        while (cnt > 1) {
                cnt--;
                idx = (starting_slot + cnt) & tx->mask;
                mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
                mb();	/* flush each descriptor before writing the next */
        }
}
1069 
/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.   We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */

static inline void
mxge_submit_req(mxge_tx_buf_t *tx, mcp_kreq_ether_send_t *src,
                  int cnt)
{
        int idx, i;
        uint32_t *src_ints;
	volatile uint32_t *dst_ints;
        mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

        idx = tx->req & tx->mask;

	/* clear the flags in the host copy so the firmware treats the
	   chain as invalid until the final 32-bit rewrite below */
	last_flags = src->flags;
	src->flags = 0;
        mb();
        dst = dstp = &tx->lanai[idx];
        srcp = src;

        if ((idx + cnt) < tx->mask) {
		/* no ring wrap: copy two descriptors (32 bytes) per
		   burst; if cnt is odd the first descriptor is left
		   for the tail copy below (i ends at cnt-1) */
                for (i = 0; i < (cnt - 1); i += 2) {
                        mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
                        mb(); /* force write every 32 bytes */
                        srcp += 2;
                        dstp += 2;
                }
        } else {
                /* submit all but the first request, and ensure
                   that it is submitted below */
                mxge_submit_req_backwards(tx, src, cnt);
                i = 0;
        }
        if (i < cnt) {
                /* submit the first request */
                mxge_pio_copy(dstp, srcp, sizeof(*src));
                mb(); /* barrier before setting valid flag */
        }

        /* re-write the last 32-bits with the valid flags */
        src->flags = last_flags;
        src_ints = (uint32_t *)src;
        src_ints+=3;
        dst_ints = (volatile uint32_t *)dst;
        dst_ints+=3;
        *dst_ints =  *src_ints;
        tx->req += cnt;
        mb();
}
1125 
/*
 * Submit a descriptor chain through the write-combining fifo in
 * 64-byte (4 descriptor) bursts.  tx->req is advanced up front; the
 * fifo write itself is what hands the work to the firmware.
 */
static inline void
mxge_submit_req_wc(mxge_tx_buf_t *tx, mcp_kreq_ether_send_t *src, int cnt)
{
    tx->req += cnt;
    mb();
    while (cnt >= 4) {
	    mxge_pio_copy((volatile char *)tx->wc_fifo, src, 64);
	    mb();
	    src += 4;
	    cnt -= 4;
    }
    if (cnt > 0) {
	    /* pad it to 64 bytes.  The src is 64 bytes bigger than it
	       needs to be so that we don't overrun it */
	    /* NOTE(review): the (cnt<<18) fifo offset presumably tells the
	       firmware the real descriptor count for a partial burst —
	       confirm against the MCP fifo interface spec */
	    mxge_pio_copy(tx->wc_fifo + (cnt<<18), src, 64);
	    mb();
    }
}
1144 
/*
 * DMA-map one outbound frame, build its send-descriptor chain
 * (optionally flagging TCP/UDP checksum offload, and padding runts to
 * the 60-byte ethernet minimum), then hand the chain to the NIC via
 * PIO or the write-combining fifo.  On mapping failure the mbuf is
 * freed and if_oerrors is bumped.
 */
static void
mxge_encap(mxge_softc_t *sc, struct mbuf *m)
{
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t seg_list[MXGE_MAX_SEND_DESC];
	bus_dma_segment_t *seg;
	struct mbuf *m_tmp;
	struct ifnet *ifp;
	mxge_tx_buf_t *tx;
	struct ether_header *eh;
	struct ip *ip;
	int cnt, cum_len, err, i, idx;
	uint16_t flags, pseudo_hdr_offset;
        uint8_t cksum_offset;



	ifp = sc->ifp;
	tx = &sc->tx;

	/* (try to) map the frame for DMA */
	idx = tx->req & tx->mask;
	err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
				      m, seg_list, &cnt,
				      BUS_DMA_NOWAIT);
	if (err == EFBIG) {
		/* Too many segments in the chain.  Try
		   to defrag */
		m_tmp = m_defrag(m, M_NOWAIT);
		if (m_tmp == NULL) {
			goto drop;
		}
		m = m_tmp;
		err = bus_dmamap_load_mbuf_sg(tx->dmat,
					      tx->info[idx].map,
					      m, seg_list, &cnt,
					      BUS_DMA_NOWAIT);
	}
	if (err != 0) {
		device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d\n",
			      err);
		goto drop;
	}
	bus_dmamap_sync(tx->dmat, tx->info[idx].map,
			BUS_DMASYNC_PREWRITE);
	tx->info[idx].m = m;

	req = tx->req_list;
	cksum_offset = 0;
	pseudo_hdr_offset = 0;
	flags = MXGEFW_FLAGS_NO_TSO;

	/* checksum offloading? */
	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
		/* point the firmware at the L4 checksum field;
		   assumes a plain Ethernet + IPv4 layout (no VLAN tag
		   or IPv6 handling here) */
		eh = mtod(m, struct ether_header *);
		ip = (struct ip *) (eh + 1);
		cksum_offset = sizeof(*eh) + (ip->ip_hl << 2);
		pseudo_hdr_offset = cksum_offset +  m->m_pkthdr.csum_data;
		pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
		req->cksum_offset = cksum_offset;
		flags |= MXGEFW_FLAGS_CKSUM;
	}
	if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
		flags |= MXGEFW_FLAGS_SMALL;

	/* convert segments into a request list */
	cum_len = 0;
	seg = seg_list;
	req->flags = MXGEFW_FLAGS_FIRST;
	for (i = 0; i < cnt; i++) {
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		req->length = htobe16(seg->ds_len);
		req->cksum_offset = cksum_offset;
		/* cksum_offset is only meaningful for the segment that
		   still contains the checksum start; zero it once passed */
		if (cksum_offset > seg->ds_len)
			cksum_offset -= seg->ds_len;
		else
			cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * MXGEFW_FLAGS_ALIGN_ODD);
		cum_len += seg->ds_len;
		seg++;
		req++;
		req->flags = 0;	/* pre-clear the next (possibly unused) slot */
	}
	req--;
	/* pad runts to 60 bytes */
	if (cum_len < 60) {
		/* append a descriptor pointing at the shared
		   zero-filled pad buffer */
		req++;
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->length = htobe16(60 - cum_len);
		req->cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * MXGEFW_FLAGS_ALIGN_ODD);
		cnt++;
	}

	tx->req_list[0].rdma_count = cnt;
#if 0
	/* print what the firmware will see */
	for (i = 0; i < cnt; i++) {
		printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
		    "cso:%d, flags:0x%x, rdma:%d\n",
		    i, (int)ntohl(tx->req_list[i].addr_high),
		    (int)ntohl(tx->req_list[i].addr_low),
		    (int)ntohs(tx->req_list[i].length),
		    (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
		    tx->req_list[i].cksum_offset, tx->req_list[i].flags,
		    tx->req_list[i].rdma_count);
	}
	printf("--------------\n");
#endif
	/* mark the ring slot of this packet's last descriptor so
	   mxge_tx_done() can count a completed packet there */
	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	if (tx->wc_fifo == NULL)
		mxge_submit_req(tx, tx->req_list, cnt);
	else
		mxge_submit_req_wc(tx, tx->req_list, cnt);
	return;

drop:
	m_freem(m);
	ifp->if_oerrors++;
	return;
}
1278 
1279 
1280 
1281 
/*
 * Drain the interface send queue into the NIC while at least
 * MXGE_MAX_SEND_DESC free slots remain (the worst case one frame can
 * consume).  Caller must hold sc->tx_lock.  If the ring fills before
 * the queue empties, IFF_DRV_OACTIVE is set to throttle the stack.
 */
static inline void
mxge_start_locked(mxge_softc_t *sc)
{
	struct mbuf *m;
	struct ifnet *ifp;

	ifp = sc->ifp;
	while ((sc->tx.mask - (sc->tx.req - sc->tx.done))
	       > MXGE_MAX_SEND_DESC) {

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL) {
			return;
		}
		/* let BPF see it */
		BPF_MTAP(ifp, m);

		/* give it to the nic */
		mxge_encap(sc, m);
	}
	/* ran out of transmit slots */
	sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
}
1305 
1306 static void
1307 mxge_start(struct ifnet *ifp)
1308 {
1309 	mxge_softc_t *sc = ifp->if_softc;
1310 
1311 
1312 	mtx_lock(&sc->tx_lock);
1313 	mxge_start_locked(sc);
1314 	mtx_unlock(&sc->tx_lock);
1315 }
1316 
/*
 * copy an array of mcp_kreq_ether_recv_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.   We re-write the first segment's low
 * DMA address to mark it valid only after we write the entire chunk
 * in a burst
 */
static inline void
mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
		mcp_kreq_ether_recv_t *src)
{
	uint32_t low;

	/* poison the first address so the firmware ignores the batch
	   until the real value is rewritten below */
	low = src->addr_low;
	src->addr_low = 0xffffffff;
	mxge_pio_copy(dst, src, 8 * sizeof (*src));
	mb();
	dst->addr_low = low;	/* make all 8 entries valid at once */
	mb();
}
1337 
/*
 * Allocate and DMA-map a small (MHLEN) receive mbuf for slot idx of
 * the small rx ring, recording its bus address in the shadow ring.
 * Every 8th slot (idx % 8 == 7) the preceding batch of 8 shadow
 * entries is pushed to the NIC — note this happens even if this
 * slot's allocation failed, re-posting whatever is in the shadow ring.
 * Returns 0 or an errno.
 */
static int
mxge_get_buf_small(mxge_softc_t *sc, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg;
	struct mbuf *m;
	mxge_rx_buf_t *rx = &sc->rx_small;
	int cnt, err;

	m = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m == NULL) {
		rx->alloc_fail++;
		err = ENOBUFS;
		goto done;
	}
	m->m_len = MHLEN;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      &seg, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_free(m);
		goto done;
	}
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));

done:
	if ((idx & 7) == 7) {
		if (rx->wc_fifo == NULL)
			mxge_submit_8rx(&rx->lanai[idx - 7],
					&rx->shadow[idx - 7]);
		else {
			mb();
			mxge_pio_copy(rx->wc_fifo, &rx->shadow[idx - 7], 64);
		}
        }
	return err;
}
1377 
/*
 * Allocate and DMA-map a big (sc->big_bytes cluster) receive mbuf for
 * slot idx of the big rx ring; otherwise identical in structure to
 * mxge_get_buf_small(), including the batch-of-8 submit to the NIC.
 * Returns 0 or an errno.
 */
static int
mxge_get_buf_big(mxge_softc_t *sc, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg;
	struct mbuf *m;
	mxge_rx_buf_t *rx = &sc->rx_big;
	int cnt, err;

	m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, sc->big_bytes);
	if (m == NULL) {
		rx->alloc_fail++;
		err = ENOBUFS;
		goto done;
	}
	m->m_len = sc->big_bytes;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      &seg, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_free(m);
		goto done;
	}
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));

done:
	if ((idx & 7) == 7) {
		if (rx->wc_fifo == NULL)
			mxge_submit_8rx(&rx->lanai[idx - 7],
					&rx->shadow[idx - 7]);
		else {
			mb();
			mxge_pio_copy(rx->wc_fifo, &rx->shadow[idx - 7], 64);
		}
        }
	return err;
}
1417 
1418 static inline void
1419 mxge_rx_csum(struct mbuf *m, int csum)
1420 {
1421 	struct ether_header *eh;
1422 	struct ip *ip;
1423 
1424 	eh = mtod(m, struct ether_header *);
1425 	if (__predict_true(eh->ether_type ==  htons(ETHERTYPE_IP))) {
1426 		ip = (struct ip *)(eh + 1);
1427 		if (__predict_true(ip->ip_p == IPPROTO_TCP ||
1428 				   ip->ip_p == IPPROTO_UDP)) {
1429 			m->m_pkthdr.csum_data = csum;
1430 			m->m_pkthdr.csum_flags = CSUM_DATA_VALID;
1431 		}
1432 	}
1433 }
1434 
/*
 * Handle a received frame that spans one or more big-ring buffers.
 * For each buffer: replace it with a fresh mbuf, unmap the old one,
 * and chain the old mbufs into a packet which is passed up the stack.
 * If a replacement allocation fails mid-frame, the partial chain is
 * dropped and the remaining slots are still cycled through the
 * allocator so the ring stays populated.
 */
static inline void
mxge_rx_done_big(mxge_softc_t *sc, int len, int csum)
{
	struct ifnet *ifp;
	struct mbuf *m = 0; 		/* -Wunitialized */
	struct mbuf *m_prev = 0;	/* -Wunitialized */
	struct mbuf *m_head = 0;
	bus_dmamap_t old_map;
	mxge_rx_buf_t *rx;
	int idx;


	rx = &sc->rx_big;
	ifp = sc->ifp;
	while (len > 0) {
		idx = rx->cnt & rx->mask;
                rx->cnt++;
		/* save a pointer to the received mbuf */
		m = rx->info[idx].m;
		/* try to replace the received mbuf */
		if (mxge_get_buf_big(sc, rx->extra_map, idx)) {
			goto drop;
		}
		/* unmap the received buffer */
		old_map = rx->info[idx].map;
		bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(rx->dmat, old_map);

		/* swap the bus_dmamap_t's */
		rx->info[idx].map = rx->extra_map;
		rx->extra_map = old_map;

		/* chain multiple segments together */
		if (!m_head) {
			m_head = m;
			/* mcp implicitly skips 1st MXGEFW_PAD bytes so
			 * that the packet payload is properly aligned */
			m->m_data += MXGEFW_PAD;
			m->m_pkthdr.len = len;
			m->m_len = sc->big_bytes - MXGEFW_PAD;
		} else {
			m->m_len = sc->big_bytes;
			m->m_flags &= ~M_PKTHDR;
			m_prev->m_next = m;
		}
		len -= m->m_len;
		m_prev = m;
	}

	/* trim trailing garbage from the last mbuf in the chain.  If
	 * there is any garbage, len will be negative */
	m->m_len += len;

	/* if the checksum is valid, mark it in the mbuf header */
	if (sc->csum_flag)
		mxge_rx_csum(m_head, csum);

	/* pass the frame up the stack */
	m_head->m_pkthdr.rcvif = ifp;
	ifp->if_ipackets++;
	(*ifp->if_input)(ifp, m_head);
	return;

drop:
	/* drop the frame -- the old mbuf(s) are re-cycled by running
	   every slot through the allocator */
        if (m_head) {
		/* account for the buffers already consumed above */
                len -= sc->big_bytes;
                m_freem(m_head);
        } else {
                len -= (sc->big_bytes + MXGEFW_PAD);
        }
        while ((int)len > 0) {
                idx = rx->cnt & rx->mask;
                rx->cnt++;
                m = rx->info[idx].m;
		/* only recycle slots whose replacement succeeded */
                if (0 == (mxge_get_buf_big(sc, rx->extra_map, idx))) {
			m_freem(m);
			/* unmap the received buffer */
			old_map = rx->info[idx].map;
			bus_dmamap_sync(rx->dmat, old_map,
					BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rx->dmat, old_map);

			/* swap the bus_dmamap_t's */
			rx->info[idx].map = rx->extra_map;
			rx->extra_map = old_map;
		}
                len -= sc->big_bytes;
        }

	ifp->if_ierrors++;

}
1529 
/*
 * Handle a received frame that fits in a single small-ring buffer:
 * replace the buffer, unmap the old one, set up the mbuf header and
 * pass the frame up the stack.  If replacement fails the frame is
 * dropped and the old mbuf stays in the ring.
 */
static inline void
mxge_rx_done_small(mxge_softc_t *sc, uint32_t len, uint32_t csum)
{
	struct ifnet *ifp;
	struct mbuf *m;
	mxge_rx_buf_t *rx;
	bus_dmamap_t old_map;
	int idx;

	ifp = sc->ifp;
	rx = &sc->rx_small;
	idx = rx->cnt & rx->mask;
	rx->cnt++;
	/* save a pointer to the received mbuf */
	m = rx->info[idx].m;
	/* try to replace the received mbuf */
	if (mxge_get_buf_small(sc, rx->extra_map, idx)) {
		/* drop the frame -- the old mbuf is re-cycled */
		ifp->if_ierrors++;
		return;
	}

	/* unmap the received buffer */
	old_map = rx->info[idx].map;
	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(rx->dmat, old_map);

	/* swap the bus_dmamap_t's */
	rx->info[idx].map = rx->extra_map;
	rx->extra_map = old_map;

	/* mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned */
	m->m_data += MXGEFW_PAD;

	/* if the checksum is valid, mark it in the mbuf header */
	if (sc->csum_flag)
		mxge_rx_csum(m, csum);

	/* pass the frame up the stack */
	m->m_pkthdr.rcvif = ifp;
	m->m_len = m->m_pkthdr.len = len;
	ifp->if_ipackets++;
	(*ifp->if_input)(ifp, m);
}
1575 
/*
 * Drain the firmware's rx completion ring, dispatching each entry to
 * the small or big handler by length.  A zero length marks an empty
 * slot (entries are cleared as they are consumed).  Bails out after
 * two full rings' worth of entries to bound time spent per interrupt.
 */
static inline void
mxge_clean_rx_done(mxge_softc_t *sc)
{
	mxge_rx_done_t *rx_done = &sc->rx_done;
	int limit = 0;
	uint16_t length;
	uint16_t checksum;


	while (rx_done->entry[rx_done->idx].length != 0) {
		length = ntohs(rx_done->entry[rx_done->idx].length);
		rx_done->entry[rx_done->idx].length = 0;
		checksum = ntohs(rx_done->entry[rx_done->idx].checksum);
		/* frames that fit a small (MHLEN) buffer came from the
		   small ring; everything else from the big ring */
		if (length <= MHLEN)
			mxge_rx_done_small(sc, length, checksum);
		else
			mxge_rx_done_big(sc, length, checksum);
		rx_done->cnt++;
		rx_done->idx = rx_done->cnt & (mxge_max_intr_slots - 1);

		/* limit potential for livelock */
		if (__predict_false(++limit > 2 * mxge_max_intr_slots))
			break;

	}
}
1602 
1603 
/*
 * Reclaim transmit descriptors up to the firmware's completed-packet
 * count (mcp_idx): free mbufs and unload DMA maps attached to the
 * first descriptor of each frame, and count a finished packet at each
 * slot flagged by mxge_encap().  Restarts the stack's transmit path
 * if the ring has drained below 1/4 occupancy while OACTIVE was set.
 */
static inline void
mxge_tx_done(mxge_softc_t *sc, uint32_t mcp_idx)
{
	struct ifnet *ifp;
	mxge_tx_buf_t *tx;
	struct mbuf *m;
	bus_dmamap_t map;
	int idx, limit;

	limit = 0;
	tx = &sc->tx;
	ifp = sc->ifp;
	while (tx->pkt_done != mcp_idx) {
		idx = tx->done & tx->mask;
		tx->done++;
		m = tx->info[idx].m;
		/* mbuf and DMA map only attached to the first
		   segment per-mbuf */
		if (m != NULL) {
			ifp->if_opackets++;
			tx->info[idx].m = NULL;
			map = tx->info[idx].map;
			bus_dmamap_unload(tx->dmat, map);
			m_freem(m);
		}
		if (tx->info[idx].flag) {
			tx->info[idx].flag = 0;
			tx->pkt_done++;
		}
		/* limit potential for livelock by only handling
		   2 full tx rings per call */
		if (__predict_false(++limit >  2 * tx->mask))
			break;
	}

	/* If we have space, clear IFF_OACTIVE to tell the stack that
           its OK to send packets */

	if (ifp->if_drv_flags & IFF_DRV_OACTIVE &&
	    tx->req - tx->done < (tx->mask + 1)/4) {
		mtx_lock(&sc->tx_lock);
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
		mxge_start_locked(sc);
		mtx_unlock(&sc->tx_lock);
	}
}
1650 
/*
 * Interrupt handler.  Confirms the DMA'd stats block is valid,
 * deasserts the legacy IRQ line, then loops reclaiming tx completions
 * and rx frames until the firmware stops indicating work.  Also
 * reports link-state changes and RDMA timeouts when the firmware has
 * pushed a stats update, and finally writes the irq_claim tokens to
 * re-arm the interrupt.
 */
static void
mxge_intr(void *arg)
{
	mxge_softc_t *sc = arg;
	mcp_irq_data_t *stats = sc->fw_stats;
	mxge_tx_buf_t *tx = &sc->tx;
	mxge_rx_done_t *rx_done = &sc->rx_done;
	uint32_t send_done_count;
	uint8_t valid;


	/* make sure the DMA has finished */
	if (!stats->valid) {
		return;
	}
	valid = stats->valid;

	/* lower legacy IRQ  */
	*sc->irq_deassert = 0;
	mb();
	if (!mxge_deassert_wait)
		/* don't wait for conf. that irq is low */
		stats->valid = 0;
	do {
		/* check for transmit completes and receives */
		send_done_count = be32toh(stats->send_done_count);
		while ((send_done_count != tx->pkt_done) ||
		       (rx_done->entry[rx_done->idx].length != 0)) {
			mxge_tx_done(sc, (int)send_done_count);
			mxge_clean_rx_done(sc);
			send_done_count = be32toh(stats->send_done_count);
		}
		/* when deassert_wait is set, the firmware clears
		   stats->valid once the IRQ line is actually low */
	} while (*((volatile uint8_t *) &stats->valid));

	if (__predict_false(stats->stats_updated)) {
		if (sc->link_state != stats->link_up) {
			sc->link_state = stats->link_up;
			if (sc->link_state) {
				if_link_state_change(sc->ifp, LINK_STATE_UP);
				if (mxge_verbose)
					device_printf(sc->dev, "link up\n");
			} else {
				if_link_state_change(sc->ifp, LINK_STATE_DOWN);
				if (mxge_verbose)
					device_printf(sc->dev, "link down\n");
			}
		}
		if (sc->rdma_tags_available !=
		    be32toh(sc->fw_stats->rdma_tags_available)) {
			sc->rdma_tags_available =
				be32toh(sc->fw_stats->rdma_tags_available);
			device_printf(sc->dev, "RDMA timed out! %d tags "
				      "left\n", sc->rdma_tags_available);
		}
		sc->down_cnt += stats->link_down;
	}

	/* check to see if we have rx token to pass back */
	if (valid & 0x1)
	    *sc->irq_claim = be32toh(3);
	*(sc->irq_claim + 1) = be32toh(3);
}
1713 
/*
 * Watchdog handler.  Currently a stub that only logs that it fired;
 * no transmit-timeout recovery is implemented yet.
 */
static void
mxge_watchdog(struct ifnet *ifp)
{
	/* __func__ is standard C99; __FUNCTION__ is a gcc extension */
	printf("%s called\n", __func__);
}
1719 
/*
 * if_init handler.  Intentionally empty: bringing the interface up is
 * handled through mxge_open() instead.  NOTE(review): presumably
 * installed as the ifnet if_init hook — confirm against attach code.
 */
static void
mxge_init(void *arg)
{
}
1724 
1725 
1726 
1727 static void
1728 mxge_free_mbufs(mxge_softc_t *sc)
1729 {
1730 	int i;
1731 
1732 	for (i = 0; i <= sc->rx_big.mask; i++) {
1733 		if (sc->rx_big.info[i].m == NULL)
1734 			continue;
1735 		bus_dmamap_unload(sc->rx_big.dmat,
1736 				  sc->rx_big.info[i].map);
1737 		m_freem(sc->rx_big.info[i].m);
1738 		sc->rx_big.info[i].m = NULL;
1739 	}
1740 
1741 	for (i = 0; i <= sc->rx_big.mask; i++) {
1742 		if (sc->rx_big.info[i].m == NULL)
1743 			continue;
1744 		bus_dmamap_unload(sc->rx_big.dmat,
1745 				  sc->rx_big.info[i].map);
1746 		m_freem(sc->rx_big.info[i].m);
1747 		sc->rx_big.info[i].m = NULL;
1748 	}
1749 
1750 	for (i = 0; i <= sc->tx.mask; i++) {
1751 		if (sc->tx.info[i].m == NULL)
1752 			continue;
1753 		bus_dmamap_unload(sc->tx.dmat,
1754 				  sc->tx.info[i].map);
1755 		m_freem(sc->tx.info[i].m);
1756 		sc->tx.info[i].m = NULL;
1757 	}
1758 }
1759 
/*
 * Release everything mxge_alloc_rings() created, in reverse order:
 * host-side arrays, per-slot dmamaps, the extra (spare) dmamaps, and
 * finally the three dma tags.  Safe to call on a partially-initialized
 * softc — every pointer is checked before use, which is what the
 * alloc path's goto-cleanup relies on.
 */
static void
mxge_free_rings(mxge_softc_t *sc)
{
	int i;

	if (sc->tx.req_bytes != NULL) {
		free(sc->tx.req_bytes, M_DEVBUF);
	}
	if (sc->rx_small.shadow != NULL)
		free(sc->rx_small.shadow, M_DEVBUF);
	if (sc->rx_big.shadow != NULL)
		free(sc->rx_big.shadow, M_DEVBUF);
	if (sc->tx.info != NULL) {
		/* destroy per-slot maps before freeing the info array
		   that holds them */
		for (i = 0; i <= sc->tx.mask; i++) {
			if (sc->tx.info[i].map != NULL)
				bus_dmamap_destroy(sc->tx.dmat,
						   sc->tx.info[i].map);
		}
		free(sc->tx.info, M_DEVBUF);
	}
	if (sc->rx_small.info != NULL) {
		for (i = 0; i <= sc->rx_small.mask; i++) {
			if (sc->rx_small.info[i].map != NULL)
				bus_dmamap_destroy(sc->rx_small.dmat,
						   sc->rx_small.info[i].map);
		}
		free(sc->rx_small.info, M_DEVBUF);
	}
	if (sc->rx_big.info != NULL) {
		for (i = 0; i <= sc->rx_big.mask; i++) {
			if (sc->rx_big.info[i].map != NULL)
				bus_dmamap_destroy(sc->rx_big.dmat,
						   sc->rx_big.info[i].map);
		}
		free(sc->rx_big.info, M_DEVBUF);
	}
	if (sc->rx_big.extra_map != NULL)
		bus_dmamap_destroy(sc->rx_big.dmat,
				   sc->rx_big.extra_map);
	if (sc->rx_small.extra_map != NULL)
		bus_dmamap_destroy(sc->rx_small.dmat,
				   sc->rx_small.extra_map);
	if (sc->tx.dmat != NULL)
		bus_dma_tag_destroy(sc->tx.dmat);
	if (sc->rx_small.dmat != NULL)
		bus_dma_tag_destroy(sc->rx_small.dmat);
	if (sc->rx_big.dmat != NULL)
		bus_dma_tag_destroy(sc->rx_big.dmat);
}
1809 
/*
 * Query the firmware for its tx/rx ring sizes, then allocate all
 * host-side ring state: the aligned tx request copy block, the rx
 * shadow rings, the per-slot info arrays, the three bus_dma tags, and
 * a dmamap for every slot (plus one spare per rx ring used for
 * replace-then-swap buffer recycling).  On any failure, everything
 * allocated so far is torn down via mxge_free_rings().
 * Returns 0 or an errno.
 */
static int
mxge_alloc_rings(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int tx_ring_size, rx_ring_size;
	int tx_ring_entries, rx_ring_entries;
	int i, err;
	unsigned long bytes;

	/* get ring sizes */
	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
	tx_ring_size = cmd.data0;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine ring sizes\n");
		goto abort_with_nothing;
	}

	rx_ring_size = cmd.data0;

	/* firmware reports sizes in bytes; convert to entry counts */
	tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
	rx_ring_entries = rx_ring_size / sizeof (mcp_dma_addr_t);
	sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
	IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
	IFQ_SET_READY(&sc->ifp->if_snd);

	/* ring sizes are powers of two, so size-1 works as an index mask */
	sc->tx.mask = tx_ring_entries - 1;
	sc->rx_small.mask = sc->rx_big.mask = rx_ring_entries - 1;

	err = ENOMEM;

	/* allocate the tx request copy block */
	/* +4 entries of slack: mxge_encap pre-clears one slot past the
	   chain; +8 bytes so the start can be rounded up to 8-byte
	   alignment below */
	bytes = 8 +
		sizeof (*sc->tx.req_list) * (MXGE_MAX_SEND_DESC + 4);
	sc->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
	if (sc->tx.req_bytes == NULL)
		goto abort_with_nothing;
	/* ensure req_list entries are aligned to 8 bytes */
	sc->tx.req_list = (mcp_kreq_ether_send_t *)
		((unsigned long)(sc->tx.req_bytes + 7) & ~7UL);

	/* allocate the rx shadow rings */
	bytes = rx_ring_entries * sizeof (*sc->rx_small.shadow);
	sc->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_small.shadow == NULL)
		goto abort_with_alloc;

	bytes = rx_ring_entries * sizeof (*sc->rx_big.shadow);
	sc->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_big.shadow == NULL)
		goto abort_with_alloc;

	/* allocate the host info rings */
	bytes = tx_ring_entries * sizeof (*sc->tx.info);
	sc->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->tx.info == NULL)
		goto abort_with_alloc;

	bytes = rx_ring_entries * sizeof (*sc->rx_small.info);
	sc->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_small.info == NULL)
		goto abort_with_alloc;

	bytes = rx_ring_entries * sizeof (*sc->rx_big.info);
	sc->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_big.info == NULL)
		goto abort_with_alloc;

	/* allocate the busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 sc->tx.boundary,	/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 MXGE_MAX_ETHER_MTU,	/* maxsize */
				 MXGE_MAX_SEND_DESC,	/* num segs */
				 sc->tx.boundary,	/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &sc->tx.dmat);		/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating tx dmat\n",
			      err);
		goto abort_with_alloc;
	}

	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 MHLEN,			/* maxsize */
				 1,			/* num segs */
				 MHLEN,			/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &sc->rx_small.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
			      err);
		goto abort_with_alloc;
	}

	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 4096,			/* maxsize */
				 1,			/* num segs */
				 4096,			/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &sc->rx_big.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
			      err);
		goto abort_with_alloc;
	}

	/* now use these tags to setup dmamaps for each slot
	   in each ring */
	for (i = 0; i <= sc->tx.mask; i++) {
		err = bus_dmamap_create(sc->tx.dmat, 0,
					&sc->tx.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d  tx dmamap\n",
			      err);
			goto abort_with_alloc;
		}
	}
	for (i = 0; i <= sc->rx_small.mask; i++) {
		err = bus_dmamap_create(sc->rx_small.dmat, 0,
					&sc->rx_small.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d  rx_small dmamap\n",
				      err);
			goto abort_with_alloc;
		}
	}
	/* spare map used by the replace-then-swap rx refill path */
	err = bus_dmamap_create(sc->rx_small.dmat, 0,
				&sc->rx_small.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
			      err);
			goto abort_with_alloc;
	}

	for (i = 0; i <= sc->rx_big.mask; i++) {
		err = bus_dmamap_create(sc->rx_big.dmat, 0,
					&sc->rx_big.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d  rx_big dmamap\n",
			      err);
			goto abort_with_alloc;
		}
	}
	err = bus_dmamap_create(sc->rx_big.dmat, 0,
				&sc->rx_big.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
			      err);
			goto abort_with_alloc;
	}
	return 0;

abort_with_alloc:
	mxge_free_rings(sc);

abort_with_nothing:
	return err;
}
1986 
/*
 * Bring the interface up: reset the NIC, size the big rx buffers for
 * the current MTU, allocate rings, hook the interrupt, fetch the
 * lanai ring pointers from the firmware, stock both receive rings,
 * push MTU/buffer-size/stats-DMA parameters, and finally start the
 * firmware's ethernet engine.  Failures unwind whatever was set up.
 * Returns 0 or an errno.
 */
static int
mxge_open(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int i, err;
	bus_dmamap_t map;


	err = mxge_reset(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to reset\n");
		return EIO;
	}

	/* use a regular cluster if the frame fits, otherwise a
	   page-sized jumbo cluster */
	if (MCLBYTES >=
	    sc->ifp->if_mtu + ETHER_HDR_LEN + MXGEFW_PAD)
		sc->big_bytes = MCLBYTES;
	else
		sc->big_bytes = MJUMPAGESIZE;

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to allocate rings\n");
		return err;
	}

	err = bus_setup_intr(sc->dev, sc->irq_res,
			     INTR_TYPE_NET | INTR_MPSAFE,
			     mxge_intr, sc, &sc->ih);
	if (err != 0) {
		goto abort_with_rings;
	}

	/* get the lanai pointers to the send and receive rings */

	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
	sc->tx.lanai =
		(volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
	err |= mxge_send_cmd(sc,
				 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
	sc->rx_small.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
	err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
	sc->rx_big.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);

	if (err != 0) {
		device_printf(sc->dev,
			      "failed to get ring sizes or locations\n");
		err = EIO;
		goto abort_with_irq;
	}

	/* fixed SRAM offsets of the write-combining fifos, when the
	   BAR is mapped write-combining */
	if (sc->wc) {
		sc->tx.wc_fifo = sc->sram + 0x200000;
		sc->rx_small.wc_fifo = sc->sram + 0x300000;
		sc->rx_big.wc_fifo = sc->sram + 0x340000;
	} else {
		sc->tx.wc_fifo = 0;
		sc->rx_small.wc_fifo = 0;
		sc->rx_big.wc_fifo = 0;
	}


	/* stock receive rings */
	for (i = 0; i <= sc->rx_small.mask; i++) {
		map = sc->rx_small.info[i].map;
		err = mxge_get_buf_small(sc, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d smalls\n",
				      i, sc->rx_small.mask + 1);
			goto abort;
		}
	}
	for (i = 0; i <= sc->rx_big.mask; i++) {
		map = sc->rx_big.info[i].map;
		err = mxge_get_buf_big(sc, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d bigs\n",
				      i, sc->rx_big.mask + 1);
			goto abort;
		}
	}

	/* Give the firmware the mtu and the big and small buffer
	   sizes.  The firmware wants the big buf size to be a power
	   of two. Luckily, FreeBSD's clusters are powers of two */
	cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
	cmd.data0 = MHLEN;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
			     &cmd);
	cmd.data0 = sc->big_bytes;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
	/* Now give him the pointer to the stats block */
	cmd.data0 = MXGE_LOWPART_TO_U32(sc->fw_stats_dma.bus_addr);
	cmd.data1 = MXGE_HIGHPART_TO_U32(sc->fw_stats_dma.bus_addr);
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA, &cmd);

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	/* Finally, start the firmware running */
	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
	if (err) {
		device_printf(sc->dev, "Couldn't bring up link\n");
		goto abort;
	}
	sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
	sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	return 0;


abort:
	mxge_free_mbufs(sc);
abort_with_irq:
	bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
abort_with_rings:
	mxge_free_rings(sc);
	return err;
}
2111 
/*
 * Bring the interface down: tell the firmware to stop the ethernet
 * device, wait for its acknowledgement interrupt, then tear down the
 * interrupt handler and free all ring/mbuf resources.
 * Always returns 0; a failed or unacknowledged down command is only
 * logged, since we free the resources regardless.
 */
static int
mxge_close(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int err, old_down_cnt;

	/* Clear RUNNING first so the tx/rx paths stop using the rings
	   we are about to free. */
	sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	/* Snapshot down_cnt so we can detect the "down" interrupt;
	   the mb() orders the snapshot before the command is issued. */
	old_down_cnt = sc->down_cnt;
	mb();
	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
	if (err) {
		device_printf(sc->dev, "Couldn't bring down link\n");
	}
	if (old_down_cnt == sc->down_cnt) {
		/* wait for down irq */
		(void)tsleep(&sc->down_cnt, PWAIT, "down mxge", hz);
	}
	/* Re-check: if down_cnt still hasn't advanced after the 1s
	   timeout, the irq never arrived; proceed with teardown anyway. */
	if (old_down_cnt == sc->down_cnt) {
		device_printf(sc->dev, "never got down irq\n");
	}
	if (sc->ih != NULL)
		bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
	mxge_free_mbufs(sc);
	mxge_free_rings(sc);
	return 0;
}
2138 
2139 
2140 static int
2141 mxge_media_change(struct ifnet *ifp)
2142 {
2143 	return EINVAL;
2144 }
2145 
2146 static int
2147 mxge_change_mtu(mxge_softc_t *sc, int mtu)
2148 {
2149 	struct ifnet *ifp = sc->ifp;
2150 	int real_mtu, old_mtu;
2151 	int err = 0;
2152 
2153 
2154 	real_mtu = mtu + ETHER_HDR_LEN;
2155 	if ((real_mtu > MXGE_MAX_ETHER_MTU) ||
2156 	    real_mtu < 60)
2157 		return EINVAL;
2158 	sx_xlock(&sc->driver_lock);
2159 	old_mtu = ifp->if_mtu;
2160 	ifp->if_mtu = mtu;
2161 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2162 		mxge_close(sc);
2163 		err = mxge_open(sc);
2164 		if (err != 0) {
2165 			ifp->if_mtu = old_mtu;
2166 			mxge_close(sc);
2167 			(void) mxge_open(sc);
2168 		}
2169 	}
2170 	sx_xunlock(&sc->driver_lock);
2171 	return err;
2172 }
2173 
2174 static void
2175 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2176 {
2177 	mxge_softc_t *sc = ifp->if_softc;
2178 
2179 
2180 	if (sc == NULL)
2181 		return;
2182 	ifmr->ifm_status = IFM_AVALID;
2183 	ifmr->ifm_status |= sc->fw_stats->link_up ? IFM_ACTIVE : 0;
2184 	ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
2185 	ifmr->ifm_active |= sc->fw_stats->link_up ? IFM_FDX : 0;
2186 }
2187 
2188 static int
2189 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
2190 {
2191 	mxge_softc_t *sc = ifp->if_softc;
2192 	struct ifreq *ifr = (struct ifreq *)data;
2193 	int err, mask;
2194 
2195 	err = 0;
2196 	switch (command) {
2197 	case SIOCSIFADDR:
2198 	case SIOCGIFADDR:
2199 		err = ether_ioctl(ifp, command, data);
2200 		break;
2201 
2202 	case SIOCSIFMTU:
2203 		err = mxge_change_mtu(sc, ifr->ifr_mtu);
2204 		break;
2205 
2206 	case SIOCSIFFLAGS:
2207 		sx_xlock(&sc->driver_lock);
2208 		if (ifp->if_flags & IFF_UP) {
2209 			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
2210 				err = mxge_open(sc);
2211 		} else {
2212 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2213 				mxge_close(sc);
2214 		}
2215 		sx_xunlock(&sc->driver_lock);
2216 		break;
2217 
2218 	case SIOCADDMULTI:
2219 	case SIOCDELMULTI:
2220 		err = 0;
2221 		break;
2222 
2223 	case SIOCSIFCAP:
2224 		sx_xlock(&sc->driver_lock);
2225 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2226 		if (mask & IFCAP_TXCSUM) {
2227 			if (IFCAP_TXCSUM & ifp->if_capenable) {
2228 				ifp->if_capenable &= ~IFCAP_TXCSUM;
2229 				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
2230 			} else {
2231 				ifp->if_capenable |= IFCAP_TXCSUM;
2232 				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
2233 			}
2234 		} else if (mask & IFCAP_RXCSUM) {
2235 			if (IFCAP_RXCSUM & ifp->if_capenable) {
2236 				ifp->if_capenable &= ~IFCAP_RXCSUM;
2237 				sc->csum_flag = 0;
2238 			} else {
2239 				ifp->if_capenable |= IFCAP_RXCSUM;
2240 				sc->csum_flag = 1;
2241 			}
2242 		}
2243 		sx_xunlock(&sc->driver_lock);
2244 		break;
2245 
2246 	case SIOCGIFMEDIA:
2247 		err = ifmedia_ioctl(ifp, (struct ifreq *)data,
2248 				    &sc->media, command);
2249                 break;
2250 
2251 	default:
2252 		err = ENOTTY;
2253         }
2254 	return err;
2255 }
2256 
2257 static void
2258 mxge_fetch_tunables(mxge_softc_t *sc)
2259 {
2260 
2261 	TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
2262 			  &mxge_flow_control);
2263 	TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
2264 			  &mxge_intr_coal_delay);
2265 	TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
2266 			  &mxge_nvidia_ecrc_enable);
2267 	TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
2268 			  &mxge_deassert_wait);
2269 	TUNABLE_INT_FETCH("hw.mxge.verbose",
2270 			  &mxge_verbose);
2271 
2272 	if (bootverbose)
2273 		mxge_verbose = 1;
2274 	if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
2275 		mxge_intr_coal_delay = 30;
2276 	sc->pause = mxge_flow_control;
2277 }
2278 
2279 static int
2280 mxge_attach(device_t dev)
2281 {
2282 	mxge_softc_t *sc = device_get_softc(dev);
2283 	struct ifnet *ifp;
2284 	size_t bytes;
2285 	int rid, err;
2286 	uint16_t cmd;
2287 
2288 	sc->dev = dev;
2289 	mxge_fetch_tunables(sc);
2290 
2291 	err = bus_dma_tag_create(NULL,			/* parent */
2292 				 1,			/* alignment */
2293 				 4096,			/* boundary */
2294 				 BUS_SPACE_MAXADDR,	/* low */
2295 				 BUS_SPACE_MAXADDR,	/* high */
2296 				 NULL, NULL,		/* filter */
2297 				 MXGE_MAX_ETHER_MTU,	/* maxsize */
2298 				 MXGE_MAX_SEND_DESC, 	/* num segs */
2299 				 4096,			/* maxsegsize */
2300 				 0,			/* flags */
2301 				 NULL, NULL,		/* lock */
2302 				 &sc->parent_dmat);	/* tag */
2303 
2304 	if (err != 0) {
2305 		device_printf(sc->dev, "Err %d allocating parent dmat\n",
2306 			      err);
2307 		goto abort_with_nothing;
2308 	}
2309 
2310 	ifp = sc->ifp = if_alloc(IFT_ETHER);
2311 	if (ifp == NULL) {
2312 		device_printf(dev, "can not if_alloc()\n");
2313 		err = ENOSPC;
2314 		goto abort_with_parent_dmat;
2315 	}
2316 	mtx_init(&sc->cmd_lock, NULL,
2317 		 MTX_NETWORK_LOCK, MTX_DEF);
2318 	mtx_init(&sc->tx_lock, device_get_nameunit(dev),
2319 		 MTX_NETWORK_LOCK, MTX_DEF);
2320 	sx_init(&sc->driver_lock, device_get_nameunit(dev));
2321 
2322 	/* Enable DMA and Memory space access */
2323 	pci_enable_busmaster(dev);
2324 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2325 	cmd |= PCIM_CMD_MEMEN;
2326 	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2327 
2328 	/* Map the board into the kernel */
2329 	rid = PCIR_BARS;
2330 	sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
2331 					 ~0, 1, RF_ACTIVE);
2332 	if (sc->mem_res == NULL) {
2333 		device_printf(dev, "could not map memory\n");
2334 		err = ENXIO;
2335 		goto abort_with_lock;
2336 	}
2337 	sc->sram = rman_get_virtual(sc->mem_res);
2338 	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
2339 	if (sc->sram_size > rman_get_size(sc->mem_res)) {
2340 		device_printf(dev, "impossible memory region size %ld\n",
2341 			      rman_get_size(sc->mem_res));
2342 		err = ENXIO;
2343 		goto abort_with_mem_res;
2344 	}
2345 
2346 	/* make NULL terminated copy of the EEPROM strings section of
2347 	   lanai SRAM */
2348 	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
2349 	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
2350 				rman_get_bushandle(sc->mem_res),
2351 				sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
2352 				sc->eeprom_strings,
2353 				MXGE_EEPROM_STRINGS_SIZE - 2);
2354 	err = mxge_parse_strings(sc);
2355 	if (err != 0)
2356 		goto abort_with_mem_res;
2357 
2358 	/* Enable write combining for efficient use of PCIe bus */
2359 	mxge_enable_wc(sc);
2360 
2361 	/* Allocate the out of band dma memory */
2362 	err = mxge_dma_alloc(sc, &sc->cmd_dma,
2363 			     sizeof (mxge_cmd_t), 64);
2364 	if (err != 0)
2365 		goto abort_with_mem_res;
2366 	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
2367 	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
2368 	if (err != 0)
2369 		goto abort_with_cmd_dma;
2370 
2371 	err = mxge_dma_alloc(sc, &sc->fw_stats_dma,
2372 			     sizeof (*sc->fw_stats), 64);
2373 	if (err != 0)
2374 		goto abort_with_zeropad_dma;
2375 	sc->fw_stats = (mcp_irq_data_t *)sc->fw_stats_dma.addr;
2376 
2377 
2378 	/* allocate interrupt queues */
2379 	bytes = mxge_max_intr_slots * sizeof (*sc->rx_done.entry);
2380 	err = mxge_dma_alloc(sc, &sc->rx_done.dma, bytes, 4096);
2381 	if (err != 0)
2382 		goto abort_with_fw_stats;
2383 	sc->rx_done.entry = sc->rx_done.dma.addr;
2384 	bzero(sc->rx_done.entry, bytes);
2385 	/* Add our ithread  */
2386 	rid = 0;
2387 	sc->irq_res = bus_alloc_resource(dev, SYS_RES_IRQ, &rid, 0, ~0,
2388 					 1, RF_SHAREABLE | RF_ACTIVE);
2389 	if (sc->irq_res == NULL) {
2390 		device_printf(dev, "could not alloc interrupt\n");
2391 		goto abort_with_rx_done;
2392 	}
2393 
2394 	/* load the firmware */
2395 	mxge_select_firmware(sc);
2396 
2397 	err = mxge_load_firmware(sc);
2398 	if (err != 0)
2399 		goto abort_with_irq_res;
2400 	sc->intr_coal_delay = mxge_intr_coal_delay;
2401 	err = mxge_reset(sc);
2402 	if (err != 0)
2403 		goto abort_with_irq_res;
2404 
2405 	/* hook into the network stack */
2406 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2407 	ifp->if_baudrate = 100000000;
2408 	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM;
2409 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP;
2410 	ifp->if_capenable = ifp->if_capabilities;
2411 	sc->csum_flag = 1;
2412         ifp->if_init = mxge_init;
2413         ifp->if_softc = sc;
2414         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2415         ifp->if_ioctl = mxge_ioctl;
2416         ifp->if_start = mxge_start;
2417 	ifp->if_watchdog = mxge_watchdog;
2418 	ether_ifattach(ifp, sc->mac_addr);
2419 	/* ether_ifattach sets mtu to 1500 */
2420 	ifp->if_mtu = MXGE_MAX_ETHER_MTU - ETHER_HDR_LEN;
2421 
2422 	/* Initialise the ifmedia structure */
2423 	ifmedia_init(&sc->media, 0, mxge_media_change,
2424 		     mxge_media_status);
2425 	ifmedia_add(&sc->media, IFM_ETHER|IFM_AUTO, 0, NULL);
2426 	mxge_add_sysctls(sc);
2427 	return 0;
2428 
2429 abort_with_irq_res:
2430 	bus_release_resource(dev, SYS_RES_IRQ, 0, sc->irq_res);
2431 abort_with_rx_done:
2432 	sc->rx_done.entry = NULL;
2433 	mxge_dma_free(&sc->rx_done.dma);
2434 abort_with_fw_stats:
2435 	mxge_dma_free(&sc->fw_stats_dma);
2436 abort_with_zeropad_dma:
2437 	mxge_dma_free(&sc->zeropad_dma);
2438 abort_with_cmd_dma:
2439 	mxge_dma_free(&sc->cmd_dma);
2440 abort_with_mem_res:
2441 	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
2442 abort_with_lock:
2443 	pci_disable_busmaster(dev);
2444 	mtx_destroy(&sc->cmd_lock);
2445 	mtx_destroy(&sc->tx_lock);
2446 	sx_destroy(&sc->driver_lock);
2447 	if_free(ifp);
2448 abort_with_parent_dmat:
2449 	bus_dma_tag_destroy(sc->parent_dmat);
2450 
2451 abort_with_nothing:
2452 	return err;
2453 }
2454 
/*
 * Device detach: bring the interface down if running, unhook it from
 * the network stack, and release every resource acquired by
 * mxge_attach() in reverse order.  Always returns 0.
 */
static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	/* close under the driver lock so we don't race an ioctl-driven
	   open/close; mxge_close() also tears down the interrupt. */
	sx_xlock(&sc->driver_lock);
	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
		mxge_close(sc);
	sx_xunlock(&sc->driver_lock);
	ether_ifdetach(sc->ifp);
	bus_release_resource(dev, SYS_RES_IRQ, 0, sc->irq_res);
	sc->rx_done.entry = NULL;
	mxge_dma_free(&sc->rx_done.dma);
	mxge_dma_free(&sc->fw_stats_dma);
	mxge_dma_free(&sc->zeropad_dma);
	mxge_dma_free(&sc->cmd_dma);
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_lock);
	mtx_destroy(&sc->tx_lock);
	sx_destroy(&sc->driver_lock);
	if_free(sc->ifp);
	bus_dma_tag_destroy(sc->parent_dmat);
	return 0;
}
2480 
2481 static int
2482 mxge_shutdown(device_t dev)
2483 {
2484 	return 0;
2485 }
2486 
2487 /*
2488   This file uses Myri10GE driver indentation.
2489 
2490   Local Variables:
2491   c-file-style:"linux"
2492   tab-width:8
2493   End:
2494 */
2495