xref: /freebsd/sys/dev/mxge/if_mxge.c (revision adfa0adec0b5d7c19c220a85ef6ca729235ed172)
1 /******************************************************************************
2 
3 Copyright (c) 2006, Myricom Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Redistributions in binary form must reproduce the above copyright
13     notice, this list of conditions and the following disclaimer in the
14     documentation and/or other materials provided with the distribution.
15 
16  3. Neither the name of the Myricom Inc, nor the names of its
17     contributors may be used to endorse or promote products derived from
18     this software without specific prior written permission.
19 
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31 
32 ***************************************************************************/
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/linker.h>
40 #include <sys/firmware.h>
41 #include <sys/endian.h>
42 #include <sys/sockio.h>
43 #include <sys/mbuf.h>
44 #include <sys/malloc.h>
45 #include <sys/kdb.h>
46 #include <sys/kernel.h>
47 #include <sys/module.h>
48 #include <sys/memrange.h>
49 #include <sys/socket.h>
50 #include <sys/sysctl.h>
51 #include <sys/sx.h>
52 
53 #include <net/if.h>
54 #include <net/if_arp.h>
55 #include <net/ethernet.h>
56 #include <net/if_dl.h>
57 #include <net/if_media.h>
58 
59 #include <net/bpf.h>
60 
61 #include <net/if_types.h>
62 #include <net/if_vlan_var.h>
63 #include <net/zlib.h>
64 
65 #include <netinet/in_systm.h>
66 #include <netinet/in.h>
67 #include <netinet/ip.h>
68 
69 #include <machine/bus.h>
70 #include <machine/resource.h>
71 #include <sys/bus.h>
72 #include <sys/rman.h>
73 
74 #include <dev/pci/pcireg.h>
75 #include <dev/pci/pcivar.h>
76 
77 #include <vm/vm.h>		/* for pmap_mapdev() */
78 #include <vm/pmap.h>
79 
80 #include <dev/mxge/mxge_mcp.h>
81 #include <dev/mxge/mcp_gen_header.h>
82 #include <dev/mxge/if_mxge_var.h>
83 
/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;	/* try to enable ECRC on Nvidia bridges */
static int mxge_max_intr_slots = 1024;	/* entries in the rx-done interrupt queue */
static int mxge_intr_coal_delay = 30;	/* interrupt coalescing delay (usecs) */
static int mxge_deassert_wait = 1;	/* wait for IRQ line deassert in handler */
static int mxge_flow_control = 1;	/* default flow-control (pause) setting */
static int mxge_verbose = 0;		/* extra device_printf() diagnostics */
static char *mxge_fw_unaligned = "mxge_ethp_z8e";	/* fw for unaligned PCIe completions */
static char *mxge_fw_aligned = "mxge_eth_z8e";		/* fw for aligned PCIe completions */

/* newbus method forward declarations */
static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);
99 
/* newbus method table mapping device interface entry points */
static device_method_t mxge_methods[] =
{
  /* Device interface */
  DEVMETHOD(device_probe, mxge_probe),
  DEVMETHOD(device_attach, mxge_attach),
  DEVMETHOD(device_detach, mxge_detach),
  DEVMETHOD(device_shutdown, mxge_shutdown),
  {0, 0}	/* terminator */
};
109 
/* newbus driver glue: name, method table, and per-instance softc size */
static driver_t mxge_driver =
{
  "mxge",
  mxge_methods,
  sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
/* firmware(9) is needed to fetch the MCP firmware images */
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
122 
123 static int
124 mxge_probe(device_t dev)
125 {
126   if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
127       (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E)) {
128 	  device_set_desc(dev, "Myri10G-PCIE-8A");
129 	  return 0;
130   }
131   return ENXIO;
132 }
133 
134 static void
135 mxge_enable_wc(mxge_softc_t *sc)
136 {
137 	struct mem_range_desc mrdesc;
138 	vm_paddr_t pa;
139 	vm_offset_t len;
140 	int err, action;
141 
142 	pa = rman_get_start(sc->mem_res);
143 	len = rman_get_size(sc->mem_res);
144 	mrdesc.mr_base = pa;
145 	mrdesc.mr_len = len;
146 	mrdesc.mr_flags = MDF_WRITECOMBINE;
147 	action = MEMRANGE_SET_UPDATE;
148 	strcpy((char *)&mrdesc.mr_owner, "mxge");
149 	err = mem_range_attr_set(&mrdesc, &action);
150 	if (err != 0) {
151 		device_printf(sc->dev,
152 			      "w/c failed for pa 0x%lx, len 0x%lx, err = %d\n",
153 			      (unsigned long)pa, (unsigned long)len, err);
154 	} else {
155 		sc->wc = 1;
156 	}
157 }
158 
159 
160 /* callback to get our DMA address */
161 static void
162 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
163 			 int error)
164 {
165 	if (error == 0) {
166 		*(bus_addr_t *) arg = segs->ds_addr;
167 	}
168 }
169 
/*
 * Allocate, map, and load a coherent DMA buffer of "bytes" bytes with
 * the given alignment.  On success dma->addr holds the kernel virtual
 * address and dma->bus_addr the device-visible address.  Returns 0 or
 * a bus_dma errno; partially acquired resources are released on failure.
 */
static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
		   bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 4096,			/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO),  &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory; callback stashes the bus address in bus_addr */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}
220 
221 
/*
 * Release a buffer created by mxge_dma_alloc(): unload the map, free
 * the memory, then destroy the tag (reverse order of creation).
 */
static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}
229 
230 /*
231  * The eeprom strings on the lanaiX have the format
232  * SN=x\0
233  * MAC=x:x:x:x:x:x\0
234  * PC=text\0
235  */
236 
237 static int
238 mxge_parse_strings(mxge_softc_t *sc)
239 {
240 #define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)
241 
242 	char *ptr, *limit;
243 	int i, found_mac;
244 
245 	ptr = sc->eeprom_strings;
246 	limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
247 	found_mac = 0;
248 	while (ptr < limit && *ptr != '\0') {
249 		if (memcmp(ptr, "MAC=", 4) == 0) {
250 			ptr += 1;
251 			sc->mac_addr_string = ptr;
252 			for (i = 0; i < 6; i++) {
253 				ptr += 3;
254 				if ((ptr + 2) > limit)
255 					goto abort;
256 				sc->mac_addr[i] = strtoul(ptr, NULL, 16);
257 				found_mac = 1;
258 			}
259 		} else if (memcmp(ptr, "PC=", 3) == 0) {
260 			ptr += 3;
261 			strncpy(sc->product_code_string, ptr,
262 				sizeof (sc->product_code_string) - 1);
263 		} else if (memcmp(ptr, "SN=", 3) == 0) {
264 			ptr += 3;
265 			strncpy(sc->serial_number_string, ptr,
266 				sizeof (sc->serial_number_string) - 1);
267 		}
268 		MXGE_NEXT_STRING(ptr);
269 	}
270 
271 	if (found_mac)
272 		return 0;
273 
274  abort:
275 	device_printf(sc->dev, "failed to parse eeprom_strings\n");
276 
277 	return ENXIO;
278 }
279 
#if #cpu(i386) || defined __i386 || defined i386 || defined __i386__ || #cpu(x86_64) || defined __x86_64__
/*
 * Enable ECRC generation on the upstream Nvidia bridge by ORing 0x40
 * into the register at offset 0x178 of its extended PCIe config
 * space.  The extended space is reached by directly mapping the
 * chipset's config aperture at physical 0xe0000000, since normal
 * config accesses cannot go past offset 0xff.  Returns 0 on success,
 * EIO if the mapping cannot be made or verified.
 */
static int
mxge_enable_nvidia_ecrc(mxge_softc_t *sc, device_t pdev)
{
	uint32_t val;
	unsigned long off;
	char *va, *cfgptr;
	uint16_t vendor_id, device_id;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function.  Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return 0;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves.  This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	/* collect the bridge's bus/slot/function plus vendor/device
	   IDs for the address computation and sanity check below */
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	/* physical address of this function's config space:
	   1MB per bus, 4KB per (slot, function) */
	off =  0xe0000000UL
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);


	if (va == NULL) {
		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
		return EIO;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return EIO;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	/* all-ones means the extended register did not respond */
	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return EIO;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return 0;
}
#else
/*
 * Non-x86 stub: the nForce4 ECRC workaround applies only to
 * i386/amd64 systems.
 */
static int
mxge_enable_nvidia_ecrc(mxge_softc_t *sc, device_t pdev)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return ENXIO;
}
#endif
379 /*
380  * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
381  * when the PCI-E Completion packets are aligned on an 8-byte
382  * boundary.  Some PCI-E chip sets always align Completion packets; on
383  * the ones that do not, the alignment can be enforced by enabling
384  * ECRC generation (if supported).
385  *
386  * When PCI-E Completion packets are not aligned, it is actually more
387  * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
388  *
389  * If the driver can neither enable ECRC nor verify that it has
390  * already been enabled, then it must use a firmware image which works
391  * around unaligned completion packets (ethp_z8e.dat), and it should
392  * also ensure that it never gives the device a Read-DMA which is
393  * larger than 2KB by setting the tx.boundary to 2KB.  If ECRC is
394  * enabled, then the driver should use the aligned (eth_z8e.dat)
395  * firmware image, and set tx.boundary to 4KB.
396  */
397 
398 static void
399 mxge_select_firmware(mxge_softc_t *sc)
400 {
401 	int err, aligned = 0;
402 	device_t pdev;
403 	uint16_t pvend, pdid;
404 
405 	pdev = device_get_parent(device_get_parent(sc->dev));
406 	if (pdev == NULL) {
407 		device_printf(sc->dev, "could not find parent?\n");
408 		goto abort;
409 	}
410 	pvend = pci_read_config(pdev, PCIR_VENDOR, 2);
411 	pdid = pci_read_config(pdev, PCIR_DEVICE, 2);
412 
413 	/* see if we can enable ECRC's on an upstream
414 	   Nvidia bridge */
415 	if (mxge_nvidia_ecrc_enable &&
416 	    (pvend == 0x10de && pdid == 0x005d)) {
417 		err = mxge_enable_nvidia_ecrc(sc, pdev);
418 		if (err == 0) {
419 			aligned = 1;
420 			if (mxge_verbose)
421 				device_printf(sc->dev,
422 					      "Assuming aligned completions"
423 					      " (ECRC)\n");
424 		}
425 	}
426 	/* see if the upstream bridge is known to
427 	   provided aligned completions */
428 	if (/* HT2000  */ (pvend == 0x1166 && pdid == 0x0132) ||
429 	    /* Ontario */ (pvend == 0x10b5 && pdid == 0x8532)) {
430 		if (mxge_verbose)
431 			device_printf(sc->dev,
432 				      "Assuming aligned completions "
433 				      "(0x%x:0x%x)\n", pvend, pdid);
434 	}
435 
436 abort:
437 	if (aligned) {
438 		sc->fw_name = mxge_fw_aligned;
439 		sc->tx.boundary = 4096;
440 	} else {
441 		sc->fw_name = mxge_fw_unaligned;
442 		sc->tx.boundary = 2048;
443 	}
444 }
445 
/* Used to strip the const qualifier from firmware data so it can be
   passed to mxge_pio_copy() without a warning; the data is only read. */
union qualhack
{
        const char *ro_char;
        char *rw_char;
};
451 
452 static int
453 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
454 {
455 	int major, minor;
456 
457 	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
458 		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
459 			      be32toh(hdr->mcp_type));
460 		return EIO;
461 	}
462 
463 	/* save firmware version for sysctl */
464 	strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
465 	if (mxge_verbose)
466 		device_printf(sc->dev, "firmware id: %s\n", hdr->version);
467 
468 	sscanf(sc->fw_version, "%d.%d", &major, &minor);
469 
470 	if (!(major == MXGEFW_VERSION_MAJOR
471 	      && minor == MXGEFW_VERSION_MINOR)) {
472 		device_printf(sc->dev, "Found firmware version %s\n",
473 			      sc->fw_version);
474 		device_printf(sc->dev, "Driver needs %d.%d\n",
475 			      MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
476 		return EINVAL;
477 	}
478 	return 0;
479 
480 }
481 
/*
 * Fetch the firmware image named by sc->fw_name via firmware(9),
 * validate its embedded header, and copy it into NIC SRAM at
 * MXGE_FW_OFFSET.  On entry *limit is the maximum acceptable image
 * size; on success it is updated to the actual image size.  Returns 0
 * or an errno; the firmware reference is always released.
 */
static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
	struct firmware *fw;
	const mcp_gen_header_t *hdr;
	unsigned hdr_offset;
	const char *fw_data;
	union qualhack hack;
	int status;
	unsigned int i;
	char dummy;


	fw = firmware_get(sc->fw_name);

	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n",
			      sc->fw_name);
		return ENOENT;
	}
	/* image must fit in SRAM and be large enough to contain the
	   header-pointer word */
	if (fw->datasize > *limit ||
	    fw->datasize < MCP_HEADER_PTR_OFFSET + 4) {
		device_printf(sc->dev, "Firmware image %s too large (%d/%d)\n",
			      sc->fw_name, (int)fw->datasize, (int) *limit);
		status = ENOSPC;
		goto abort_with_fw;
	}
	*limit = fw->datasize;

	/* check id */
	fw_data = (const char *)fw->data;
	/* NOTE(review): htobe32() is used where be32toh() is meant;
	   both perform the same transformation on either endianness,
	   so the result is correct regardless. */
	hdr_offset = htobe32(*(const uint32_t *)
			     (fw_data + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw->datasize) {
		device_printf(sc->dev, "Bad firmware file");
		status = EIO;
		goto abort_with_fw;
	}
	hdr = (const void*)(fw_data + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_fw;

	/* union qualhack casts away const for mxge_pio_copy() */
	hack.ro_char = fw_data;
	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < *limit; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
			      hack.rw_char + i,
			      min(256U, (unsigned)(*limit - i)));
		mb();
		/* read-back flushes the posted PIO writes */
		dummy = *sc->sram;
		mb();
	}

	status = 0;
abort_with_fw:
	firmware_put(fw, FIRMWARE_UNLOAD);
	return status;
}
542 
543 /*
544  * Enable or disable periodic RDMAs from the host to make certain
545  * chipsets resend dropped PCIe messages
546  */
547 
static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	/* align the command block to 8 bytes within buf_bytes */
	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	mb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);		/* confirm addr MSW */
	buf[1] = htobe32(dma_low);		/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);		/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high); 		/* dummy addr MSW */
	buf[4] = htobe32(dma_low); 		/* dummy addr LSW */
	buf[5] = htobe32(enable);			/* enable? */


	/* 0xfc01c0 is the dummy-RDMA submission mailbox in NIC SRAM */
	submit = (volatile char *)(sc->sram + 0xfc01c0);

	mxge_pio_copy(submit, buf, 64);
	mb();
	DELAY(1000);
	mb();
	/* poll up to ~20ms for the firmware's -1 acknowledgement */
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}
599 
600 static int
601 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
602 {
603 	mcp_cmd_t *buf;
604 	char buf_bytes[sizeof(*buf) + 8];
605 	volatile mcp_cmd_response_t *response = sc->cmd;
606 	volatile char *cmd_addr = sc->sram + MXGEFW_CMD_OFFSET;
607 	uint32_t dma_low, dma_high;
608 	int sleep_total = 0;
609 
610 	/* ensure buf is aligned to 8 bytes */
611 	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
612 
613 	buf->data0 = htobe32(data->data0);
614 	buf->data1 = htobe32(data->data1);
615 	buf->data2 = htobe32(data->data2);
616 	buf->cmd = htobe32(cmd);
617 	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
618 	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
619 
620 	buf->response_addr.low = htobe32(dma_low);
621 	buf->response_addr.high = htobe32(dma_high);
622 	mtx_lock(&sc->cmd_lock);
623 	response->result = 0xffffffff;
624 	mb();
625 	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
626 
627 	/* wait up to 20ms */
628 	for (sleep_total = 0; sleep_total <  20; sleep_total++) {
629 		bus_dmamap_sync(sc->cmd_dma.dmat,
630 				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
631 		mb();
632 		if (response->result != 0xffffffff) {
633 			if (response->result == 0) {
634 				data->data0 = be32toh(response->data);
635 				mtx_unlock(&sc->cmd_lock);
636 				return 0;
637 			} else {
638 				device_printf(sc->dev,
639 					      "mxge: command %d "
640 					      "failed, result = %d\n",
641 					      cmd, be32toh(response->result));
642 				mtx_unlock(&sc->cmd_lock);
643 				return ENXIO;
644 			}
645 		}
646 		DELAY(1000);
647 	}
648 	mtx_unlock(&sc->cmd_lock);
649 	device_printf(sc->dev, "mxge: command %d timed out"
650 		      "result = %d\n",
651 		      cmd, be32toh(response->result));
652 	return EAGAIN;
653 }
654 
/*
 * Validate the firmware already running on the NIC (used when we fail
 * to load our own image).  Copies the running firmware's header out of
 * SRAM into host memory and checks its type and version.  Returns 0 or
 * an errno.
 */
static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	/* NOTE(review): htobe32() used where be32toh() is meant; the
	   result is identical on either endianness. */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	/* header must be 4-byte aligned and fully inside SRAM */
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not malloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	free(hdr, M_DEVBUF);
	return status;
}
688 
689 
/*
 * Load the MCP firmware into SRAM (or adopt the one already running)
 * and hand control to it through the bootstrap MCP's mailbox.
 * Returns 0 on success or an errno.
 */
static int
mxge_load_firmware(mxge_softc_t *sc)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	/* 8-byte aligned scratch block for the handoff command */
	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx.boundary == 4096) {
			device_printf(sc->dev,
				"Using firmware currently running on NIC"
				 ".  For optimal\n");
			device_printf(sc->dev,
				 "performance consider loading optimized "
				 "firmware\n");
		}

	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	mb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
					/* where the code starts*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8); 	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	/* 0xfc0000 is the bootstrap MCP's handoff mailbox in NIC SRAM */
	submit = (volatile char *)(sc->sram + 0xfc0000);
	mxge_pio_copy(submit, buf, 64);
	mb();
	DELAY(1000);
	mb();
	/* poll up to ~200ms for the firmware's -1 acknowledgement */
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		/* re-sync so we observe the device's DMA write */
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev,"handoff failed (%p = 0x%x)",
			confirm, *confirm);

		return ENXIO;
	}
	return 0;
}
770 
771 static int
772 mxge_update_mac_address(mxge_softc_t *sc)
773 {
774 	mxge_cmd_t cmd;
775 	uint8_t *addr = sc->mac_addr;
776 	int status;
777 
778 
779 	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
780 		     | (addr[2] << 8) | addr[3]);
781 
782 	cmd.data1 = ((addr[4] << 8) | (addr[5]));
783 
784 	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
785 	return status;
786 }
787 
788 static int
789 mxge_change_pause(mxge_softc_t *sc, int pause)
790 {
791 	mxge_cmd_t cmd;
792 	int status;
793 
794 	if (pause)
795 		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
796 				       &cmd);
797 	else
798 		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
799 				       &cmd);
800 
801 	if (status) {
802 		device_printf(sc->dev, "Failed to set flow control mode\n");
803 		return ENXIO;
804 	}
805 	sc->pause = pause;
806 	return 0;
807 }
808 
809 static void
810 mxge_change_promisc(mxge_softc_t *sc, int promisc)
811 {
812 	mxge_cmd_t cmd;
813 	int status;
814 
815 	if (promisc)
816 		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
817 				       &cmd);
818 	else
819 		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
820 				       &cmd);
821 
822 	if (status) {
823 		device_printf(sc->dev, "Failed to set promisc mode\n");
824 	}
825 }
826 
827 static int
828 mxge_reset(mxge_softc_t *sc)
829 {
830 
831 	mxge_cmd_t cmd;
832 	mxge_dma_t dmabench_dma;
833 	size_t bytes;
834 	int status;
835 
836 	/* try to send a reset command to the card to see if it
837 	   is alive */
838 	memset(&cmd, 0, sizeof (cmd));
839 	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
840 	if (status != 0) {
841 		device_printf(sc->dev, "failed reset\n");
842 		return ENXIO;
843 	}
844 
845 	mxge_dummy_rdma(sc, 1);
846 
847 	/* Now exchange information about interrupts  */
848 	bytes = mxge_max_intr_slots * sizeof (*sc->rx_done.entry);\
849 	memset(sc->rx_done.entry, 0, bytes);
850 	cmd.data0 = (uint32_t)bytes;
851 	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
852 	cmd.data0 = MXGE_LOWPART_TO_U32(sc->rx_done.dma.bus_addr);
853 	cmd.data1 = MXGE_HIGHPART_TO_U32(sc->rx_done.dma.bus_addr);
854 	status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA, &cmd);
855 
856 	status |= mxge_send_cmd(sc,
857 				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
858 
859 
860 	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);
861 
862 	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
863 	sc->irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
864 
865 
866 	status |= mxge_send_cmd(sc,  MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
867 				&cmd);
868 	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
869 	if (status != 0) {
870 		device_printf(sc->dev, "failed set interrupt parameters\n");
871 		return status;
872 	}
873 
874 
875 	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);
876 
877 
878 	/* run a DMA benchmark */
879 	sc->read_dma = sc->write_dma = sc->read_write_dma = 0;
880 	status = mxge_dma_alloc(sc, &dmabench_dma, 4096, 4096);
881 	if (status)
882 		goto dmabench_fail;
883 
884 	/* Read DMA */
885 	cmd.data0 = MXGE_LOWPART_TO_U32(dmabench_dma.bus_addr);
886 	cmd.data1 = MXGE_HIGHPART_TO_U32(dmabench_dma.bus_addr);
887 	cmd.data2 = sc->tx.boundary * 0x10000;
888 
889 	status = mxge_send_cmd(sc, MXGEFW_DMA_TEST, &cmd);
890 	if (status != 0)
891 		device_printf(sc->dev, "read dma benchmark failed\n");
892 	else
893 		sc->read_dma = ((cmd.data0>>16) * sc->tx.boundary * 2) /
894 			(cmd.data0 & 0xffff);
895 
896 	/* Write DMA */
897 	cmd.data0 = MXGE_LOWPART_TO_U32(dmabench_dma.bus_addr);
898 	cmd.data1 = MXGE_HIGHPART_TO_U32(dmabench_dma.bus_addr);
899 	cmd.data2 = sc->tx.boundary * 0x1;
900 	status = mxge_send_cmd(sc, MXGEFW_DMA_TEST, &cmd);
901 	if (status != 0)
902 		device_printf(sc->dev, "write dma benchmark failed\n");
903 	else
904 		sc->write_dma = ((cmd.data0>>16) * sc->tx.boundary * 2) /
905 			(cmd.data0 & 0xffff);
906 	/* Read/Write DMA */
907 	cmd.data0 = MXGE_LOWPART_TO_U32(dmabench_dma.bus_addr);
908 	cmd.data1 = MXGE_HIGHPART_TO_U32(dmabench_dma.bus_addr);
909 	cmd.data2 = sc->tx.boundary * 0x10001;
910 	status = mxge_send_cmd(sc, MXGEFW_DMA_TEST, &cmd);
911 	if (status != 0)
912 		device_printf(sc->dev, "read/write dma benchmark failed\n");
913 	else
914 		sc->read_write_dma =
915 			((cmd.data0>>16) * sc->tx.boundary * 2 * 2) /
916 			(cmd.data0 & 0xffff);
917 
918 	mxge_dma_free(&dmabench_dma);
919 
920 dmabench_fail:
921 	/* reset mcp/driver shared state back to 0 */
922 	bzero(sc->rx_done.entry, bytes);
923 	sc->rx_done.idx = 0;
924 	sc->rx_done.cnt = 0;
925 	sc->tx.req = 0;
926 	sc->tx.done = 0;
927 	sc->tx.pkt_done = 0;
928 	sc->rx_big.cnt = 0;
929 	sc->rx_small.cnt = 0;
930 	sc->rdma_tags_available = 15;
931 	status = mxge_update_mac_address(sc);
932 	mxge_change_promisc(sc, 0);
933 	mxge_change_pause(sc, sc->pause);
934 	return status;
935 }
936 
937 static int
938 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
939 {
940         mxge_softc_t *sc;
941         unsigned int intr_coal_delay;
942         int err;
943 
944         sc = arg1;
945         intr_coal_delay = sc->intr_coal_delay;
946         err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
947         if (err != 0) {
948                 return err;
949         }
950         if (intr_coal_delay == sc->intr_coal_delay)
951                 return 0;
952 
953         if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
954                 return EINVAL;
955 
956 	sx_xlock(&sc->driver_lock);
957 	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
958 	sc->intr_coal_delay = intr_coal_delay;
959 
960 	sx_xunlock(&sc->driver_lock);
961         return err;
962 }
963 
964 static int
965 mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
966 {
967         mxge_softc_t *sc;
968         unsigned int enabled;
969         int err;
970 
971         sc = arg1;
972         enabled = sc->pause;
973         err = sysctl_handle_int(oidp, &enabled, arg2, req);
974         if (err != 0) {
975                 return err;
976         }
977         if (enabled == sc->pause)
978                 return 0;
979 
980 	sx_xlock(&sc->driver_lock);
981 	err = mxge_change_pause(sc, enabled);
982 	sx_xunlock(&sc->driver_lock);
983         return err;
984 }
985 
986 static int
987 mxge_handle_be32(SYSCTL_HANDLER_ARGS)
988 {
989         int err;
990 
991         if (arg1 == NULL)
992                 return EFAULT;
993         arg2 = be32toh(*(int *)arg1);
994         arg1 = NULL;
995         err = sysctl_handle_int(oidp, arg1, arg2, req);
996 
997         return err;
998 }
999 
1000 static void
1001 mxge_add_sysctls(mxge_softc_t *sc)
1002 {
1003 	struct sysctl_ctx_list *ctx;
1004 	struct sysctl_oid_list *children;
1005 	mcp_irq_data_t *fw;
1006 
1007 	ctx = device_get_sysctl_ctx(sc->dev);
1008 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
1009 	fw = sc->fw_stats;
1010 
1011 	/* random information */
1012 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1013 		       "firmware_version",
1014 		       CTLFLAG_RD, &sc->fw_version,
1015 		       0, "firmware version");
1016 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1017 		       "serial_number",
1018 		       CTLFLAG_RD, &sc->serial_number_string,
1019 		       0, "serial number");
1020 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1021 		       "product_code",
1022 		       CTLFLAG_RD, &sc->product_code_string,
1023 		       0, "product_code");
1024 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1025 		       "tx_boundary",
1026 		       CTLFLAG_RD, &sc->tx.boundary,
1027 		       0, "tx_boundary");
1028 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1029 		       "write_combine",
1030 		       CTLFLAG_RD, &sc->wc,
1031 		       0, "write combining PIO?");
1032 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1033 		       "read_dma_MBs",
1034 		       CTLFLAG_RD, &sc->read_dma,
1035 		       0, "DMA Read speed in MB/s");
1036 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1037 		       "write_dma_MBs",
1038 		       CTLFLAG_RD, &sc->write_dma,
1039 		       0, "DMA Write speed in MB/s");
1040 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1041 		       "read_write_dma_MBs",
1042 		       CTLFLAG_RD, &sc->read_write_dma,
1043 		       0, "DMA concurrent Read/Write speed in MB/s");
1044 
1045 
1046 	/* performance related tunables */
1047 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1048 			"intr_coal_delay",
1049 			CTLTYPE_INT|CTLFLAG_RW, sc,
1050 			0, mxge_change_intr_coal,
1051 			"I", "interrupt coalescing delay in usecs");
1052 
1053 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1054 			"flow_control_enabled",
1055 			CTLTYPE_INT|CTLFLAG_RW, sc,
1056 			0, mxge_change_flow_control,
1057 			"I", "interrupt coalescing delay in usecs");
1058 
1059 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1060 		       "deassert_wait",
1061 		       CTLFLAG_RW, &mxge_deassert_wait,
1062 		       0, "Wait for IRQ line to go low in ihandler");
1063 
1064 	/* stats block from firmware is in network byte order.
1065 	   Need to swap it */
1066 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1067 			"link_up",
1068 			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
1069 			0, mxge_handle_be32,
1070 			"I", "link up");
1071 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1072 			"rdma_tags_available",
1073 			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
1074 			0, mxge_handle_be32,
1075 			"I", "rdma_tags_available");
1076 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1077 			"dropped_link_overflow",
1078 			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
1079 			0, mxge_handle_be32,
1080 			"I", "dropped_link_overflow");
1081 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1082 			"dropped_link_error_or_filtered",
1083 			CTLTYPE_INT|CTLFLAG_RD,
1084 			&fw->dropped_link_error_or_filtered,
1085 			0, mxge_handle_be32,
1086 			"I", "dropped_link_error_or_filtered");
1087 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1088 			"dropped_runt",
1089 			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
1090 			0, mxge_handle_be32,
1091 			"I", "dropped_runt");
1092 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1093 			"dropped_overrun",
1094 			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
1095 			0, mxge_handle_be32,
1096 			"I", "dropped_overrun");
1097 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1098 			"dropped_no_small_buffer",
1099 			CTLTYPE_INT|CTLFLAG_RD,
1100 			&fw->dropped_no_small_buffer,
1101 			0, mxge_handle_be32,
1102 			"I", "dropped_no_small_buffer");
1103 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1104 			"dropped_no_big_buffer",
1105 			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
1106 			0, mxge_handle_be32,
1107 			"I", "dropped_no_big_buffer");
1108 
1109 	/* host counters exported for debugging */
1110 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1111 		       "rx_small_cnt",
1112 		       CTLFLAG_RD, &sc->rx_small.cnt,
1113 		       0, "rx_small_cnt");
1114 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1115 		       "rx_big_cnt",
1116 		       CTLFLAG_RD, &sc->rx_big.cnt,
1117 		       0, "rx_small_cnt");
1118 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1119 		       "tx_req",
1120 		       CTLFLAG_RD, &sc->tx.req,
1121 		       0, "tx_req");
1122 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1123 		       "tx_done",
1124 		       CTLFLAG_RD, &sc->tx.done,
1125 		       0, "tx_done");
1126 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1127 		       "tx_pkt_done",
1128 		       CTLFLAG_RD, &sc->tx.pkt_done,
1129 		       0, "tx_done");
1130 
1131 	/* verbose printing? */
1132 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1133 		       "verbose",
1134 		       CTLFLAG_RW, &mxge_verbose,
1135 		       0, "verbose printing");
1136 
1137 }
1138 
1139 /* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
1140    backwards one at a time and handle ring wraps */
1141 
static inline void
mxge_submit_req_backwards(mxge_tx_buf_t *tx,
			    mcp_kreq_ether_send_t *src, int cnt)
{
        int idx, starting_slot;

        /* Copy requests src[cnt-1] down to src[1] into the NIC ring,
	 * wrapping with tx->mask.  src[0] is deliberately NOT copied
	 * here: the caller (mxge_submit_req) writes it last so the
	 * firmware does not see a partially-written chain. */
        starting_slot = tx->req;
        while (cnt > 1) {
                cnt--;
                idx = (starting_slot + cnt) & tx->mask;
                mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
                mb(); /* order each PIO burst before the next */
        }
}
1156 
1157 /*
1158  * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
1159  * at most 32 bytes at a time, so as to avoid involving the software
1160  * pio handler in the nic.   We re-write the first segment's flags
1161  * to mark them valid only after writing the entire chain
1162  */
1163 
static inline void
mxge_submit_req(mxge_tx_buf_t *tx, mcp_kreq_ether_send_t *src,
                  int cnt)
{
        int idx, i;
        uint32_t *src_ints;
	volatile uint32_t *dst_ints;
        mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

        idx = tx->req & tx->mask;

	/* Clear the first descriptor's flags before copying, so the
	 * firmware treats the chain as invalid until the final 32-bit
	 * rewrite below re-installs them. */
	last_flags = src->flags;
	src->flags = 0;
        mb();
        dst = dstp = &tx->lanai[idx];
        srcp = src;

        if ((idx + cnt) < tx->mask) {
		/* no ring wrap: copy two 16-byte requests per burst */
                for (i = 0; i < (cnt - 1); i += 2) {
                        mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
                        mb(); /* force write every 32 bytes */
                        srcp += 2;
                        dstp += 2;
                }
        } else {
                /* submit all but the first request, and ensure
                   that it is submitted below */
                mxge_submit_req_backwards(tx, src, cnt);
                i = 0;
        }
        if (i < cnt) {
                /* submit the remaining request (the first one in the
		 * wrap case, or the odd trailing one otherwise) */
                mxge_pio_copy(dstp, srcp, sizeof(*src));
                mb(); /* barrier before setting valid flag */
        }

        /* re-write the last 32-bits with the valid flags */
        src->flags = last_flags;
        src_ints = (uint32_t *)src;
        src_ints+=3;
        dst_ints = (volatile uint32_t *)dst;
        dst_ints+=3;
        *dst_ints =  *src_ints;
        tx->req += cnt;
        mb();
}
1212 
/*
 * Submit a chain of send requests through the NIC's write-combining
 * FIFO in 64-byte (4-request) bursts.
 */
static inline void
mxge_submit_req_wc(mxge_tx_buf_t *tx, mcp_kreq_ether_send_t *src, int cnt)
{
    /* advance the ring index before pushing to the WC FIFO */
    tx->req += cnt;
    mb();
    while (cnt >= 4) {
	    mxge_pio_copy((volatile char *)tx->wc_fifo, src, 64);
	    mb();
	    src += 4;
	    cnt -= 4;
    }
    if (cnt > 0) {
	    /* pad it to 64 bytes.  The src is 64 bytes bigger than it
	       needs to be so that we don't overrun it */
	    /* NOTE(review): the cnt<<18 offset appears to encode the
	       residual count in the FIFO address for the firmware --
	       confirm against the MCP interface spec */
	    mxge_pio_copy(tx->wc_fifo + (cnt<<18), src, 64);
	    mb();
    }
}
1231 
/*
 * Map a single outbound mbuf chain for DMA and translate it into a
 * list of firmware send requests, then hand the list to the NIC.
 * Frames that cannot be mapped are dropped and counted in if_oerrors.
 */
static void
mxge_encap(mxge_softc_t *sc, struct mbuf *m)
{
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t seg_list[MXGE_MAX_SEND_DESC];
	bus_dma_segment_t *seg;
	struct mbuf *m_tmp;
	struct ifnet *ifp;
	mxge_tx_buf_t *tx;
	struct ether_header *eh;
	struct ip *ip;
	int cnt, cum_len, err, i, idx;
	uint16_t flags, pseudo_hdr_offset;
        uint8_t cksum_offset;



	ifp = sc->ifp;
	tx = &sc->tx;

	/* (try to) map the frame for DMA */
	idx = tx->req & tx->mask;
	err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
				      m, seg_list, &cnt,
				      BUS_DMA_NOWAIT);
	if (err == EFBIG) {
		/* Too many segments in the chain.  Try
		   to defrag */
		m_tmp = m_defrag(m, M_NOWAIT);
		if (m_tmp == NULL) {
			goto drop;
		}
		m = m_tmp;
		err = bus_dmamap_load_mbuf_sg(tx->dmat,
					      tx->info[idx].map,
					      m, seg_list, &cnt,
					      BUS_DMA_NOWAIT);
	}
	if (err != 0) {
		device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d\n",
			      err);
		goto drop;
	}
	bus_dmamap_sync(tx->dmat, tx->info[idx].map,
			BUS_DMASYNC_PREWRITE);
	/* remember the mbuf on the first slot so mxge_tx_done can
	   free it once the firmware reports completion */
	tx->info[idx].m = m;

	req = tx->req_list;
	cksum_offset = 0;
	pseudo_hdr_offset = 0;
	flags = MXGEFW_FLAGS_NO_TSO;

	/* checksum offloading? */
	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
		eh = mtod(m, struct ether_header *);
		ip = (struct ip *) (eh + 1);
		/* start of the L4 payload: ethernet + IP header len */
		cksum_offset = sizeof(*eh) + (ip->ip_hl << 2);
		pseudo_hdr_offset = cksum_offset +  m->m_pkthdr.csum_data;
		pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
		req->cksum_offset = cksum_offset;
		flags |= MXGEFW_FLAGS_CKSUM;
	}
	if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
		flags |= MXGEFW_FLAGS_SMALL;

	/* convert segments into a request list */
	cum_len = 0;
	seg = seg_list;
	req->flags = MXGEFW_FLAGS_FIRST;
	for (i = 0; i < cnt; i++) {
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		req->length = htobe16(seg->ds_len);
		req->cksum_offset = cksum_offset;
		/* cksum_offset only applies to the segment containing
		   the start of the L4 payload; decay it to zero once
		   consumed */
		if (cksum_offset > seg->ds_len)
			cksum_offset -= seg->ds_len;
		else
			cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * MXGEFW_FLAGS_ALIGN_ODD);
		cum_len += seg->ds_len;
		seg++;
		req++;
		req->flags = 0;
	}
	req--;
	/* pad runts to 60 bytes */
	if (cum_len < 60) {
		req++;
		/* append one extra segment pointing at the shared
		   zero-filled pad buffer */
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->length = htobe16(60 - cum_len);
		req->cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * MXGEFW_FLAGS_ALIGN_ODD);
		cnt++;
	}

	tx->req_list[0].rdma_count = cnt;
#if 0
	/* print what the firmware will see */
	for (i = 0; i < cnt; i++) {
		printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
		    "cso:%d, flags:0x%x, rdma:%d\n",
		    i, (int)ntohl(tx->req_list[i].addr_high),
		    (int)ntohl(tx->req_list[i].addr_low),
		    (int)ntohs(tx->req_list[i].length),
		    (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
		    tx->req_list[i].cksum_offset, tx->req_list[i].flags,
		    tx->req_list[i].rdma_count);
	}
	printf("--------------\n");
#endif
	/* mark the last slot of this packet so mxge_tx_done can count
	   whole packets */
	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	if (tx->wc_fifo == NULL)
		mxge_submit_req(tx, tx->req_list, cnt);
	else
		mxge_submit_req_wc(tx, tx->req_list, cnt);
	return;

drop:
	m_freem(m);
	ifp->if_oerrors++;
	return;
}
1365 
1366 
1367 
1368 
/*
 * Drain the interface send queue while enough transmit descriptors
 * remain.  Called with sc->tx_lock held.
 */
static inline void
mxge_start_locked(mxge_softc_t *sc)
{
	struct mbuf *m;
	struct ifnet *ifp;

	ifp = sc->ifp;
	/* keep going only while a worst-case packet (MXGE_MAX_SEND_DESC
	   segments) would still fit in the ring */
	while ((sc->tx.mask - (sc->tx.req - sc->tx.done))
	       > MXGE_MAX_SEND_DESC) {

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL) {
			return;
		}
		/* let BPF see it */
		BPF_MTAP(ifp, m);

		/* give it to the nic */
		mxge_encap(sc, m);
	}
	/* ran out of transmit slots; mxge_tx_done clears OACTIVE once
	   the ring drains */
	sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
}
1392 
1393 static void
1394 mxge_start(struct ifnet *ifp)
1395 {
1396 	mxge_softc_t *sc = ifp->if_softc;
1397 
1398 
1399 	mtx_lock(&sc->tx_lock);
1400 	mxge_start_locked(sc);
1401 	mtx_unlock(&sc->tx_lock);
1402 }
1403 
1404 /*
1405  * copy an array of mcp_kreq_ether_recv_t's to the mcp.  Copy
1406  * at most 32 bytes at a time, so as to avoid involving the software
1407  * pio handler in the nic.   We re-write the first segment's low
1408  * DMA address to mark it valid only after we write the entire chunk
1409  * in a burst
1410  */
static inline void
mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
		mcp_kreq_ether_recv_t *src)
{
	uint32_t low;

	/* Poison the first descriptor's low address so the firmware
	 * ignores the batch until the whole 8-entry burst has landed;
	 * the real address is written last to validate it. */
	low = src->addr_low;
	src->addr_low = 0xffffffff;
	mxge_pio_copy(dst, src, 8 * sizeof (*src));
	mb(); /* burst must complete before the validating store */
	dst->addr_low = low;
	mb();
}
1424 
/*
 * Allocate and DMA-map a small (MHLEN) receive mbuf for ring slot
 * idx, recording its bus address in the shadow ring.  Every 8th slot
 * the accumulated 8 descriptors are pushed to the NIC in one burst.
 * Returns 0 on success or an errno on allocation/mapping failure.
 */
static int
mxge_get_buf_small(mxge_softc_t *sc, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg;
	struct mbuf *m;
	mxge_rx_buf_t *rx = &sc->rx_small;
	int cnt, err;

	m = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m == NULL) {
		rx->alloc_fail++;
		err = ENOBUFS;
		goto done;
	}
	m->m_len = MHLEN;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      &seg, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_free(m);
		goto done;
	}
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));

done:
	/* submit to the NIC in groups of 8, even on failure, so the
	   firmware keeps seeing (possibly recycled) buffers */
	if ((idx & 7) == 7) {
		if (rx->wc_fifo == NULL)
			mxge_submit_8rx(&rx->lanai[idx - 7],
					&rx->shadow[idx - 7]);
		else {
			mb();
			mxge_pio_copy(rx->wc_fifo, &rx->shadow[idx - 7], 64);
		}
        }
	return err;
}
1464 
/*
 * Allocate and DMA-map a big (sc->big_bytes: cluster or jumbo page)
 * receive mbuf for ring slot idx, recording its bus address in the
 * shadow ring.  Every 8th slot the accumulated 8 descriptors are
 * pushed to the NIC in one burst.  Returns 0 or an errno.
 */
static int
mxge_get_buf_big(mxge_softc_t *sc, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg;
	struct mbuf *m;
	mxge_rx_buf_t *rx = &sc->rx_big;
	int cnt, err;

	m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, sc->big_bytes);
	if (m == NULL) {
		rx->alloc_fail++;
		err = ENOBUFS;
		goto done;
	}
	m->m_len = sc->big_bytes;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      &seg, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_free(m);
		goto done;
	}
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));

done:
	/* submit to the NIC in groups of 8, even on failure, so the
	   firmware keeps seeing (possibly recycled) buffers */
	if ((idx & 7) == 7) {
		if (rx->wc_fifo == NULL)
			mxge_submit_8rx(&rx->lanai[idx - 7],
					&rx->shadow[idx - 7]);
		else {
			mb();
			mxge_pio_copy(rx->wc_fifo, &rx->shadow[idx - 7], 64);
		}
        }
	return err;
}
1504 
1505 static inline void
1506 mxge_rx_csum(struct mbuf *m, int csum)
1507 {
1508 	struct ether_header *eh;
1509 	struct ip *ip;
1510 
1511 	eh = mtod(m, struct ether_header *);
1512 	if (__predict_true(eh->ether_type ==  htons(ETHERTYPE_IP))) {
1513 		ip = (struct ip *)(eh + 1);
1514 		if (__predict_true(ip->ip_p == IPPROTO_TCP ||
1515 				   ip->ip_p == IPPROTO_UDP)) {
1516 			m->m_pkthdr.csum_data = csum;
1517 			m->m_pkthdr.csum_flags = CSUM_DATA_VALID;
1518 		}
1519 	}
1520 }
1521 
/*
 * Handle a received frame that spans one or more big-ring buffers:
 * replace each consumed buffer, chain the mbufs together, trim any
 * trailing slack, and pass the frame to the stack.  On a refill
 * failure the partial frame is dropped and the remaining slots are
 * still cycled through the allocator.
 */
static inline void
mxge_rx_done_big(mxge_softc_t *sc, int len, int csum)
{
	struct ifnet *ifp;
	struct mbuf *m = 0; 		/* -Wunitialized */
	struct mbuf *m_prev = 0;	/* -Wunitialized */
	struct mbuf *m_head = 0;
	bus_dmamap_t old_map;
	mxge_rx_buf_t *rx;
	int idx;


	rx = &sc->rx_big;
	ifp = sc->ifp;
	while (len > 0) {
		idx = rx->cnt & rx->mask;
                rx->cnt++;
		/* save a pointer to the received mbuf */
		m = rx->info[idx].m;
		/* try to replace the received mbuf */
		if (mxge_get_buf_big(sc, rx->extra_map, idx)) {
			goto drop;
		}
		/* unmap the received buffer */
		old_map = rx->info[idx].map;
		bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(rx->dmat, old_map);

		/* swap the bus_dmamap_t's */
		rx->info[idx].map = rx->extra_map;
		rx->extra_map = old_map;

		/* chain multiple segments together */
		if (!m_head) {
			m_head = m;
			/* mcp implicitly skips 1st bytes so that
			 * packet is properly aligned */
			m->m_data += MXGEFW_PAD;
			m->m_pkthdr.len = len;
			m->m_len = sc->big_bytes - MXGEFW_PAD;
		} else {
			m->m_len = sc->big_bytes;
			m->m_flags &= ~M_PKTHDR;
			m_prev->m_next = m;
		}
		len -= m->m_len;
		m_prev = m;
	}

	/* trim trailing garbage from the last mbuf in the chain.  If
	 * there is any garbage, len will be negative */
	m->m_len += len;

	/* if the checksum is valid, mark it in the mbuf header */
	if (sc->csum_flag)
		mxge_rx_csum(m_head, csum);

	/* pass the frame up the stack */
	m_head->m_pkthdr.rcvif = ifp;
	ifp->if_ipackets++;
	(*ifp->if_input)(ifp, m_head);
	return;

drop:
	/* drop the frame -- the old mbuf(s) are re-cycled by running
	   every slot through the allocator */
        if (m_head) {
		/* chained mbufs already collected are freed as one */
                len -= sc->big_bytes;
                m_freem(m_head);
        } else {
                len -= (sc->big_bytes + MXGEFW_PAD);
        }
        while ((int)len > 0) {
                idx = rx->cnt & rx->mask;
                rx->cnt++;
                m = rx->info[idx].m;
		/* only recycle the slot if a replacement was obtained */
                if (0 == (mxge_get_buf_big(sc, rx->extra_map, idx))) {
			m_freem(m);
			/* unmap the received buffer */
			old_map = rx->info[idx].map;
			bus_dmamap_sync(rx->dmat, old_map,
					BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rx->dmat, old_map);

			/* swap the bus_dmamap_t's */
			rx->info[idx].map = rx->extra_map;
			rx->extra_map = old_map;
		}
                len -= sc->big_bytes;
        }

	ifp->if_ierrors++;

}
1616 
/*
 * Handle a received frame that fits in a single small-ring buffer:
 * replace the buffer, swap DMA maps, and pass the mbuf to the stack.
 * If a replacement cannot be allocated the frame is dropped and the
 * old mbuf stays in the ring.
 */
static inline void
mxge_rx_done_small(mxge_softc_t *sc, uint32_t len, uint32_t csum)
{
	struct ifnet *ifp;
	struct mbuf *m;
	mxge_rx_buf_t *rx;
	bus_dmamap_t old_map;
	int idx;

	ifp = sc->ifp;
	rx = &sc->rx_small;
	idx = rx->cnt & rx->mask;
	rx->cnt++;
	/* save a pointer to the received mbuf */
	m = rx->info[idx].m;
	/* try to replace the received mbuf */
	if (mxge_get_buf_small(sc, rx->extra_map, idx)) {
		/* drop the frame -- the old mbuf is re-cycled */
		ifp->if_ierrors++;
		return;
	}

	/* unmap the received buffer */
	old_map = rx->info[idx].map;
	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(rx->dmat, old_map);

	/* swap the bus_dmamap_t's */
	rx->info[idx].map = rx->extra_map;
	rx->extra_map = old_map;

	/* mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned */
	m->m_data += MXGEFW_PAD;

	/* if the checksum is valid, mark it in the mbuf header */
	if (sc->csum_flag)
		mxge_rx_csum(m, csum);

	/* pass the frame up the stack */
	m->m_pkthdr.rcvif = ifp;
	m->m_len = m->m_pkthdr.len = len;
	ifp->if_ipackets++;
	(*ifp->if_input)(ifp, m);
}
1662 
1663 static inline void
1664 mxge_clean_rx_done(mxge_softc_t *sc)
1665 {
1666 	mxge_rx_done_t *rx_done = &sc->rx_done;
1667 	int limit = 0;
1668 	uint16_t length;
1669 	uint16_t checksum;
1670 
1671 
1672 	while (rx_done->entry[rx_done->idx].length != 0) {
1673 		length = ntohs(rx_done->entry[rx_done->idx].length);
1674 		rx_done->entry[rx_done->idx].length = 0;
1675 		checksum = ntohs(rx_done->entry[rx_done->idx].checksum);
1676 		if (length <= MHLEN)
1677 			mxge_rx_done_small(sc, length, checksum);
1678 		else
1679 			mxge_rx_done_big(sc, length, checksum);
1680 		rx_done->cnt++;
1681 		rx_done->idx = rx_done->cnt & (mxge_max_intr_slots - 1);
1682 
1683 		/* limit potential for livelock */
1684 		if (__predict_false(++limit > 2 * mxge_max_intr_slots))
1685 			break;
1686 
1687 	}
1688 }
1689 
1690 
/*
 * Reclaim transmit ring slots up to the firmware's completion index
 * mcp_idx: unload DMA maps, free mbufs, and advance tx->done and
 * tx->pkt_done.  Restarts the send queue when enough slots free up.
 */
static inline void
mxge_tx_done(mxge_softc_t *sc, uint32_t mcp_idx)
{
	struct ifnet *ifp;
	mxge_tx_buf_t *tx;
	struct mbuf *m;
	bus_dmamap_t map;
	int idx, limit;

	limit = 0;
	tx = &sc->tx;
	ifp = sc->ifp;
	while (tx->pkt_done != mcp_idx) {
		idx = tx->done & tx->mask;
		tx->done++;
		m = tx->info[idx].m;
		/* mbuf and DMA map only attached to the first
		   segment per-mbuf */
		if (m != NULL) {
			ifp->if_opackets++;
			tx->info[idx].m = NULL;
			map = tx->info[idx].map;
			bus_dmamap_unload(tx->dmat, map);
			m_freem(m);
		}
		/* the flag set by mxge_encap marks the last slot of a
		   packet; crossing it completes one packet */
		if (tx->info[idx].flag) {
			tx->info[idx].flag = 0;
			tx->pkt_done++;
		}
		/* limit potential for livelock by only handling
		   2 full tx rings per call */
		if (__predict_false(++limit >  2 * tx->mask))
			break;
	}

	/* If we have space, clear IFF_OACTIVE to tell the stack that
           its OK to send packets */

	if (ifp->if_drv_flags & IFF_DRV_OACTIVE &&
	    tx->req - tx->done < (tx->mask + 1)/4) {
		mtx_lock(&sc->tx_lock);
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
		mxge_start_locked(sc);
		mtx_unlock(&sc->tx_lock);
	}
}
1737 
/*
 * Interrupt handler.  The firmware DMAs an irq_data block into host
 * memory; stats->valid going non-zero signals work.  Processes tx
 * completions and rx done entries until the firmware clears valid,
 * then handles link-state / RDMA-tag updates and claims the irq.
 */
static void
mxge_intr(void *arg)
{
	mxge_softc_t *sc = arg;
	mcp_irq_data_t *stats = sc->fw_stats;
	mxge_tx_buf_t *tx = &sc->tx;
	mxge_rx_done_t *rx_done = &sc->rx_done;
	uint32_t send_done_count;
	uint8_t valid;


	/* make sure the DMA has finished */
	if (!stats->valid) {
		return;
	}
	valid = stats->valid;

	/* lower legacy IRQ  */
	*sc->irq_deassert = 0;
	mb();
	if (!mxge_deassert_wait)
		/* don't wait for conf. that irq is low */
		stats->valid = 0;
	do {
		/* check for transmit completes and receives */
		send_done_count = be32toh(stats->send_done_count);
		while ((send_done_count != tx->pkt_done) ||
		       (rx_done->entry[rx_done->idx].length != 0)) {
			mxge_tx_done(sc, (int)send_done_count);
			mxge_clean_rx_done(sc);
			send_done_count = be32toh(stats->send_done_count);
		}
		/* loop until the firmware confirms the irq is
		   deasserted (when mxge_deassert_wait is set) */
	} while (*((volatile uint8_t *) &stats->valid));

	if (__predict_false(stats->stats_updated)) {
		if (sc->link_state != stats->link_up) {
			sc->link_state = stats->link_up;
			if (sc->link_state) {
				if_link_state_change(sc->ifp, LINK_STATE_UP);
				if (mxge_verbose)
					device_printf(sc->dev, "link up\n");
			} else {
				if_link_state_change(sc->ifp, LINK_STATE_DOWN);
				if (mxge_verbose)
					device_printf(sc->dev, "link down\n");
			}
		}
		if (sc->rdma_tags_available !=
		    be32toh(sc->fw_stats->rdma_tags_available)) {
			sc->rdma_tags_available =
				be32toh(sc->fw_stats->rdma_tags_available);
			device_printf(sc->dev, "RDMA timed out! %d tags "
				      "left\n", sc->rdma_tags_available);
		}
		sc->down_cnt += stats->link_down;
	}

	/* check to see if we have rx token to pass back */
	if (valid & 0x1)
	    *sc->irq_claim = be32toh(3);
	*(sc->irq_claim + 1) = be32toh(3);
}
1800 
/*
 * if_watchdog stub; only logs that it was invoked.  Uses the
 * standard C99 __func__ instead of the GCC-specific __FUNCTION__.
 */
static void
mxge_watchdog(struct ifnet *ifp)
{
	printf("%s called\n", __func__);
}
1806 
/* if_init stub: intentionally empty.  NOTE(review): bring-up appears
 * to be driven by mxge_open() from the ioctl path instead -- confirm
 * against the callers outside this chunk. */
static void
mxge_init(void *arg)
{
}
1811 
1812 
1813 
1814 static void
1815 mxge_free_mbufs(mxge_softc_t *sc)
1816 {
1817 	int i;
1818 
1819 	for (i = 0; i <= sc->rx_big.mask; i++) {
1820 		if (sc->rx_big.info[i].m == NULL)
1821 			continue;
1822 		bus_dmamap_unload(sc->rx_big.dmat,
1823 				  sc->rx_big.info[i].map);
1824 		m_freem(sc->rx_big.info[i].m);
1825 		sc->rx_big.info[i].m = NULL;
1826 	}
1827 
1828 	for (i = 0; i <= sc->rx_big.mask; i++) {
1829 		if (sc->rx_big.info[i].m == NULL)
1830 			continue;
1831 		bus_dmamap_unload(sc->rx_big.dmat,
1832 				  sc->rx_big.info[i].map);
1833 		m_freem(sc->rx_big.info[i].m);
1834 		sc->rx_big.info[i].m = NULL;
1835 	}
1836 
1837 	for (i = 0; i <= sc->tx.mask; i++) {
1838 		if (sc->tx.info[i].m == NULL)
1839 			continue;
1840 		bus_dmamap_unload(sc->tx.dmat,
1841 				  sc->tx.info[i].map);
1842 		m_freem(sc->tx.info[i].m);
1843 		sc->tx.info[i].m = NULL;
1844 	}
1845 }
1846 
/*
 * Tear down everything mxge_alloc_rings built: copy blocks, shadow
 * rings, per-slot info arrays and their DMA maps, the extra maps,
 * and finally the three DMA tags.  Safe to call on a partially
 * constructed state; each resource is checked before release.
 */
static void
mxge_free_rings(mxge_softc_t *sc)
{
	int i;

	if (sc->tx.req_bytes != NULL) {
		free(sc->tx.req_bytes, M_DEVBUF);
	}
	if (sc->rx_small.shadow != NULL)
		free(sc->rx_small.shadow, M_DEVBUF);
	if (sc->rx_big.shadow != NULL)
		free(sc->rx_big.shadow, M_DEVBUF);
	if (sc->tx.info != NULL) {
		/* destroy per-slot maps before freeing the info array
		   that holds them */
		for (i = 0; i <= sc->tx.mask; i++) {
			if (sc->tx.info[i].map != NULL)
				bus_dmamap_destroy(sc->tx.dmat,
						   sc->tx.info[i].map);
		}
		free(sc->tx.info, M_DEVBUF);
	}
	if (sc->rx_small.info != NULL) {
		for (i = 0; i <= sc->rx_small.mask; i++) {
			if (sc->rx_small.info[i].map != NULL)
				bus_dmamap_destroy(sc->rx_small.dmat,
						   sc->rx_small.info[i].map);
		}
		free(sc->rx_small.info, M_DEVBUF);
	}
	if (sc->rx_big.info != NULL) {
		for (i = 0; i <= sc->rx_big.mask; i++) {
			if (sc->rx_big.info[i].map != NULL)
				bus_dmamap_destroy(sc->rx_big.dmat,
						   sc->rx_big.info[i].map);
		}
		free(sc->rx_big.info, M_DEVBUF);
	}
	if (sc->rx_big.extra_map != NULL)
		bus_dmamap_destroy(sc->rx_big.dmat,
				   sc->rx_big.extra_map);
	if (sc->rx_small.extra_map != NULL)
		bus_dmamap_destroy(sc->rx_small.dmat,
				   sc->rx_small.extra_map);
	/* tags go last, after all maps created from them are gone */
	if (sc->tx.dmat != NULL)
		bus_dma_tag_destroy(sc->tx.dmat);
	if (sc->rx_small.dmat != NULL)
		bus_dma_tag_destroy(sc->rx_small.dmat);
	if (sc->rx_big.dmat != NULL)
		bus_dma_tag_destroy(sc->rx_big.dmat);
}
1896 
/*
 * Query the firmware for ring sizes, then allocate all host-side
 * transmit/receive ring state: the aligned tx request copy block,
 * rx shadow rings, per-slot info arrays, three bus_dma tags, and a
 * DMA map for every slot (plus one spare per rx ring used for
 * replace-before-unload refills).  On any failure everything already
 * allocated is released via mxge_free_rings().  Returns 0 or errno.
 */
static int
mxge_alloc_rings(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int tx_ring_size, rx_ring_size;
	int tx_ring_entries, rx_ring_entries;
	int i, err;
	unsigned long bytes;

	/* get ring sizes */
	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
	tx_ring_size = cmd.data0;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine ring sizes\n");
		goto abort_with_nothing;
	}

	rx_ring_size = cmd.data0;

	/* ring sizes are in bytes; convert to entry counts */
	tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
	rx_ring_entries = rx_ring_size / sizeof (mcp_dma_addr_t);
	sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
	IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
	IFQ_SET_READY(&sc->ifp->if_snd);

	/* entry counts are powers of two, so size-1 works as a mask */
	sc->tx.mask = tx_ring_entries - 1;
	sc->rx_small.mask = sc->rx_big.mask = rx_ring_entries - 1;

	err = ENOMEM;

	/* allocate the tx request copy block */
	/* NOTE(review): malloc(9) with M_WAITOK cannot return NULL, so
	   the NULL checks below are defensive only */
	bytes = 8 +
		sizeof (*sc->tx.req_list) * (MXGE_MAX_SEND_DESC + 4);
	sc->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
	if (sc->tx.req_bytes == NULL)
		goto abort_with_nothing;
	/* ensure req_list entries are aligned to 8 bytes */
	sc->tx.req_list = (mcp_kreq_ether_send_t *)
		((unsigned long)(sc->tx.req_bytes + 7) & ~7UL);

	/* allocate the rx shadow rings */
	bytes = rx_ring_entries * sizeof (*sc->rx_small.shadow);
	sc->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_small.shadow == NULL)
		goto abort_with_alloc;

	bytes = rx_ring_entries * sizeof (*sc->rx_big.shadow);
	sc->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_big.shadow == NULL)
		goto abort_with_alloc;

	/* allocate the host info rings */
	bytes = tx_ring_entries * sizeof (*sc->tx.info);
	sc->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->tx.info == NULL)
		goto abort_with_alloc;

	bytes = rx_ring_entries * sizeof (*sc->rx_small.info);
	sc->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_small.info == NULL)
		goto abort_with_alloc;

	bytes = rx_ring_entries * sizeof (*sc->rx_big.info);
	sc->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_big.info == NULL)
		goto abort_with_alloc;

	/* allocate the busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 sc->tx.boundary,	/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 MXGE_MAX_ETHER_MTU,	/* maxsize */
				 MXGE_MAX_SEND_DESC,	/* num segs */
				 sc->tx.boundary,	/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &sc->tx.dmat);		/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating tx dmat\n",
			      err);
		goto abort_with_alloc;
	}

	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 MHLEN,			/* maxsize */
				 1,			/* num segs */
				 MHLEN,			/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &sc->rx_small.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
			      err);
		goto abort_with_alloc;
	}

	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 4096,			/* maxsize */
				 1,			/* num segs */
				 4096,			/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &sc->rx_big.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
			      err);
		goto abort_with_alloc;
	}

	/* now use these tags to setup dmamaps for each slot
	   in each ring */
	for (i = 0; i <= sc->tx.mask; i++) {
		err = bus_dmamap_create(sc->tx.dmat, 0,
					&sc->tx.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d  tx dmamap\n",
			      err);
			goto abort_with_alloc;
		}
	}
	for (i = 0; i <= sc->rx_small.mask; i++) {
		err = bus_dmamap_create(sc->rx_small.dmat, 0,
					&sc->rx_small.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d  rx_small dmamap\n",
				      err);
			goto abort_with_alloc;
		}
	}
	/* spare map used to load a replacement buffer before the old
	   one is unloaded */
	err = bus_dmamap_create(sc->rx_small.dmat, 0,
				&sc->rx_small.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
			      err);
			goto abort_with_alloc;
	}

	for (i = 0; i <= sc->rx_big.mask; i++) {
		err = bus_dmamap_create(sc->rx_big.dmat, 0,
					&sc->rx_big.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d  rx_big dmamap\n",
			      err);
			goto abort_with_alloc;
		}
	}
	err = bus_dmamap_create(sc->rx_big.dmat, 0,
				&sc->rx_big.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
			      err);
			goto abort_with_alloc;
	}
	return 0;

abort_with_alloc:
	mxge_free_rings(sc);

abort_with_nothing:
	return err;
}
2073 
/*
 * Bring the interface up: reset the NIC, size the big receive
 * buffers for the current MTU, allocate the DMA rings and interrupt
 * handler, fetch the ring locations from the firmware, stock the
 * receive rings, program the MTU/buffer-size/stats parameters, and
 * finally start the firmware.  Returns 0 or an errno; on failure,
 * everything acquired so far is released via the abort labels.
 */
static int
mxge_open(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int i, err;
	bus_dmamap_t map;


	/* Copy the MAC address in case it was overridden */
	bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);

	err = mxge_reset(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to reset\n");
		return EIO;
	}

	/* Use a standard cluster for big rx buffers if the padded
	   frame fits, else fall back to a page-sized jumbo cluster. */
	if (MCLBYTES >=
	    sc->ifp->if_mtu + ETHER_HDR_LEN + MXGEFW_PAD)
		sc->big_bytes = MCLBYTES;
	else
		sc->big_bytes = MJUMPAGESIZE;

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to allocate rings\n");
		return err;
	}

	err = bus_setup_intr(sc->dev, sc->irq_res,
			     INTR_TYPE_NET | INTR_MPSAFE,
			     mxge_intr, sc, &sc->ih);
	if (err != 0) {
		goto abort_with_rings;
	}

	/* get the lanai pointers to the send and receive rings */

	/* Errors from the three commands are ORed together and
	   checked once below. */
	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
	sc->tx.lanai =
		(volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
	err |= mxge_send_cmd(sc,
				 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
	sc->rx_small.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
	err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
	sc->rx_big.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);

	if (err != 0) {
		device_printf(sc->dev,
			      "failed to get ring sizes or locations\n");
		err = EIO;
		goto abort_with_irq;
	}

	/* When write combining is enabled, point each ring at its
	   dedicated FIFO window in NIC SRAM; otherwise leave the
	   FIFO pointers NULL so plain doorbells are used. */
	if (sc->wc) {
		sc->tx.wc_fifo = sc->sram + 0x200000;
		sc->rx_small.wc_fifo = sc->sram + 0x300000;
		sc->rx_big.wc_fifo = sc->sram + 0x340000;
	} else {
		sc->tx.wc_fifo = 0;
		sc->rx_small.wc_fifo = 0;
		sc->rx_big.wc_fifo = 0;
	}


	/* stock receive rings */
	for (i = 0; i <= sc->rx_small.mask; i++) {
		map = sc->rx_small.info[i].map;
		err = mxge_get_buf_small(sc, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d smalls\n",
				      i, sc->rx_small.mask + 1);
			goto abort;
		}
	}
	for (i = 0; i <= sc->rx_big.mask; i++) {
		map = sc->rx_big.info[i].map;
		err = mxge_get_buf_big(sc, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d bigs\n",
				      i, sc->rx_big.mask + 1);
			goto abort;
		}
	}

	/* Give the firmware the mtu and the big and small buffer
	   sizes.  The firmware wants the big buf size to be a power
	   of two. Luckily, FreeBSD's clusters are powers of two */
	cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
	cmd.data0 = MHLEN;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
			     &cmd);
	cmd.data0 = sc->big_bytes;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
	/* Now give him the pointer to the stats block */
	cmd.data0 = MXGE_LOWPART_TO_U32(sc->fw_stats_dma.bus_addr);
	cmd.data1 = MXGE_HIGHPART_TO_U32(sc->fw_stats_dma.bus_addr);
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA, &cmd);

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	/* Finally, start the firmware running */
	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
	if (err) {
		device_printf(sc->dev, "Couldn't bring up link\n");
		goto abort;
	}
	sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
	sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	return 0;


abort:
	mxge_free_mbufs(sc);
abort_with_irq:
	bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
abort_with_rings:
	mxge_free_rings(sc);
	return err;
}
2201 
/*
 * Bring the interface down.  Issues MXGEFW_CMD_ETHERNET_DOWN and
 * waits (up to hz ticks) for the firmware's confirming "down"
 * interrupt to advance sc->down_cnt, then tears down the interrupt
 * handler and frees mbufs and rings.  Always returns 0 -- failures
 * are reported to the console but teardown proceeds regardless.
 */
static int
mxge_close(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int err, old_down_cnt;

	sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	/* Snapshot the down count before sending the command so we
	   can tell whether the down interrupt has fired. */
	old_down_cnt = sc->down_cnt;
	/* Make the snapshot visible before the command reaches HW. */
	mb();
	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
	if (err) {
		device_printf(sc->dev, "Couldn't bring down link\n");
	}
	if (old_down_cnt == sc->down_cnt) {
		/* wait for down irq */
		(void)tsleep(&sc->down_cnt, PWAIT, "down mxge", hz);
	}
	if (old_down_cnt == sc->down_cnt) {
		device_printf(sc->dev, "never got down irq\n");
	}
	if (sc->ih != NULL)
		bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
	mxge_free_mbufs(sc);
	mxge_free_rings(sc);
	return 0;
}
2228 
2229 
/* ifmedia change callback: manual media selection is not supported
   on this NIC, so any change request is rejected. */
static int
mxge_media_change(struct ifnet *ifp)
{
	return EINVAL;
}
2235 
2236 static int
2237 mxge_change_mtu(mxge_softc_t *sc, int mtu)
2238 {
2239 	struct ifnet *ifp = sc->ifp;
2240 	int real_mtu, old_mtu;
2241 	int err = 0;
2242 
2243 
2244 	real_mtu = mtu + ETHER_HDR_LEN;
2245 	if ((real_mtu > MXGE_MAX_ETHER_MTU) ||
2246 	    real_mtu < 60)
2247 		return EINVAL;
2248 	sx_xlock(&sc->driver_lock);
2249 	old_mtu = ifp->if_mtu;
2250 	ifp->if_mtu = mtu;
2251 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2252 		mxge_close(sc);
2253 		err = mxge_open(sc);
2254 		if (err != 0) {
2255 			ifp->if_mtu = old_mtu;
2256 			mxge_close(sc);
2257 			(void) mxge_open(sc);
2258 		}
2259 	}
2260 	sx_xunlock(&sc->driver_lock);
2261 	return err;
2262 }
2263 
2264 static void
2265 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2266 {
2267 	mxge_softc_t *sc = ifp->if_softc;
2268 
2269 
2270 	if (sc == NULL)
2271 		return;
2272 	ifmr->ifm_status = IFM_AVALID;
2273 	ifmr->ifm_status |= sc->fw_stats->link_up ? IFM_ACTIVE : 0;
2274 	ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
2275 	ifmr->ifm_active |= sc->fw_stats->link_up ? IFM_FDX : 0;
2276 }
2277 
2278 static int
2279 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
2280 {
2281 	mxge_softc_t *sc = ifp->if_softc;
2282 	struct ifreq *ifr = (struct ifreq *)data;
2283 	int err, mask;
2284 
2285 	err = 0;
2286 	switch (command) {
2287 	case SIOCSIFADDR:
2288 	case SIOCGIFADDR:
2289 		err = ether_ioctl(ifp, command, data);
2290 		break;
2291 
2292 	case SIOCSIFMTU:
2293 		err = mxge_change_mtu(sc, ifr->ifr_mtu);
2294 		break;
2295 
2296 	case SIOCSIFFLAGS:
2297 		sx_xlock(&sc->driver_lock);
2298 		if (ifp->if_flags & IFF_UP) {
2299 			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
2300 				err = mxge_open(sc);
2301 		} else {
2302 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2303 				mxge_close(sc);
2304 		}
2305 		sx_xunlock(&sc->driver_lock);
2306 		break;
2307 
2308 	case SIOCADDMULTI:
2309 	case SIOCDELMULTI:
2310 		err = 0;
2311 		break;
2312 
2313 	case SIOCSIFCAP:
2314 		sx_xlock(&sc->driver_lock);
2315 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2316 		if (mask & IFCAP_TXCSUM) {
2317 			if (IFCAP_TXCSUM & ifp->if_capenable) {
2318 				ifp->if_capenable &= ~IFCAP_TXCSUM;
2319 				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
2320 			} else {
2321 				ifp->if_capenable |= IFCAP_TXCSUM;
2322 				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
2323 			}
2324 		} else if (mask & IFCAP_RXCSUM) {
2325 			if (IFCAP_RXCSUM & ifp->if_capenable) {
2326 				ifp->if_capenable &= ~IFCAP_RXCSUM;
2327 				sc->csum_flag = 0;
2328 			} else {
2329 				ifp->if_capenable |= IFCAP_RXCSUM;
2330 				sc->csum_flag = 1;
2331 			}
2332 		}
2333 		sx_xunlock(&sc->driver_lock);
2334 		break;
2335 
2336 	case SIOCGIFMEDIA:
2337 		err = ifmedia_ioctl(ifp, (struct ifreq *)data,
2338 				    &sc->media, command);
2339                 break;
2340 
2341 	default:
2342 		err = ENOTTY;
2343         }
2344 	return err;
2345 }
2346 
2347 static void
2348 mxge_fetch_tunables(mxge_softc_t *sc)
2349 {
2350 
2351 	TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
2352 			  &mxge_flow_control);
2353 	TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
2354 			  &mxge_intr_coal_delay);
2355 	TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
2356 			  &mxge_nvidia_ecrc_enable);
2357 	TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
2358 			  &mxge_deassert_wait);
2359 	TUNABLE_INT_FETCH("hw.mxge.verbose",
2360 			  &mxge_verbose);
2361 
2362 	if (bootverbose)
2363 		mxge_verbose = 1;
2364 	if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
2365 		mxge_intr_coal_delay = 30;
2366 	sc->pause = mxge_flow_control;
2367 }
2368 
2369 static int
2370 mxge_attach(device_t dev)
2371 {
2372 	mxge_softc_t *sc = device_get_softc(dev);
2373 	struct ifnet *ifp;
2374 	size_t bytes;
2375 	int rid, err;
2376 	uint16_t cmd;
2377 
2378 	sc->dev = dev;
2379 	mxge_fetch_tunables(sc);
2380 
2381 	err = bus_dma_tag_create(NULL,			/* parent */
2382 				 1,			/* alignment */
2383 				 4096,			/* boundary */
2384 				 BUS_SPACE_MAXADDR,	/* low */
2385 				 BUS_SPACE_MAXADDR,	/* high */
2386 				 NULL, NULL,		/* filter */
2387 				 MXGE_MAX_ETHER_MTU,	/* maxsize */
2388 				 MXGE_MAX_SEND_DESC, 	/* num segs */
2389 				 4096,			/* maxsegsize */
2390 				 0,			/* flags */
2391 				 NULL, NULL,		/* lock */
2392 				 &sc->parent_dmat);	/* tag */
2393 
2394 	if (err != 0) {
2395 		device_printf(sc->dev, "Err %d allocating parent dmat\n",
2396 			      err);
2397 		goto abort_with_nothing;
2398 	}
2399 
2400 	ifp = sc->ifp = if_alloc(IFT_ETHER);
2401 	if (ifp == NULL) {
2402 		device_printf(dev, "can not if_alloc()\n");
2403 		err = ENOSPC;
2404 		goto abort_with_parent_dmat;
2405 	}
2406 	mtx_init(&sc->cmd_lock, NULL,
2407 		 MTX_NETWORK_LOCK, MTX_DEF);
2408 	mtx_init(&sc->tx_lock, device_get_nameunit(dev),
2409 		 MTX_NETWORK_LOCK, MTX_DEF);
2410 	sx_init(&sc->driver_lock, device_get_nameunit(dev));
2411 
2412 	/* Enable DMA and Memory space access */
2413 	pci_enable_busmaster(dev);
2414 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2415 	cmd |= PCIM_CMD_MEMEN;
2416 	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2417 
2418 	/* Map the board into the kernel */
2419 	rid = PCIR_BARS;
2420 	sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
2421 					 ~0, 1, RF_ACTIVE);
2422 	if (sc->mem_res == NULL) {
2423 		device_printf(dev, "could not map memory\n");
2424 		err = ENXIO;
2425 		goto abort_with_lock;
2426 	}
2427 	sc->sram = rman_get_virtual(sc->mem_res);
2428 	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
2429 	if (sc->sram_size > rman_get_size(sc->mem_res)) {
2430 		device_printf(dev, "impossible memory region size %ld\n",
2431 			      rman_get_size(sc->mem_res));
2432 		err = ENXIO;
2433 		goto abort_with_mem_res;
2434 	}
2435 
2436 	/* make NULL terminated copy of the EEPROM strings section of
2437 	   lanai SRAM */
2438 	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
2439 	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
2440 				rman_get_bushandle(sc->mem_res),
2441 				sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
2442 				sc->eeprom_strings,
2443 				MXGE_EEPROM_STRINGS_SIZE - 2);
2444 	err = mxge_parse_strings(sc);
2445 	if (err != 0)
2446 		goto abort_with_mem_res;
2447 
2448 	/* Enable write combining for efficient use of PCIe bus */
2449 	mxge_enable_wc(sc);
2450 
2451 	/* Allocate the out of band dma memory */
2452 	err = mxge_dma_alloc(sc, &sc->cmd_dma,
2453 			     sizeof (mxge_cmd_t), 64);
2454 	if (err != 0)
2455 		goto abort_with_mem_res;
2456 	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
2457 	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
2458 	if (err != 0)
2459 		goto abort_with_cmd_dma;
2460 
2461 	err = mxge_dma_alloc(sc, &sc->fw_stats_dma,
2462 			     sizeof (*sc->fw_stats), 64);
2463 	if (err != 0)
2464 		goto abort_with_zeropad_dma;
2465 	sc->fw_stats = (mcp_irq_data_t *)sc->fw_stats_dma.addr;
2466 
2467 
2468 	/* allocate interrupt queues */
2469 	bytes = mxge_max_intr_slots * sizeof (*sc->rx_done.entry);
2470 	err = mxge_dma_alloc(sc, &sc->rx_done.dma, bytes, 4096);
2471 	if (err != 0)
2472 		goto abort_with_fw_stats;
2473 	sc->rx_done.entry = sc->rx_done.dma.addr;
2474 	bzero(sc->rx_done.entry, bytes);
2475 	/* Add our ithread  */
2476 	rid = 0;
2477 	sc->irq_res = bus_alloc_resource(dev, SYS_RES_IRQ, &rid, 0, ~0,
2478 					 1, RF_SHAREABLE | RF_ACTIVE);
2479 	if (sc->irq_res == NULL) {
2480 		device_printf(dev, "could not alloc interrupt\n");
2481 		goto abort_with_rx_done;
2482 	}
2483 
2484 	/* load the firmware */
2485 	mxge_select_firmware(sc);
2486 
2487 	err = mxge_load_firmware(sc);
2488 	if (err != 0)
2489 		goto abort_with_irq_res;
2490 	sc->intr_coal_delay = mxge_intr_coal_delay;
2491 	err = mxge_reset(sc);
2492 	if (err != 0)
2493 		goto abort_with_irq_res;
2494 
2495 	/* hook into the network stack */
2496 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2497 	ifp->if_baudrate = 100000000;
2498 	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM;
2499 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP;
2500 	ifp->if_capenable = ifp->if_capabilities;
2501 	sc->csum_flag = 1;
2502         ifp->if_init = mxge_init;
2503         ifp->if_softc = sc;
2504         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2505         ifp->if_ioctl = mxge_ioctl;
2506         ifp->if_start = mxge_start;
2507 	ifp->if_watchdog = mxge_watchdog;
2508 	ether_ifattach(ifp, sc->mac_addr);
2509 	/* ether_ifattach sets mtu to 1500 */
2510 	ifp->if_mtu = MXGE_MAX_ETHER_MTU - ETHER_HDR_LEN;
2511 
2512 	/* Initialise the ifmedia structure */
2513 	ifmedia_init(&sc->media, 0, mxge_media_change,
2514 		     mxge_media_status);
2515 	ifmedia_add(&sc->media, IFM_ETHER|IFM_AUTO, 0, NULL);
2516 	mxge_add_sysctls(sc);
2517 	return 0;
2518 
2519 abort_with_irq_res:
2520 	bus_release_resource(dev, SYS_RES_IRQ, 0, sc->irq_res);
2521 abort_with_rx_done:
2522 	sc->rx_done.entry = NULL;
2523 	mxge_dma_free(&sc->rx_done.dma);
2524 abort_with_fw_stats:
2525 	mxge_dma_free(&sc->fw_stats_dma);
2526 abort_with_zeropad_dma:
2527 	mxge_dma_free(&sc->zeropad_dma);
2528 abort_with_cmd_dma:
2529 	mxge_dma_free(&sc->cmd_dma);
2530 abort_with_mem_res:
2531 	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
2532 abort_with_lock:
2533 	pci_disable_busmaster(dev);
2534 	mtx_destroy(&sc->cmd_lock);
2535 	mtx_destroy(&sc->tx_lock);
2536 	sx_destroy(&sc->driver_lock);
2537 	if_free(ifp);
2538 abort_with_parent_dmat:
2539 	bus_dma_tag_destroy(sc->parent_dmat);
2540 
2541 abort_with_nothing:
2542 	return err;
2543 }
2544 
/*
 * Device detach: close the interface if it is running, unhook it
 * from the network stack, then release all resources in the reverse
 * order of mxge_attach().  Always returns 0.
 */
static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	sx_xlock(&sc->driver_lock);
	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
		mxge_close(sc);
	sx_xunlock(&sc->driver_lock);
	ether_ifdetach(sc->ifp);
	/* presumably stops the firmware's dummy RDMA engine before
	   the SRAM mapping goes away -- see mxge_dummy_rdma() */
	mxge_dummy_rdma(sc, 0);
	bus_release_resource(dev, SYS_RES_IRQ, 0, sc->irq_res);
	sc->rx_done.entry = NULL;
	mxge_dma_free(&sc->rx_done.dma);
	mxge_dma_free(&sc->fw_stats_dma);
	mxge_dma_free(&sc->zeropad_dma);
	mxge_dma_free(&sc->cmd_dma);
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_lock);
	mtx_destroy(&sc->tx_lock);
	sx_destroy(&sc->driver_lock);
	if_free(sc->ifp);
	bus_dma_tag_destroy(sc->parent_dmat);
	return 0;
}
2571 
/* System shutdown hook: nothing to do for this device. */
static int
mxge_shutdown(device_t dev)
{
	return 0;
}
2577 
2578 /*
2579   This file uses Myri10GE driver indentation.
2580 
2581   Local Variables:
2582   c-file-style:"linux"
2583   tab-width:8
2584   End:
2585 */
2586