xref: /freebsd/sys/dev/mxge/if_mxge.c (revision 87569f75a91f298c52a71823c04d41cf53c88889)
1 /*******************************************************************************
2 
3 Copyright (c) 2006, Myricom Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Redistributions in binary form must reproduce the above copyright
13     notice, this list of conditions and the following disclaimer in the
14     documentation and/or other materials provided with the distribution.
15 
16  3. Neither the name of the Myricom Inc, nor the names of its
17     contributors may be used to endorse or promote products derived from
18     this software without specific prior written permission.
19 
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31 
32 ***************************************************************************/
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/linker.h>
40 #include <sys/firmware.h>
41 #include <sys/endian.h>
42 #include <sys/sockio.h>
43 #include <sys/mbuf.h>
44 #include <sys/malloc.h>
45 #include <sys/kdb.h>
46 #include <sys/kernel.h>
47 #include <sys/module.h>
48 #include <sys/memrange.h>
49 #include <sys/socket.h>
50 #include <sys/sysctl.h>
51 #include <sys/sx.h>
52 
53 #include <net/if.h>
54 #include <net/if_arp.h>
55 #include <net/ethernet.h>
56 #include <net/if_dl.h>
57 #include <net/if_media.h>
58 
59 #include <net/bpf.h>
60 
61 #include <net/if_types.h>
62 #include <net/if_vlan_var.h>
63 #include <net/zlib.h>
64 
65 #include <netinet/in_systm.h>
66 #include <netinet/in.h>
67 #include <netinet/ip.h>
68 
69 #include <machine/clock.h>      /* for DELAY */
70 #include <machine/bus.h>
71 #include <machine/resource.h>
72 #include <sys/bus.h>
73 #include <sys/rman.h>
74 
75 #include <dev/pci/pcireg.h>
76 #include <dev/pci/pcivar.h>
77 
78 #include <vm/vm.h>		/* for pmap_mapdev() */
79 #include <vm/pmap.h>
80 
81 #include <dev/myri10ge/myri10ge_mcp.h>
82 #include <dev/myri10ge/mcp_gen_header.h>
83 #include <dev/myri10ge/if_myri10ge_var.h>
84 
/*
 * Tunable parameters (compiled-in defaults).
 */

/*
 * Firmware image names handed to firmware_get().  The "ethp" image is
 * the one selected when completions cannot be assumed to be aligned.
 */
static char *myri10ge_fw_aligned = "myri10ge_eth_z8e";
static char *myri10ge_fw_unaligned = "myri10ge_ethp_z8e";

/* non-zero: try to enable ECRC on an upstream Nvidia bridge */
static int myri10ge_nvidia_ecrc_enable = 1;

/* number of slots used to size each interrupt queue */
static int myri10ge_max_intr_slots = 128;

/* initial interrupt coalescing delay programmed at reset */
static int myri10ge_intr_coal_delay = 30;

/* exported read/write as the "skip_pio_read" sysctl */
static int myri10ge_skip_pio_read = 0;

/* flow control default; its consumer is not in this part of the file */
static int myri10ge_flow_control = 1;
93 
94 static int myri10ge_probe(device_t dev);
95 static int myri10ge_attach(device_t dev);
96 static int myri10ge_detach(device_t dev);
97 static int myri10ge_shutdown(device_t dev);
98 static void myri10ge_intr(void *arg);
99 
100 static device_method_t myri10ge_methods[] =
101 {
102   /* Device interface */
103   DEVMETHOD(device_probe, myri10ge_probe),
104   DEVMETHOD(device_attach, myri10ge_attach),
105   DEVMETHOD(device_detach, myri10ge_detach),
106   DEVMETHOD(device_shutdown, myri10ge_shutdown),
107   {0, 0}
108 };
109 
110 static driver_t myri10ge_driver =
111 {
112   "myri10ge",
113   myri10ge_methods,
114   sizeof(myri10ge_softc_t),
115 };
116 
117 static devclass_t myri10ge_devclass;
118 
119 /* Declare ourselves to be a child of the PCI bus.*/
120 DRIVER_MODULE(myri10ge, pci, myri10ge_driver, myri10ge_devclass, 0, 0);
121 MODULE_DEPEND(myri10ge, firmware, 1, 1, 1);
122 
123 static int
124 myri10ge_probe(device_t dev)
125 {
126   if ((pci_get_vendor(dev) == MYRI10GE_PCI_VENDOR_MYRICOM) &&
127       (pci_get_device(dev) == MYRI10GE_PCI_DEVICE_Z8E)) {
128 	  device_set_desc(dev, "Myri10G-PCIE-8A");
129 	  return 0;
130   }
131   return ENXIO;
132 }
133 
134 static void
135 myri10ge_enable_wc(myri10ge_softc_t *sc)
136 {
137 	struct mem_range_desc mrdesc;
138 	vm_paddr_t pa;
139 	vm_offset_t len;
140 	int err, action;
141 
142 	pa = rman_get_start(sc->mem_res);
143 	len = rman_get_size(sc->mem_res);
144 	mrdesc.mr_base = pa;
145 	mrdesc.mr_len = len;
146 	mrdesc.mr_flags = MDF_WRITECOMBINE;
147 	action = MEMRANGE_SET_UPDATE;
148 	strcpy((char *)&mrdesc.mr_owner, "myri10ge");
149 	err = mem_range_attr_set(&mrdesc, &action);
150 	if (err != 0) {
151 		device_printf(sc->dev,
152 			      "w/c failed for pa 0x%lx, len 0x%lx, err = %d\n",
153 			      (unsigned long)pa, (unsigned long)len, err);
154 	} else {
155 		sc->wc = 1;
156 	}
157 }
158 
159 
160 /* callback to get our DMA address */
161 static void
162 myri10ge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
163 			 int error)
164 {
165 	if (error == 0) {
166 		*(bus_addr_t *) arg = segs->ds_addr;
167 	}
168 }
169 
170 static int
171 myri10ge_dma_alloc(myri10ge_softc_t *sc, myri10ge_dma_t *dma, size_t bytes,
172 		   bus_size_t alignment)
173 {
174 	int err;
175 	device_t dev = sc->dev;
176 
177 	/* allocate DMAable memory tags */
178 	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
179 				 alignment,		/* alignment */
180 				 4096,			/* boundary */
181 				 BUS_SPACE_MAXADDR,	/* low */
182 				 BUS_SPACE_MAXADDR,	/* high */
183 				 NULL, NULL,		/* filter */
184 				 bytes,			/* maxsize */
185 				 1,			/* num segs */
186 				 4096,			/* maxsegsize */
187 				 BUS_DMA_COHERENT,	/* flags */
188 				 NULL, NULL,		/* lock */
189 				 &dma->dmat);		/* tag */
190 	if (err != 0) {
191 		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
192 		return err;
193 	}
194 
195 	/* allocate DMAable memory & map */
196 	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
197 			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
198 				| BUS_DMA_ZERO),  &dma->map);
199 	if (err != 0) {
200 		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
201 		goto abort_with_dmat;
202 	}
203 
204 	/* load the memory */
205 	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
206 			      myri10ge_dmamap_callback,
207 			      (void *)&dma->bus_addr, 0);
208 	if (err != 0) {
209 		device_printf(dev, "couldn't load map (err = %d)\n", err);
210 		goto abort_with_mem;
211 	}
212 	return 0;
213 
214 abort_with_mem:
215 	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
216 abort_with_dmat:
217 	(void)bus_dma_tag_destroy(dma->dmat);
218 	return err;
219 }
220 
221 
222 static void
223 myri10ge_dma_free(myri10ge_dma_t *dma)
224 {
225 	bus_dmamap_unload(dma->dmat, dma->map);
226 	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
227 	(void)bus_dma_tag_destroy(dma->dmat);
228 }
229 
230 /*
231  * The eeprom strings on the lanaiX have the format
232  * SN=x\0
233  * MAC=x:x:x:x:x:x\0
234  * PC=text\0
235  */
236 
237 static int
238 myri10ge_parse_strings(myri10ge_softc_t *sc)
239 {
240 #define MYRI10GE_NEXT_STRING(p) while(ptr < limit && *ptr++)
241 
242 	char *ptr, *limit;
243 	int i, found_mac;
244 
245 	ptr = sc->eeprom_strings;
246 	limit = sc->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE;
247 	found_mac = 0;
248 	while (ptr < limit && *ptr != '\0') {
249 		if (memcmp(ptr, "MAC=", 4) == 0) {
250 			ptr+=4;
251 			sc->mac_addr_string = ptr;
252 			for (i = 0; i < 6; i++) {
253 				if ((ptr + 2) > limit)
254 					goto abort;
255 				sc->mac_addr[i] = strtoul(ptr, NULL, 16);
256 				found_mac = 1;
257 				ptr += 3;
258 			}
259 		} else if (memcmp(ptr, "PC=", 4) == 0) {
260 			sc->product_code_string = ptr;
261 		}
262 		MYRI10GE_NEXT_STRING(ptr);
263 	}
264 
265 	if (found_mac)
266 		return 0;
267 
268  abort:
269 	device_printf(sc->dev, "failed to parse eeprom_strings\n");
270 
271 	return ENXIO;
272 }
273 
274 #if #cpu(i386) || defined __i386 || defined i386 || defined __i386__ || #cpu(x86_64) || defined __x86_64__
275 static int
276 myri10ge_enable_nvidia_ecrc(myri10ge_softc_t *sc, device_t pdev)
277 {
278 	uint32_t val;
279 	unsigned long off;
280 	char *va, *cfgptr;
281 	uint16_t vendor_id, device_id;
282 	uintptr_t bus, slot, func, ivend, idev;
283 	uint32_t *ptr32;
284 
285 	/* XXXX
286 	   Test below is commented because it is believed that doing
287 	   config read/write beyond 0xff will access the config space
288 	   for the next larger function.  Uncomment this and remove
289 	   the hacky pmap_mapdev() way of accessing config space when
290 	   FreeBSD grows support for extended pcie config space access
291 	*/
292 #if 0
293 	/* See if we can, by some miracle, access the extended
294 	   config space */
295 	val = pci_read_config(pdev, 0x178, 4);
296 	if (val != 0xffffffff) {
297 		val |= 0x40;
298 		pci_write_config(pdev, 0x178, val, 4);
299 		return 0;
300 	}
301 #endif
302 	/* Rather than using normal pci config space writes, we must
303 	 * map the Nvidia config space ourselves.  This is because on
304 	 * opteron/nvidia class machine the 0xe000000 mapping is
305 	 * handled by the nvidia chipset, that means the internal PCI
306 	 * device (the on-chip northbridge), or the amd-8131 bridge
307 	 * and things behind them are not visible by this method.
308 	 */
309 
310 	BUS_READ_IVAR(device_get_parent(pdev), pdev,
311 		      PCI_IVAR_BUS, &bus);
312 	BUS_READ_IVAR(device_get_parent(pdev), pdev,
313 		      PCI_IVAR_SLOT, &slot);
314 	BUS_READ_IVAR(device_get_parent(pdev), pdev,
315 		      PCI_IVAR_FUNCTION, &func);
316 	BUS_READ_IVAR(device_get_parent(pdev), pdev,
317 		      PCI_IVAR_VENDOR, &ivend);
318 	BUS_READ_IVAR(device_get_parent(pdev), pdev,
319 		      PCI_IVAR_DEVICE, &idev);
320 
321 	off =  0xe0000000UL
322 		+ 0x00100000UL * (unsigned long)bus
323 		+ 0x00001000UL * (unsigned long)(func
324 						 + 8 * slot);
325 
326 	/* map it into the kernel */
327 	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
328 
329 
330 	if (va == NULL) {
331 		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
332 		return EIO;
333 	}
334 	/* get a pointer to the config space mapped into the kernel */
335 	cfgptr = va + (off & PAGE_MASK);
336 
337 	/* make sure that we can really access it */
338 	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
339 	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
340 	if (! (vendor_id == ivend && device_id == idev)) {
341 		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
342 			      vendor_id, device_id);
343 		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
344 		return EIO;
345 	}
346 
347 	ptr32 = (uint32_t*)(cfgptr + 0x178);
348 	val = *ptr32;
349 
350 	if (val == 0xffffffff) {
351 		device_printf(sc->dev, "extended mapping failed\n");
352 		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
353 		return EIO;
354 	}
355 	*ptr32 = val | 0x40;
356 	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
357 	device_printf(sc->dev,
358 		      "Enabled ECRC on upstream Nvidia bridge at %d:%d:%d\n",
359 		      (int)bus, (int)slot, (int)func);
360 	return 0;
361 }
362 #else
363 static int
364 myri10ge_enable_nvidia_ecrc(myri10ge_softc_t *sc, device_t pdev)
365 {
366 	device_printf(sc->dev,
367 		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
368 	return ENXIO;
369 }
370 #endif
371 /*
372  * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
373  * when the PCI-E Completion packets are aligned on an 8-byte
374  * boundary.  Some PCI-E chip sets always align Completion packets; on
375  * the ones that do not, the alignment can be enforced by enabling
376  * ECRC generation (if supported).
377  *
378  * When PCI-E Completion packets are not aligned, it is actually more
379  * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
380  *
381  * If the driver can neither enable ECRC nor verify that it has
382  * already been enabled, then it must use a firmware image which works
383  * around unaligned completion packets (ethp_z8e.dat), and it should
384  * also ensure that it never gives the device a Read-DMA which is
385  * larger than 2KB by setting the tx.boundary to 2KB.  If ECRC is
386  * enabled, then the driver should use the aligned (eth_z8e.dat)
387  * firmware image, and set tx.boundary to 4KB.
388  */
389 
390 static void
391 myri10ge_select_firmware(myri10ge_softc_t *sc)
392 {
393 	int err, aligned = 0;
394 	device_t pdev;
395 	uint16_t pvend, pdid;
396 
397 	pdev = device_get_parent(device_get_parent(sc->dev));
398 	if (pdev == NULL) {
399 		device_printf(sc->dev, "could not find parent?\n");
400 		goto abort;
401 	}
402 	pvend = pci_read_config(pdev, PCIR_VENDOR, 2);
403 	pdid = pci_read_config(pdev, PCIR_DEVICE, 2);
404 
405 	/* see if we can enable ECRC's on an upstream
406 	   Nvidia bridge */
407 	if (myri10ge_nvidia_ecrc_enable &&
408 	    (pvend == 0x10de && pdid == 0x005d)) {
409 		err = myri10ge_enable_nvidia_ecrc(sc, pdev);
410 		if (err == 0) {
411 			aligned = 1;
412 			device_printf(sc->dev,
413 				      "Assuming aligned completions (ECRC)\n");
414 		}
415 	}
416 	/* see if the upstream bridge is known to
417 	   provided aligned completions */
418 	if (/* HT2000  */ (pvend == 0x1166 && pdid == 0x0132) ||
419 	    /* Ontario */ (pvend == 0x10b5 && pdid == 0x8532)) {
420 		device_printf(sc->dev,
421 			      "Assuming aligned completions (0x%x:0x%x)\n",
422 			      pvend, pdid);
423 	}
424 
425 abort:
426 	if (aligned) {
427 		sc->fw_name = myri10ge_fw_aligned;
428 		sc->tx.boundary = 4096;
429 	} else {
430 		sc->fw_name = myri10ge_fw_unaligned;
431 		sc->tx.boundary = 2048;
432 	}
433 }
434 
/*
 * Two views of the same pointer value: store through ro_char, read back
 * through rw_char to obtain a non-const pointer without a cast.
 */
union qualhack {
	const char	*ro_char;
	char		*rw_char;
};
440 
441 
442 static int
443 myri10ge_load_firmware_helper(myri10ge_softc_t *sc, uint32_t *limit)
444 {
445 	struct firmware *fw;
446 	const mcp_gen_header_t *hdr;
447 	unsigned hdr_offset;
448 	const char *fw_data;
449 	union qualhack hack;
450 	int status;
451 
452 
453 	fw = firmware_get(sc->fw_name);
454 
455 	if (fw == NULL) {
456 		device_printf(sc->dev, "Could not find firmware image %s\n",
457 			      sc->fw_name);
458 		return ENOENT;
459 	}
460 	if (fw->datasize > *limit ||
461 	    fw->datasize < MCP_HEADER_PTR_OFFSET + 4) {
462 		device_printf(sc->dev, "Firmware image %s too large (%d/%d)\n",
463 			      sc->fw_name, (int)fw->datasize, (int) *limit);
464 		status = ENOSPC;
465 		goto abort_with_fw;
466 	}
467 	*limit = fw->datasize;
468 
469 	/* check id */
470 	fw_data = (const char *)fw->data;
471 	hdr_offset = htobe32(*(const uint32_t *)
472 			     (fw_data + MCP_HEADER_PTR_OFFSET));
473 	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw->datasize) {
474 		device_printf(sc->dev, "Bad firmware file");
475 		status = EIO;
476 		goto abort_with_fw;
477 	}
478 	hdr = (const void*)(fw_data + hdr_offset);
479 	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
480 		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
481 			      be32toh(hdr->mcp_type));
482 		status = EIO;
483 		goto abort_with_fw;
484 	}
485 
486 	/* save firmware version for sysctl */
487 	strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
488 	device_printf(sc->dev, "firmware id: %s\n", hdr->version);
489 
490 	hack.ro_char = fw_data;
491 	/* Copy the inflated firmware to NIC SRAM. */
492 	myri10ge_pio_copy(&sc->sram[MYRI10GE_FW_OFFSET],
493 			  hack.rw_char,  *limit);
494 
495 	status = 0;
496 abort_with_fw:
497 	firmware_put(fw, FIRMWARE_UNLOAD);
498 	return status;
499 }
500 
501 /*
502  * Enable or disable periodic RDMAs from the host to make certain
503  * chipsets resend dropped PCIe messages
504  */
505 
506 static void
507 myri10ge_dummy_rdma(myri10ge_softc_t *sc, int enable)
508 {
509 	char buf_bytes[72];
510 	volatile uint32_t *confirm;
511 	volatile char *submit;
512 	uint32_t *buf, dma_low, dma_high;
513 	int i;
514 
515 	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
516 
517 	/* clear confirmation addr */
518 	confirm = (volatile uint32_t *)sc->cmd;
519 	*confirm = 0;
520 	mb();
521 
522 	/* send an rdma command to the PCIe engine, and wait for the
523 	   response in the confirmation address.  The firmware should
524 	   write a -1 there to indicate it is alive and well
525 	*/
526 
527 	dma_low = MYRI10GE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
528 	dma_high = MYRI10GE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
529 	buf[0] = htobe32(dma_high);		/* confirm addr MSW */
530 	buf[1] = htobe32(dma_low);		/* confirm addr LSW */
531 	buf[2] = htobe32(0xffffffff);		/* confirm data */
532 	dma_low = MYRI10GE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
533 	dma_high = MYRI10GE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
534 	buf[3] = htobe32(dma_high); 		/* dummy addr MSW */
535 	buf[4] = htobe32(dma_low); 		/* dummy addr LSW */
536 	buf[5] = htobe32(enable);			/* enable? */
537 
538 
539 	submit = (volatile char *)(sc->sram + 0xfc01c0);
540 
541 	myri10ge_pio_copy(submit, buf, 64);
542 	mb();
543 	DELAY(1000);
544 	mb();
545 	i = 0;
546 	while (*confirm != 0xffffffff && i < 20) {
547 		DELAY(1000);
548 		i++;
549 	}
550 	if (*confirm != 0xffffffff) {
551 		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
552 			      (enable ? "enable" : "disable"), confirm,
553 			      *confirm);
554 	}
555 	return;
556 }
557 
558 static int
559 myri10ge_send_cmd(myri10ge_softc_t *sc, uint32_t cmd,
560 		  myri10ge_cmd_t *data)
561 {
562 	mcp_cmd_t *buf;
563 	char buf_bytes[sizeof(*buf) + 8];
564 	volatile mcp_cmd_response_t *response = sc->cmd;
565 	volatile char *cmd_addr = sc->sram + MYRI10GE_MCP_CMD_OFFSET;
566 	uint32_t dma_low, dma_high;
567 	int sleep_total = 0;
568 
569 	/* ensure buf is aligned to 8 bytes */
570 	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
571 
572 	buf->data0 = htobe32(data->data0);
573 	buf->data1 = htobe32(data->data1);
574 	buf->data2 = htobe32(data->data2);
575 	buf->cmd = htobe32(cmd);
576 	dma_low = MYRI10GE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
577 	dma_high = MYRI10GE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
578 
579 	buf->response_addr.low = htobe32(dma_low);
580 	buf->response_addr.high = htobe32(dma_high);
581 	mtx_lock(&sc->cmd_lock);
582 	response->result = 0xffffffff;
583 	mb();
584 	myri10ge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
585 
586 	/* wait up to 2 seconds */
587 	for (sleep_total = 0; sleep_total <  (2 * 1000); sleep_total += 10) {
588 		bus_dmamap_sync(sc->cmd_dma.dmat,
589 				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
590 		mb();
591 		if (response->result != 0xffffffff) {
592 			if (response->result == 0) {
593 				data->data0 = be32toh(response->data);
594 				mtx_unlock(&sc->cmd_lock);
595 				return 0;
596 			} else {
597 				device_printf(sc->dev,
598 					      "myri10ge: command %d "
599 					      "failed, result = %d\n",
600 					      cmd, be32toh(response->result));
601 				mtx_unlock(&sc->cmd_lock);
602 				return ENXIO;
603 			}
604 		}
605 		DELAY(1000 * 10);
606 	}
607 	mtx_unlock(&sc->cmd_lock);
608 	device_printf(sc->dev, "myri10ge: command %d timed out"
609 		      "result = %d\n",
610 		      cmd, be32toh(response->result));
611 	return EAGAIN;
612 }
613 
614 
615 static int
616 myri10ge_load_firmware(myri10ge_softc_t *sc)
617 {
618 	volatile uint32_t *confirm;
619 	volatile char *submit;
620 	char buf_bytes[72];
621 	uint32_t *buf, size, dma_low, dma_high;
622 	int status, i;
623 
624 	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
625 
626 	size = sc->sram_size;
627 	status = myri10ge_load_firmware_helper(sc, &size);
628 	if (status) {
629 		device_printf(sc->dev, "firmware loading failed\n");
630 		return status;
631 	}
632 	/* clear confirmation addr */
633 	confirm = (volatile uint32_t *)sc->cmd;
634 	*confirm = 0;
635 	mb();
636 	/* send a reload command to the bootstrap MCP, and wait for the
637 	   response in the confirmation address.  The firmware should
638 	   write a -1 there to indicate it is alive and well
639 	*/
640 
641 	dma_low = MYRI10GE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
642 	dma_high = MYRI10GE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
643 
644 	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
645 	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
646 	buf[2] = htobe32(0xffffffff);	/* confirm data */
647 
648 	/* FIX: All newest firmware should un-protect the bottom of
649 	   the sram before handoff. However, the very first interfaces
650 	   do not. Therefore the handoff copy must skip the first 8 bytes
651 	*/
652 					/* where the code starts*/
653 	buf[3] = htobe32(MYRI10GE_FW_OFFSET + 8);
654 	buf[4] = htobe32(size - 8); 	/* length of code */
655 	buf[5] = htobe32(8);		/* where to copy to */
656 	buf[6] = htobe32(0);		/* where to jump to */
657 
658 	submit = (volatile char *)(sc->sram + 0xfc0000);
659 	myri10ge_pio_copy(submit, buf, 64);
660 	mb();
661 	DELAY(1000);
662 	mb();
663 	i = 0;
664 	while (*confirm != 0xffffffff && i < 20) {
665 		DELAY(1000*10);
666 		i++;
667 		bus_dmamap_sync(sc->cmd_dma.dmat,
668 				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
669 	}
670 	if (*confirm != 0xffffffff) {
671 		device_printf(sc->dev,"handoff failed (%p = 0x%x)",
672 			confirm, *confirm);
673 
674 		return ENXIO;
675 	}
676 	myri10ge_dummy_rdma(sc, 1);
677 	return 0;
678 }
679 
680 static int
681 myri10ge_update_mac_address(myri10ge_softc_t *sc)
682 {
683 	myri10ge_cmd_t cmd;
684 	uint8_t *addr = sc->mac_addr;
685 	int status;
686 
687 
688 	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
689 		     | (addr[2] << 8) | addr[3]);
690 
691 	cmd.data1 = ((addr[4] << 8) | (addr[5]));
692 
693 	status = myri10ge_send_cmd(sc, MYRI10GE_MCP_SET_MAC_ADDRESS, &cmd);
694 	return status;
695 }
696 
697 static int
698 myri10ge_change_pause(myri10ge_softc_t *sc, int pause)
699 {
700 	myri10ge_cmd_t cmd;
701 	int status;
702 
703 	if (pause)
704 		status = myri10ge_send_cmd(sc,
705 					   MYRI10GE_MCP_ENABLE_FLOW_CONTROL,
706 					   &cmd);
707 	else
708 		status = myri10ge_send_cmd(sc,
709 					   MYRI10GE_MCP_DISABLE_FLOW_CONTROL,
710 					   &cmd);
711 
712 	if (status) {
713 		device_printf(sc->dev, "Failed to set flow control mode\n");
714 		return ENXIO;
715 	}
716 	sc->pause = pause;
717 	return 0;
718 }
719 
720 static void
721 myri10ge_change_promisc(myri10ge_softc_t *sc, int promisc)
722 {
723 	myri10ge_cmd_t cmd;
724 	int status;
725 
726 	if (promisc)
727 		status = myri10ge_send_cmd(sc,
728 					   MYRI10GE_MCP_ENABLE_PROMISC,
729 					   &cmd);
730 	else
731 		status = myri10ge_send_cmd(sc,
732 					   MYRI10GE_MCP_DISABLE_PROMISC,
733 					   &cmd);
734 
735 	if (status) {
736 		device_printf(sc->dev, "Failed to set promisc mode\n");
737 	}
738 }
739 
740 static int
741 myri10ge_reset(myri10ge_softc_t *sc)
742 {
743 
744 	myri10ge_cmd_t cmd;
745 	int status, i;
746 
747 	/* try to send a reset command to the card to see if it
748 	   is alive */
749 	memset(&cmd, 0, sizeof (cmd));
750 	status = myri10ge_send_cmd(sc, MYRI10GE_MCP_CMD_RESET, &cmd);
751 	if (status != 0) {
752 		device_printf(sc->dev, "failed reset\n");
753 		return ENXIO;
754 	}
755 
756 	/* Now exchange information about interrupts  */
757 
758 	cmd.data0 = (uint32_t)
759 		(myri10ge_max_intr_slots * sizeof (*sc->intr.q[0]));
760 	status = myri10ge_send_cmd(sc, MYRI10GE_MCP_CMD_SET_INTRQ_SIZE, &cmd);
761 	for (i = 0; (status == 0) && (i < MYRI10GE_NUM_INTRQS); i++) {
762 		cmd.data0 = MYRI10GE_LOWPART_TO_U32(sc->intr.dma[i].bus_addr);
763 		cmd.data1 = MYRI10GE_HIGHPART_TO_U32(sc->intr.dma[i].bus_addr);
764 		status |=
765 			myri10ge_send_cmd(sc, (i +
766 					       MYRI10GE_MCP_CMD_SET_INTRQ0_DMA),
767 					  &cmd);
768 	}
769 
770 	cmd.data0 = sc->intr_coal_delay = myri10ge_intr_coal_delay;
771 	status |= myri10ge_send_cmd(sc,
772 				    MYRI10GE_MCP_CMD_SET_INTR_COAL_DELAY, &cmd);
773 
774 	if (sc->msi_enabled) {
775 		status |= myri10ge_send_cmd
776 			(sc,  MYRI10GE_MCP_CMD_GET_IRQ_ACK_OFFSET, &cmd);
777 	} else {
778 		status |= myri10ge_send_cmd
779 			(sc,  MYRI10GE_MCP_CMD_GET_IRQ_ACK_DEASSERT_OFFSET,
780 			 &cmd);
781 	}
782 	if (status != 0) {
783 		device_printf(sc->dev, "failed set interrupt parameters\n");
784 		return status;
785 	}
786 	sc->irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
787 
788 	/* reset mcp/driver shared state back to 0 */
789 	sc->intr.seqnum = 0;
790 	sc->intr.intrq = 0;
791 	sc->intr.slot = 0;
792 	sc->tx.req = 0;
793 	sc->tx.done = 0;
794 	sc->rx_big.cnt = 0;
795 	sc->rx_small.cnt = 0;
796 	sc->rdma_tags_available = 15;
797 	status = myri10ge_update_mac_address(sc);
798 	myri10ge_change_promisc(sc, 0);
799 	myri10ge_change_pause(sc, sc->pause);
800 	return status;
801 }
802 
803 static int
804 myri10ge_change_intr_coal(SYSCTL_HANDLER_ARGS)
805 {
806         myri10ge_cmd_t cmd;
807         myri10ge_softc_t *sc;
808         unsigned int intr_coal_delay;
809         int err;
810 
811         sc = arg1;
812         intr_coal_delay = sc->intr_coal_delay;
813         err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
814         if (err != 0) {
815                 return err;
816         }
817         if (intr_coal_delay == sc->intr_coal_delay)
818                 return 0;
819 
820         if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
821                 return EINVAL;
822 
823 	sx_xlock(&sc->driver_lock);
824         cmd.data0 = intr_coal_delay;
825         err = myri10ge_send_cmd(sc, MYRI10GE_MCP_CMD_SET_INTR_COAL_DELAY,
826 				  &cmd);
827         if (err == 0) {
828 		sc->intr_coal_delay = intr_coal_delay;
829 	}
830 	sx_xunlock(&sc->driver_lock);
831         return err;
832 }
833 
834 static int
835 myri10ge_change_flow_control(SYSCTL_HANDLER_ARGS)
836 {
837         myri10ge_softc_t *sc;
838         unsigned int enabled;
839         int err;
840 
841         sc = arg1;
842         enabled = sc->pause;
843         err = sysctl_handle_int(oidp, &enabled, arg2, req);
844         if (err != 0) {
845                 return err;
846         }
847         if (enabled == sc->pause)
848                 return 0;
849 
850 	sx_xlock(&sc->driver_lock);
851 	err = myri10ge_change_pause(sc, enabled);
852 	sx_xunlock(&sc->driver_lock);
853         return err;
854 }
855 
856 static int
857 myri10ge_handle_be32(SYSCTL_HANDLER_ARGS)
858 {
859         int err;
860 
861         if (arg1 == NULL)
862                 return EFAULT;
863         arg2 = be32toh(*(int *)arg1);
864         arg1 = NULL;
865         err = sysctl_handle_int(oidp, arg1, arg2, req);
866 
867         return err;
868 }
869 
870 static void
871 myri10ge_add_sysctls(myri10ge_softc_t *sc)
872 {
873 	struct sysctl_ctx_list *ctx;
874 	struct sysctl_oid_list *children;
875 	mcp_stats_t *fw;
876 
877 	ctx = device_get_sysctl_ctx(sc->dev);
878 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
879 	fw = sc->fw_stats;
880 
881 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
882 			"intr_coal_delay",
883 			CTLTYPE_INT|CTLFLAG_RW, sc,
884 			0, myri10ge_change_intr_coal,
885 			"I", "interrupt coalescing delay in usecs");
886 
887 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
888 			"flow_control_enabled",
889 			CTLTYPE_INT|CTLFLAG_RW, sc,
890 			0, myri10ge_change_flow_control,
891 			"I", "interrupt coalescing delay in usecs");
892 
893 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
894 		       "skip_pio_read",
895 		       CTLFLAG_RW, &myri10ge_skip_pio_read,
896 		       0, "Skip pio read in interrupt handler");
897 
898 	/* stats block from firmware is in network byte order.
899 	   Need to swap it */
900 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
901 			"link_up",
902 			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
903 			0, myri10ge_handle_be32,
904 			"I", "link up");
905 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
906 			"rdma_tags_available",
907 			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
908 			0, myri10ge_handle_be32,
909 			"I", "rdma_tags_available");
910 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
911 			"dropped_link_overflow",
912 			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
913 			0, myri10ge_handle_be32,
914 			"I", "dropped_link_overflow");
915 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
916 			"dropped_link_error_or_filtered",
917 			CTLTYPE_INT|CTLFLAG_RD,
918 			&fw->dropped_link_error_or_filtered,
919 			0, myri10ge_handle_be32,
920 			"I", "dropped_link_error_or_filtered");
921 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
922 			"dropped_runt",
923 			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
924 			0, myri10ge_handle_be32,
925 			"I", "dropped_runt");
926 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
927 			"dropped_overrun",
928 			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
929 			0, myri10ge_handle_be32,
930 			"I", "dropped_overrun");
931 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
932 			"dropped_no_small_buffer",
933 			CTLTYPE_INT|CTLFLAG_RD,
934 			&fw->dropped_no_small_buffer,
935 			0, myri10ge_handle_be32,
936 			"I", "dropped_no_small_buffer");
937 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
938 			"dropped_no_big_buffer",
939 			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
940 			0, myri10ge_handle_be32,
941 			"I", "dropped_no_big_buffer");
942 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
943 			"dropped_interrupt_busy",
944 			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_interrupt_busy,
945 			0, myri10ge_handle_be32,
946 			"I", "dropped_interrupt_busy");
947 
948 	/* host counters exported for debugging */
949 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
950 		       "tx_req",
951 		       CTLFLAG_RD, &sc->tx.req,
952 		       0, "tx_req");
953 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
954 		       "tx_done",
955 		       CTLFLAG_RD, &sc->tx.done,
956 		       0, "tx_done");
957 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
958 		       "rx_small_cnt",
959 		       CTLFLAG_RD, &sc->rx_small.cnt,
960 		       0, "rx_small_cnt");
961 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
962 		       "rx_big_cnt",
963 		       CTLFLAG_RD, &sc->rx_big.cnt,
964 		       0, "rx_small_cnt");
965 
966 }
967 
968 /* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
969    backwards one at a time and handle ring wraps */
970 
971 static inline void
972 myri10ge_submit_req_backwards(myri10ge_tx_buf_t *tx,
973 			    mcp_kreq_ether_send_t *src, int cnt)
974 {
975         int idx, starting_slot;
976         starting_slot = tx->req;
977         while (cnt > 1) {
978                 cnt--;
979                 idx = (starting_slot + cnt) & tx->mask;
980                 myri10ge_pio_copy(&tx->lanai[idx],
981 				&src[cnt], sizeof(*src));
982                 mb();
983         }
984 }
985 
986 /*
987  * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
988  * at most 32 bytes at a time, so as to avoid involving the software
989  * pio handler in the nic.   We re-write the first segment's flags
990  * to mark them valid only after writing the entire chain
991  */
992 
993 static inline void
994 myri10ge_submit_req(myri10ge_tx_buf_t *tx, mcp_kreq_ether_send_t *src,
995                   int cnt)
996 {
997         int idx, i;
998         uint32_t *src_ints;
999 	volatile uint32_t *dst_ints;
1000         mcp_kreq_ether_send_t *srcp;
1001 	volatile mcp_kreq_ether_send_t *dstp, *dst;
1002 
1003 
1004         idx = tx->req & tx->mask;
1005 
1006         src->flags &= ~(htobe16(MYRI10GE_MCP_ETHER_FLAGS_VALID));
1007         mb();
1008         dst = dstp = &tx->lanai[idx];
1009         srcp = src;
1010 
1011         if ((idx + cnt) < tx->mask) {
1012                 for (i = 0; i < (cnt - 1); i += 2) {
1013                         myri10ge_pio_copy(dstp, srcp, 2 * sizeof(*src));
1014                         mb(); /* force write every 32 bytes */
1015                         srcp += 2;
1016                         dstp += 2;
1017                 }
1018         } else {
1019                 /* submit all but the first request, and ensure
1020                    that it is submitted below */
1021                 myri10ge_submit_req_backwards(tx, src, cnt);
1022                 i = 0;
1023         }
1024         if (i < cnt) {
1025                 /* submit the first request */
1026                 myri10ge_pio_copy(dstp, srcp, sizeof(*src));
1027                 mb(); /* barrier before setting valid flag */
1028         }
1029 
1030         /* re-write the last 32-bits with the valid flags */
1031         src->flags |= htobe16(MYRI10GE_MCP_ETHER_FLAGS_VALID);
1032         src_ints = (uint32_t *)src;
1033         src_ints+=3;
1034         dst_ints = (volatile uint32_t *)dst;
1035         dst_ints+=3;
1036         *dst_ints =  *src_ints;
1037         tx->req += cnt;
1038         mb();
1039 }
1040 
1041 static inline void
1042 myri10ge_submit_req_wc(myri10ge_tx_buf_t *tx,
1043 		     mcp_kreq_ether_send_t *src, int cnt)
1044 {
1045     tx->req += cnt;
1046     mb();
1047     while (cnt >= 4) {
1048 	    myri10ge_pio_copy((volatile char *)tx->wc_fifo, src, 64);
1049 	    mb();
1050 	    src += 4;
1051 	    cnt -= 4;
1052     }
1053     if (cnt > 0) {
1054 	    /* pad it to 64 bytes.  The src is 64 bytes bigger than it
1055 	       needs to be so that we don't overrun it */
1056 	    myri10ge_pio_copy(tx->wc_fifo + (cnt<<18), src, 64);
1057 	    mb();
1058     }
1059 }
1060 
1061 static void
1062 myri10ge_encap(myri10ge_softc_t *sc, struct mbuf *m)
1063 {
1064 	mcp_kreq_ether_send_t *req;
1065 	bus_dma_segment_t seg_list[MYRI10GE_MCP_ETHER_MAX_SEND_DESC];
1066 	bus_dma_segment_t *seg;
1067 	struct mbuf *m_tmp;
1068 	struct ifnet *ifp;
1069 	myri10ge_tx_buf_t *tx;
1070 	struct ether_header *eh;
1071 	struct ip *ip;
1072 	int cnt, cum_len, err, i, idx;
1073 	uint16_t flags, pseudo_hdr_offset;
1074         uint8_t cksum_offset;
1075 
1076 
1077 
1078 	ifp = sc->ifp;
1079 	tx = &sc->tx;
1080 
1081 	/* (try to) map the frame for DMA */
1082 	idx = tx->req & tx->mask;
1083 	err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
1084 				      m, seg_list, &cnt,
1085 				      BUS_DMA_NOWAIT);
1086 	if (err == EFBIG) {
1087 		/* Too many segments in the chain.  Try
1088 		   to defrag */
1089 		m_tmp = m_defrag(m, M_NOWAIT);
1090 		if (m_tmp == NULL) {
1091 			goto drop;
1092 		}
1093 		m = m_tmp;
1094 		err = bus_dmamap_load_mbuf_sg(tx->dmat,
1095 					      tx->info[idx].map,
1096 					      m, seg_list, &cnt,
1097 					      BUS_DMA_NOWAIT);
1098 	}
1099 	if (err != 0) {
1100 		device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d\n",
1101 			      err);
1102 		goto drop;
1103 	}
1104 	bus_dmamap_sync(tx->dmat, tx->info[idx].map,
1105 			BUS_DMASYNC_PREWRITE);
1106 
1107 	req = tx->req_list;
1108 	cksum_offset = 0;
1109 	flags = htobe16(MYRI10GE_MCP_ETHER_FLAGS_VALID |
1110 			MYRI10GE_MCP_ETHER_FLAGS_NOT_LAST);
1111 
1112 	/* checksum offloading? */
1113 	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
1114 		eh = mtod(m, struct ether_header *);
1115 		ip = (struct ip *) (eh + 1);
1116 		cksum_offset = sizeof(*eh) + (ip->ip_hl << 2);
1117 		pseudo_hdr_offset = cksum_offset +  m->m_pkthdr.csum_data;
1118 		req->pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
1119 		req->cksum_offset = cksum_offset;
1120 		flags |= htobe16(MYRI10GE_MCP_ETHER_FLAGS_CKSUM);
1121 	}
1122 	if (m->m_pkthdr.len < 512)
1123 		req->flags = htobe16(MYRI10GE_MCP_ETHER_FLAGS_FIRST |
1124 				     MYRI10GE_MCP_ETHER_FLAGS_SMALL);
1125 	else
1126 		req->flags = htobe16(MYRI10GE_MCP_ETHER_FLAGS_FIRST);
1127 
1128 	/* convert segments into a request list */
1129 	cum_len = 0;
1130 	seg = seg_list;
1131 	for (i = 0; i < cnt; i++) {
1132 		req->addr_low =
1133 			htobe32(MYRI10GE_LOWPART_TO_U32(seg->ds_addr));
1134 		req->addr_high =
1135 			htobe32(MYRI10GE_HIGHPART_TO_U32(seg->ds_addr));
1136 		req->length = htobe16(seg->ds_len);
1137 		req->cksum_offset = cksum_offset;
1138 		if (cksum_offset > seg->ds_len)
1139 			cksum_offset -= seg->ds_len;
1140 		else
1141 			cksum_offset = 0;
1142 		req->flags |= flags | ((cum_len & 1) *
1143 				       htobe16(MYRI10GE_MCP_ETHER_FLAGS_ALIGN_ODD));
1144 		cum_len += seg->ds_len;
1145 		seg++;
1146 		req++;
1147 		req->flags = 0;
1148 	}
1149 	req--;
1150 	/* pad runts to 60 bytes */
1151 	if (cum_len < 60) {
1152 		req++;
1153 		req->addr_low =
1154 			htobe32(MYRI10GE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
1155 		req->addr_high =
1156 			htobe32(MYRI10GE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
1157 		req->length = htobe16(60 - cum_len);
1158 		req->cksum_offset = cksum_offset;
1159 		req->flags |= flags | ((cum_len & 1) *
1160                                        htobe16(MYRI10GE_MCP_ETHER_FLAGS_ALIGN_ODD));
1161 		cnt++;
1162 	}
1163 	req->flags &= ~(htobe16(MYRI10GE_MCP_ETHER_FLAGS_NOT_LAST));
1164 	tx->info[idx].m = m;
1165 	if (tx->wc_fifo == NULL)
1166 		myri10ge_submit_req(tx, tx->req_list, cnt);
1167 	else
1168 		myri10ge_submit_req_wc(tx, tx->req_list, cnt);
1169 	return;
1170 
1171 drop:
1172 	m_freem(m);
1173 	ifp->if_oerrors++;
1174 	return;
1175 }
1176 
1177 
1178 static void
1179 myri10ge_start_locked(myri10ge_softc_t *sc)
1180 {
1181 	int avail;
1182 	struct mbuf *m;
1183 	struct ifnet *ifp;
1184 
1185 
1186 	ifp = sc->ifp;
1187 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
1188 		 /* dequeue the packet */
1189 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
1190 
1191 		/* let BPF see it */
1192 		BPF_MTAP(ifp, m);
1193 
1194 		/* give it to the nic */
1195 		myri10ge_encap(sc, m);
1196 
1197 		/* leave an extra slot keep the ring from wrapping */
1198 		avail = sc->tx.mask - (sc->tx.req - sc->tx.done);
1199 		if (avail < MYRI10GE_MCP_ETHER_MAX_SEND_DESC) {
1200 			sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1201 			return;
1202 		}
1203 	}
1204 }
1205 
1206 static void
1207 myri10ge_start(struct ifnet *ifp)
1208 {
1209 	myri10ge_softc_t *sc = ifp->if_softc;
1210 
1211 
1212 	mtx_lock(&sc->tx_lock);
1213 	myri10ge_start_locked(sc);
1214 	mtx_unlock(&sc->tx_lock);
1215 }
1216 
1217 static int
1218 myri10ge_get_buf_small(myri10ge_softc_t *sc, bus_dmamap_t map, int idx)
1219 {
1220 	bus_dma_segment_t seg;
1221 	struct mbuf *m;
1222 	myri10ge_rx_buf_t *rx = &sc->rx_small;
1223 	int cnt, err;
1224 
1225 	m = m_gethdr(M_DONTWAIT, MT_DATA);
1226 	if (m == NULL) {
1227 		rx->alloc_fail++;
1228 		err = ENOBUFS;
1229 		goto done;
1230 	}
1231 	m->m_len = MHLEN;
1232 	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
1233 				      &seg, &cnt, BUS_DMA_NOWAIT);
1234 	if (err != 0) {
1235 		m_free(m);
1236 		goto done;
1237 	}
1238 	rx->info[idx].m = m;
1239 	rx->shadow[idx].addr_low =
1240 		htobe32(MYRI10GE_LOWPART_TO_U32(seg.ds_addr));
1241 	rx->shadow[idx].addr_high =
1242 		htobe32(MYRI10GE_HIGHPART_TO_U32(seg.ds_addr));
1243 
1244 done:
1245 	if ((idx & 7) == 7) {
1246                 myri10ge_pio_copy(&rx->lanai[idx - 7],
1247 				  &rx->shadow[idx - 7],
1248                                   8 * sizeof (*rx->lanai));
1249                 mb();
1250         }
1251 	return err;
1252 }
1253 
1254 static int
1255 myri10ge_get_buf_big(myri10ge_softc_t *sc, bus_dmamap_t map, int idx)
1256 {
1257 	bus_dma_segment_t seg;
1258 	struct mbuf *m;
1259 	myri10ge_rx_buf_t *rx = &sc->rx_big;
1260 	int cnt, err;
1261 
1262 	m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, sc->big_bytes);
1263 	if (m == NULL) {
1264 		rx->alloc_fail++;
1265 		err = ENOBUFS;
1266 		goto done;
1267 	}
1268 	m->m_len = sc->big_bytes;
1269 	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
1270 				      &seg, &cnt, BUS_DMA_NOWAIT);
1271 	if (err != 0) {
1272 		m_free(m);
1273 		goto done;
1274 	}
1275 	rx->info[idx].m = m;
1276 	rx->shadow[idx].addr_low =
1277 		htobe32(MYRI10GE_LOWPART_TO_U32(seg.ds_addr));
1278 	rx->shadow[idx].addr_high =
1279 		htobe32(MYRI10GE_HIGHPART_TO_U32(seg.ds_addr));
1280 
1281 done:
1282 	if ((idx & 7) == 7) {
1283                 myri10ge_pio_copy(&rx->lanai[idx - 7],
1284 				  &rx->shadow[idx - 7],
1285                                   8 * sizeof (*rx->lanai));
1286                 mb();
1287         }
1288 	return err;
1289 }
1290 
1291 static inline void
1292 myri10ge_rx_done_big(myri10ge_softc_t *sc, int len, int csum, int flags)
1293 {
1294 	struct ifnet *ifp;
1295 	struct mbuf *m = 0; 		/* -Wunitialized */
1296 	struct mbuf *m_prev = 0;	/* -Wunitialized */
1297 	struct mbuf *m_head = 0;
1298 	bus_dmamap_t old_map;
1299 	myri10ge_rx_buf_t *rx;
1300 	int idx;
1301 
1302 
1303 	rx = &sc->rx_big;
1304 	ifp = sc->ifp;
1305 	while (len > 0) {
1306 		idx = rx->cnt & rx->mask;
1307                 rx->cnt++;
1308 		/* save a pointer to the received mbuf */
1309 		m = rx->info[idx].m;
1310 		/* try to replace the received mbuf */
1311 		if (myri10ge_get_buf_big(sc, rx->extra_map, idx)) {
1312 			goto drop;
1313 		}
1314 		/* unmap the received buffer */
1315 		old_map = rx->info[idx].map;
1316 		bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
1317 		bus_dmamap_unload(rx->dmat, old_map);
1318 
1319 		/* swap the bus_dmamap_t's */
1320 		rx->info[idx].map = rx->extra_map;
1321 		rx->extra_map = old_map;
1322 
1323 		/* chain multiple segments together */
1324 		if (!m_head) {
1325 			m_head = m;
1326 			/* mcp implicitly skips 1st bytes so that
1327 			 * packet is properly aligned */
1328 			m->m_data += MYRI10GE_MCP_ETHER_PAD;
1329 			m->m_pkthdr.len = len;
1330 			m->m_len = sc->big_bytes - MYRI10GE_MCP_ETHER_PAD;
1331 		} else {
1332 			m->m_len = sc->big_bytes;
1333 			m->m_flags &= ~M_PKTHDR;
1334 			m_prev->m_next = m;
1335 		}
1336 		len -= m->m_len;
1337 		m_prev = m;
1338 	}
1339 
1340 	/* trim trailing garbage from the last mbuf in the chain.  If
1341 	 * there is any garbage, len will be negative */
1342 	m->m_len += len;
1343 
1344 	/* if the checksum is valid, mark it in the mbuf header */
1345 	if (sc->csum_flag & flags) {
1346 		m_head->m_pkthdr.csum_data = csum;
1347 		m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID;
1348 	}
1349 
1350 	/* pass the frame up the stack */
1351 	m_head->m_pkthdr.rcvif = ifp;
1352 	ifp->if_ipackets++;
1353 	(*ifp->if_input)(ifp, m_head);
1354 	return;
1355 
1356 drop:
1357 	/* drop the frame -- the old mbuf(s) are re-cycled by running
1358 	   every slot through the allocator */
1359         if (m_head) {
1360                 len -= sc->big_bytes;
1361                 m_freem(m_head);
1362         } else {
1363                 len -= (sc->big_bytes + MYRI10GE_MCP_ETHER_PAD);
1364         }
1365         while ((int)len > 0) {
1366                 idx = rx->cnt & rx->mask;
1367                 rx->cnt++;
1368                 m = rx->info[idx].m;
1369                 if (0 == (myri10ge_get_buf_big(sc, rx->extra_map, idx))) {
1370 			m_freem(m);
1371 			/* unmap the received buffer */
1372 			old_map = rx->info[idx].map;
1373 			bus_dmamap_sync(rx->dmat, old_map,
1374 					BUS_DMASYNC_POSTREAD);
1375 			bus_dmamap_unload(rx->dmat, old_map);
1376 
1377 			/* swap the bus_dmamap_t's */
1378 			rx->info[idx].map = rx->extra_map;
1379 			rx->extra_map = old_map;
1380 		}
1381                 len -= sc->big_bytes;
1382         }
1383 
1384 	ifp->if_ierrors++;
1385 
1386 }
1387 
1388 
1389 static inline void
1390 myri10ge_rx_done_small(myri10ge_softc_t *sc, uint32_t len,
1391                        uint32_t csum, uint32_t flags)
1392 {
1393 	struct ifnet *ifp;
1394 	struct mbuf *m;
1395 	myri10ge_rx_buf_t *rx;
1396 	bus_dmamap_t old_map;
1397 	int idx;
1398 
1399 	ifp = sc->ifp;
1400 	rx = &sc->rx_small;
1401 	idx = rx->cnt & rx->mask;
1402 	rx->cnt++;
1403 	/* save a pointer to the received mbuf */
1404 	m = rx->info[idx].m;
1405 	/* try to replace the received mbuf */
1406 	if (myri10ge_get_buf_small(sc, rx->extra_map, idx)) {
1407 		/* drop the frame -- the old mbuf is re-cycled */
1408 		ifp->if_ierrors++;
1409 		return;
1410 	}
1411 
1412 	/* unmap the received buffer */
1413 	old_map = rx->info[idx].map;
1414 	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
1415 	bus_dmamap_unload(rx->dmat, old_map);
1416 
1417 	/* swap the bus_dmamap_t's */
1418 	rx->info[idx].map = rx->extra_map;
1419 	rx->extra_map = old_map;
1420 
1421 	/* mcp implicitly skips 1st 2 bytes so that packet is properly
1422 	 * aligned */
1423 	m->m_data += MYRI10GE_MCP_ETHER_PAD;
1424 
1425 	/* if the checksum is valid, mark it in the mbuf header */
1426 	if (sc->csum_flag & flags) {
1427 		m->m_pkthdr.csum_data = csum;
1428 		m->m_pkthdr.csum_flags = CSUM_DATA_VALID;
1429 	}
1430 
1431 	/* pass the frame up the stack */
1432 	m->m_pkthdr.rcvif = ifp;
1433 	m->m_len = m->m_pkthdr.len = len;
1434 	ifp->if_ipackets++;
1435 	(*ifp->if_input)(ifp, m);
1436 }
1437 
1438 static inline void
1439 myri10ge_tx_done(myri10ge_softc_t *sc, uint32_t mcp_idx)
1440 {
1441 	struct ifnet *ifp;
1442 	myri10ge_tx_buf_t *tx;
1443 	struct mbuf *m;
1444 	bus_dmamap_t map;
1445 	int idx;
1446 
1447 	tx = &sc->tx;
1448 	ifp = sc->ifp;
1449 	while (tx->done != mcp_idx) {
1450 		idx = tx->done & tx->mask;
1451 		tx->done++;
1452 		m = tx->info[idx].m;
1453 		/* mbuf and DMA map only attached to the first
1454 		   segment per-mbuf */
1455 		if (m != NULL) {
1456 			ifp->if_opackets++;
1457 			tx->info[idx].m = NULL;
1458 			map = tx->info[idx].map;
1459 			bus_dmamap_unload(tx->dmat, map);
1460 			m_freem(m);
1461 		}
1462 	}
1463 
1464 	/* If we have space, clear IFF_OACTIVE to tell the stack that
1465            its OK to send packets */
1466 
1467 	if (ifp->if_drv_flags & IFF_DRV_OACTIVE &&
1468 	    tx->req - tx->done < (tx->mask + 1)/4) {
1469 		mtx_lock(&sc->tx_lock);
1470 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1471 		myri10ge_start_locked(sc);
1472 		mtx_unlock(&sc->tx_lock);
1473 	}
1474 }
1475 
1476 static void
1477 myri10ge_dump_interrupt_queues(myri10ge_softc_t *sc, int maxslot)
1478 {
1479   int intrq, slot, type;
1480   static int call_cnt = 0;
1481 
1482   /* only do it a few times to avoid filling the message buffer */
1483   if (call_cnt > 10)
1484     return;
1485 
1486   call_cnt++;
1487 
1488   device_printf(sc->dev, "--------- Dumping interrupt queue state ----- \n");
1489   device_printf(sc->dev, "currently expecting interrupts on queue %d\n",
1490 		sc->intr.intrq);
1491   device_printf(sc->dev, " q  slot  status \n");
1492   device_printf(sc->dev, "--- ---- -------- \n");
1493   for (intrq = 0; intrq < 2; intrq++) {
1494 	  for (slot = 0; slot <= maxslot; slot++) {
1495       type = sc->intr.q[intrq][slot].type;
1496 #if 0
1497       if (type == 0 && slot != 0)
1498         continue;
1499 #endif
1500       device_printf(sc->dev, "[%d]:[%d]: type   = 0x%x\n", intrq, slot,
1501 		    type);
1502       device_printf(sc->dev, "[%d]:[%d]: flag    = 0x%x\n", intrq, slot,
1503                 sc->intr.q[intrq][slot].flag);
1504       device_printf(sc->dev, "[%d]:[%d]: index  = 0x%x\n", intrq, slot,
1505                 be16toh(sc->intr.q[intrq][slot].index));
1506       device_printf(sc->dev, "[%d]:[%d]: seqnum = 0x%x\n", intrq, slot,
1507                 (unsigned int)be32toh(sc->intr.q[intrq][slot].seqnum));
1508       device_printf(sc->dev, "[%d]:[%d]: data0  = 0x%x\n", intrq, slot,
1509                 (unsigned int)be32toh(sc->intr.q[intrq][slot].data0));
1510       device_printf(sc->dev, "[%d]:[%d]: data1  = 0x%x\n", intrq, slot,
1511                 (unsigned int)be32toh(sc->intr.q[intrq][slot].data1));
1512 
1513     }
1514   }
1515 
1516 }
1517 
1518 static inline void
1519 myri10ge_claim_irq(myri10ge_softc_t *sc)
1520 {
1521 	volatile uint32_t dontcare;
1522 
1523 
1524 	*sc->irq_claim = 0;
1525 	mb();
1526 
1527 	/* do a PIO read to ensure that PIO write to claim the irq has
1528 	   hit the nic before we exit the interrupt handler */
1529 	if (!myri10ge_skip_pio_read) {
1530 		dontcare = *(volatile uint32_t *)sc->sram;
1531 		mb();
1532 	}
1533 }
1534 
1535 static void
1536 myri10ge_intr(void *arg)
1537 {
1538 	myri10ge_softc_t *sc = arg;
1539 	int intrq, claimed, flags, count, length, ip_csum;
1540         uint32_t raw, slot;
1541 	uint8_t type;
1542 
1543 
1544 	intrq = sc->intr.intrq;
1545 	claimed = 0;
1546 	bus_dmamap_sync(sc->intr.dma[intrq].dmat,
1547 			sc->intr.dma[intrq].map, BUS_DMASYNC_POSTREAD);
1548 	if (sc->msi_enabled) {
1549 		/* We know we can immediately claim the interrupt */
1550 		myri10ge_claim_irq(sc);
1551 		claimed = 1;
1552 	} else {
1553 		/* Check to see if we have the last event in the queue
1554 		   ready.  If so, ack it as early as possible.  This
1555 		   allows more time to get the interrupt line
1556 		   de-asserted prior to the EOI and reduces the chance
1557 		   of seeing a spurious irq caused by the interrupt
1558 		   line remaining high after EOI */
1559 
1560 		slot = be16toh(sc->intr.q[intrq][0].index) - 1;
1561 		if (slot < myri10ge_max_intr_slots &&
1562 		    sc->intr.q[intrq][slot].type  != 0 &&
1563 		    sc->intr.q[intrq][slot].flag != 0) {
1564 			myri10ge_claim_irq(sc);
1565 			claimed = 1;
1566 		}
1567 	}
1568 
1569 	/* walk each slot in the current queue, processing events until
1570 	   we reach an event with a zero type */
1571 	for (slot = sc->intr.slot; slot < myri10ge_max_intr_slots; slot++) {
1572 		type = sc->intr.q[intrq][slot].type;
1573 
1574 		/* check for partially completed DMA of events when
1575 		   using non-MSI interrupts */
1576 		if (__predict_false(!claimed)) {
1577 			mb();
1578 			/* look if there is somscing in the queue */
1579 			if (type == 0) {
1580 				/* save the current slot for the next
1581 				 * time we (re-)enter this routine */
1582 				if (sc->intr.slot == slot) {
1583 					sc->intr.spurious++;
1584 				}
1585 				sc->intr.slot = slot;
1586 				return;
1587 			}
1588 		}
1589 		if (__predict_false(htobe32(sc->intr.q[intrq][slot].seqnum) !=
1590 			     sc->intr.seqnum++)) {
1591 			device_printf(sc->dev, "Bad interrupt!\n");
1592 			device_printf(sc->dev,
1593 				      "bad irq seqno"
1594 				      "(got 0x%x, expected 0x%x) \n",
1595 				      (unsigned int)htobe32(sc->intr.q[intrq][slot].seqnum),
1596 				      sc->intr.seqnum);
1597 			device_printf(sc->dev, "intrq = %d, slot = %d\n",
1598 				      intrq, slot);
1599 			myri10ge_dump_interrupt_queues(sc, slot);
1600 			device_printf(sc->dev,
1601 				      "Disabling futher interrupt handling\n");
1602 			bus_teardown_intr(sc->dev, sc->irq_res,
1603 					  sc->ih);
1604 			sc->ih = NULL;
1605 			return;
1606 		}
1607 
1608 		switch (type) {
1609 		case MYRI10GE_MCP_INTR_ETHER_SEND_DONE:
1610 			myri10ge_tx_done(sc, be32toh(sc->intr.q[intrq][slot].data0));
1611 
1612 			if (__predict_true(sc->intr.q[intrq][slot].data1 == 0))
1613 				break;
1614 
1615 			/* check the link state.  Don't bother to
1616 			 * byteswap, since it can just be 0 or 1 */
1617 			if (sc->link_state != sc->fw_stats->link_up) {
1618 				sc->link_state = sc->fw_stats->link_up;
1619 				if (sc->link_state) {
1620 					if_link_state_change(sc->ifp,
1621 							     LINK_STATE_UP);
1622 					device_printf(sc->dev,
1623 						      "link up\n");
1624 				} else {
1625 					if_link_state_change(sc->ifp,
1626 							     LINK_STATE_DOWN);
1627 					device_printf(sc->dev,
1628 						      "link down\n");
1629 				}
1630 			}
1631 			if (sc->rdma_tags_available !=
1632 			    be32toh(sc->fw_stats->rdma_tags_available)) {
1633 				sc->rdma_tags_available =
1634 					be32toh(sc->fw_stats->rdma_tags_available);
1635 				device_printf(sc->dev, "RDMA timed out!"
1636 					      " %d tags left\n",
1637 					      sc->rdma_tags_available);
1638 			}
1639 
1640 			break;
1641 
1642 
1643 		case MYRI10GE_MCP_INTR_ETHER_RECV_SMALL:
1644 			raw = be32toh(sc->intr.q[intrq][slot].data0);
1645 			count = 0xff & raw;
1646 			flags = raw >> 8;
1647 			raw = be32toh(sc->intr.q[intrq][slot].data1);
1648 			ip_csum = raw >> 16;
1649 			length = 0xffff & raw;
1650 			myri10ge_rx_done_small(sc, length, ip_csum,
1651 					       flags);
1652 			break;
1653 
1654 		case MYRI10GE_MCP_INTR_ETHER_RECV_BIG:
1655 			raw = be32toh(sc->intr.q[intrq][slot].data0);
1656 			count = 0xff & raw;
1657 			flags = raw >> 8;
1658 			raw = be32toh(sc->intr.q[intrq][slot].data1);
1659 			ip_csum = raw >> 16;
1660 			length = 0xffff & raw;
1661 			myri10ge_rx_done_big(sc, length, ip_csum,
1662 					     flags);
1663 
1664 			break;
1665 
1666 		case MYRI10GE_MCP_INTR_LINK_CHANGE:
1667 			/* not yet implemented in firmware */
1668 			break;
1669 
1670 		case MYRI10GE_MCP_INTR_ETHER_DOWN:
1671 			sc->down_cnt++;
1672 			wakeup(&sc->down_cnt);
1673 			break;
1674 
1675 		default:
1676 			device_printf(sc->dev, "Unknown interrupt type %d\n",
1677 				      type);
1678 		}
1679 		sc->intr.q[intrq][slot].type = 0;
1680 		if (sc->intr.q[intrq][slot].flag != 0) {
1681 			if (!claimed) {
1682 				myri10ge_claim_irq(sc);
1683 			}
1684 			sc->intr.slot = 0;
1685 			sc->intr.q[intrq][slot].flag = 0;
1686 			sc->intr.intrq = ((intrq + 1) & 1);
1687 			return;
1688 		}
1689 	}
1690 
1691 	/* we should never be here unless we're on a shared irq and we have
1692 	   not finished setting up the device */
1693 	return;
1694 }
1695 
/*
 * Watchdog routine taking a struct ifnet.  Performs no recovery; it
 * only logs that it was invoked.
 */
static void
myri10ge_watchdog(struct ifnet *ifp)
{
	printf("%s called\n", __func__);
}
1701 
/*
 * Init routine taking an opaque argument.  Intentionally empty: it
 * performs no work.
 */
static void
myri10ge_init(void *arg)
{
}
1706 
1707 
1708 
1709 static void
1710 myri10ge_free_mbufs(myri10ge_softc_t *sc)
1711 {
1712 	int i;
1713 
1714 	for (i = 0; i <= sc->rx_big.mask; i++) {
1715 		if (sc->rx_big.info[i].m == NULL)
1716 			continue;
1717 		bus_dmamap_unload(sc->rx_big.dmat,
1718 				  sc->rx_big.info[i].map);
1719 		m_freem(sc->rx_big.info[i].m);
1720 		sc->rx_big.info[i].m = NULL;
1721 	}
1722 
1723 	for (i = 0; i <= sc->rx_big.mask; i++) {
1724 		if (sc->rx_big.info[i].m == NULL)
1725 			continue;
1726 		bus_dmamap_unload(sc->rx_big.dmat,
1727 				  sc->rx_big.info[i].map);
1728 		m_freem(sc->rx_big.info[i].m);
1729 		sc->rx_big.info[i].m = NULL;
1730 	}
1731 
1732 	for (i = 0; i <= sc->tx.mask; i++) {
1733 		if (sc->tx.info[i].m == NULL)
1734 			continue;
1735 		bus_dmamap_unload(sc->tx.dmat,
1736 				  sc->tx.info[i].map);
1737 		m_freem(sc->tx.info[i].m);
1738 		sc->tx.info[i].m = NULL;
1739 	}
1740 }
1741 
1742 static void
1743 myri10ge_free_rings(myri10ge_softc_t *sc)
1744 {
1745 	int i;
1746 
1747 	if (sc->tx.req_bytes != NULL) {
1748 		free(sc->tx.req_bytes, M_DEVBUF);
1749 	}
1750 	if (sc->rx_small.shadow != NULL)
1751 		free(sc->rx_small.shadow, M_DEVBUF);
1752 	if (sc->rx_big.shadow != NULL)
1753 		free(sc->rx_big.shadow, M_DEVBUF);
1754 	if (sc->tx.info != NULL) {
1755 		for (i = 0; i <= sc->tx.mask; i++) {
1756 			if (sc->tx.info[i].map != NULL)
1757 				bus_dmamap_destroy(sc->tx.dmat,
1758 						   sc->tx.info[i].map);
1759 		}
1760 		free(sc->tx.info, M_DEVBUF);
1761 	}
1762 	if (sc->rx_small.info != NULL) {
1763 		for (i = 0; i <= sc->rx_small.mask; i++) {
1764 			if (sc->rx_small.info[i].map != NULL)
1765 				bus_dmamap_destroy(sc->rx_small.dmat,
1766 						   sc->rx_small.info[i].map);
1767 		}
1768 		free(sc->rx_small.info, M_DEVBUF);
1769 	}
1770 	if (sc->rx_big.info != NULL) {
1771 		for (i = 0; i <= sc->rx_big.mask; i++) {
1772 			if (sc->rx_big.info[i].map != NULL)
1773 				bus_dmamap_destroy(sc->rx_big.dmat,
1774 						   sc->rx_big.info[i].map);
1775 		}
1776 		free(sc->rx_big.info, M_DEVBUF);
1777 	}
1778 	if (sc->rx_big.extra_map != NULL)
1779 		bus_dmamap_destroy(sc->rx_big.dmat,
1780 				   sc->rx_big.extra_map);
1781 	if (sc->rx_small.extra_map != NULL)
1782 		bus_dmamap_destroy(sc->rx_small.dmat,
1783 				   sc->rx_small.extra_map);
1784 	if (sc->tx.dmat != NULL)
1785 		bus_dma_tag_destroy(sc->tx.dmat);
1786 	if (sc->rx_small.dmat != NULL)
1787 		bus_dma_tag_destroy(sc->rx_small.dmat);
1788 	if (sc->rx_big.dmat != NULL)
1789 		bus_dma_tag_destroy(sc->rx_big.dmat);
1790 }
1791 
/*
 * Allocate the host side of the transmit and receive rings.
 *
 * The ring sizes (in bytes) are queried from the firmware and converted
 * to entry counts; the ring masks are set to entries - 1 and the
 * interface send queue length to tx entries - 1.  The routine then
 * allocates the tx request copy block, the rx shadow rings, the per-slot
 * info arrays, one DMA tag per ring, one DMA map per slot, and one
 * spare map for each receive ring.
 *
 * Returns 0 on success.  On failure everything allocated so far is
 * released through myri10ge_free_rings() and a non-zero error is
 * returned.
 */
static int
myri10ge_alloc_rings(myri10ge_softc_t *sc)
{
	myri10ge_cmd_t cmd;
	int tx_ring_size, rx_ring_size;
	int tx_ring_entries, rx_ring_entries;
	int i, err;
	unsigned long bytes;

	/* get ring sizes */
	err = myri10ge_send_cmd(sc,
				MYRI10GE_MCP_CMD_GET_SEND_RING_SIZE,
				&cmd);
	tx_ring_size = cmd.data0;
	/* the two status codes are OR-ed together; only zero versus
	   non-zero is meaningful below */
	err |= myri10ge_send_cmd(sc,
				 MYRI10GE_MCP_CMD_GET_RX_RING_SIZE,
				 &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine ring sizes\n");
		goto abort_with_nothing;
	}

	rx_ring_size = cmd.data0;

	/* convert the byte sizes reported by the firmware to entries */
	tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
	rx_ring_entries = rx_ring_size / sizeof (mcp_dma_addr_t);
	sc->ifp->if_snd.ifq_maxlen = tx_ring_entries - 1;
	sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;

	/* NOTE(review): using entries - 1 as an index mask presumably
	   relies on the ring sizes being powers of two -- confirm */
	sc->tx.mask = tx_ring_entries - 1;
	sc->rx_small.mask = sc->rx_big.mask = rx_ring_entries - 1;

	/* error reported if one of the allocations below fails */
	err = ENOMEM;

	/* allocate the tx request copy block */
	/* 8 extra bytes leave room for the alignment below; the 4 extra
	   entries are head-room beyond the largest request chain */
	bytes = 8 +
		sizeof (*sc->tx.req_list) * (MYRI10GE_MCP_ETHER_MAX_SEND_DESC + 4);
	sc->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
	if (sc->tx.req_bytes == NULL)
		goto abort_with_nothing;
	/* ensure req_list entries are aligned to 8 bytes */
	sc->tx.req_list = (mcp_kreq_ether_send_t *)
		((unsigned long)(sc->tx.req_bytes + 7) & ~7UL);

	/* allocate the rx shadow rings */
	bytes = rx_ring_entries * sizeof (*sc->rx_small.shadow);
	sc->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_small.shadow == NULL)
		goto abort_with_alloc;

	bytes = rx_ring_entries * sizeof (*sc->rx_big.shadow);
	sc->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_big.shadow == NULL)
		goto abort_with_alloc;

	/* allocate the host info rings */
	/* these are zeroed, so an unused slot has a NULL mbuf and map */
	bytes = tx_ring_entries * sizeof (*sc->tx.info);
	sc->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->tx.info == NULL)
		goto abort_with_alloc;

	bytes = rx_ring_entries * sizeof (*sc->rx_small.info);
	sc->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_small.info == NULL)
		goto abort_with_alloc;

	bytes = rx_ring_entries * sizeof (*sc->rx_big.info);
	sc->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_big.info == NULL)
		goto abort_with_alloc;

	/* allocate the busdma resources */
	/* tx tag: up to MYRI10GE_MCP_ETHER_MAX_SEND_DESC segments per
	   frame, each limited to sc->tx.boundary bytes */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 sc->tx.boundary,	/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 MYRI10GE_MAX_ETHER_MTU,/* maxsize */
				 MYRI10GE_MCP_ETHER_MAX_SEND_DESC,/* num segs */
				 sc->tx.boundary,	/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &sc->tx.dmat);		/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating tx dmat\n",
			      err);
		goto abort_with_alloc;
	}

	/* small rx tag: a single segment of at most MHLEN bytes */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 MHLEN,			/* maxsize */
				 1,			/* num segs */
				 MHLEN,			/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &sc->rx_small.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
			      err);
		goto abort_with_alloc;
	}

	/* big rx tag: a single segment of at most 4096 bytes */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 4096,			/* maxsize */
				 1,			/* num segs */
				 4096,			/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &sc->rx_big.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
			      err);
		goto abort_with_alloc;
	}

	/* now use these tags to setup dmamaps for each slot
	   in each ring */
	for (i = 0; i <= sc->tx.mask; i++) {
		err = bus_dmamap_create(sc->tx.dmat, 0,
					&sc->tx.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d  tx dmamap\n",
			      err);
			goto abort_with_alloc;
		}
	}
	for (i = 0; i <= sc->rx_small.mask; i++) {
		err = bus_dmamap_create(sc->rx_small.dmat, 0,
					&sc->rx_small.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d  rx_small dmamap\n",
			      err);
			goto abort_with_alloc;
		}
	}
	/* spare map, swapped with a slot's map each time a received
	   buffer is replaced */
	err = bus_dmamap_create(sc->rx_small.dmat, 0,
				&sc->rx_small.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
			      err);
			goto abort_with_alloc;
	}

	for (i = 0; i <= sc->rx_big.mask; i++) {
		err = bus_dmamap_create(sc->rx_big.dmat, 0,
					&sc->rx_big.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d  rx_big dmamap\n",
			      err);
			goto abort_with_alloc;
		}
	}
	/* spare map for the big ring, used the same way */
	err = bus_dmamap_create(sc->rx_big.dmat, 0,
				&sc->rx_big.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
			      err);
			goto abort_with_alloc;
	}
	return 0;

abort_with_alloc:
	/* releases whatever subset of the resources was set up */
	myri10ge_free_rings(sc);

abort_with_nothing:
	return err;
}
1971 
1972 static int
1973 myri10ge_open(myri10ge_softc_t *sc)
1974 {
1975 	myri10ge_cmd_t cmd;
1976 	int i, err;
1977 	bus_dmamap_t map;
1978 
1979 
1980 	err = myri10ge_reset(sc);
1981 	if (err != 0) {
1982 		device_printf(sc->dev, "failed to reset\n");
1983 		return EIO;
1984 	}
1985 
1986 	if (MCLBYTES >=
1987 	    sc->ifp->if_mtu + ETHER_HDR_LEN + MYRI10GE_MCP_ETHER_PAD)
1988 		sc->big_bytes = MCLBYTES;
1989 	else
1990 		sc->big_bytes = MJUMPAGESIZE;
1991 
1992 	err = myri10ge_alloc_rings(sc);
1993 	if (err != 0) {
1994 		device_printf(sc->dev, "failed to allocate rings\n");
1995 		return err;
1996 	}
1997 
1998 	err = bus_setup_intr(sc->dev, sc->irq_res,
1999 			     INTR_TYPE_NET | INTR_MPSAFE,
2000 			     myri10ge_intr, sc, &sc->ih);
2001 	if (err != 0) {
2002 		goto abort_with_rings;
2003 	}
2004 
2005 	/* get the lanai pointers to the send and receive rings */
2006 
2007 	err = myri10ge_send_cmd(sc, MYRI10GE_MCP_CMD_GET_SEND_OFFSET, &cmd);
2008 	sc->tx.lanai =
2009 		(volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
2010 	err |= myri10ge_send_cmd(sc,
2011 				 MYRI10GE_MCP_CMD_GET_SMALL_RX_OFFSET, &cmd);
2012 	sc->rx_small.lanai =
2013 		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
2014 	err |= myri10ge_send_cmd(sc, MYRI10GE_MCP_CMD_GET_BIG_RX_OFFSET, &cmd);
2015 	sc->rx_big.lanai =
2016 		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
2017 
2018 	if (err != 0) {
2019 		device_printf(sc->dev,
2020 			      "failed to get ring sizes or locations\n");
2021 		err = EIO;
2022 		goto abort_with_irq;
2023 	}
2024 
2025 	if (sc->wc) {
2026 		sc->tx.wc_fifo = sc->sram + 0x200000;
2027 		sc->rx_small.wc_fifo = sc->sram + 0x300000;
2028 		sc->rx_big.wc_fifo = sc->sram + 0x340000;
2029 	} else {
2030 		sc->tx.wc_fifo = 0;
2031 		sc->rx_small.wc_fifo = 0;
2032 		sc->rx_big.wc_fifo = 0;
2033 	}
2034 
2035 
2036 	/* stock receive rings */
2037 	for (i = 0; i <= sc->rx_small.mask; i++) {
2038 		map = sc->rx_small.info[i].map;
2039 		err = myri10ge_get_buf_small(sc, map, i);
2040 		if (err) {
2041 			device_printf(sc->dev, "alloced %d/%d smalls\n",
2042 				      i, sc->rx_small.mask + 1);
2043 			goto abort;
2044 		}
2045 	}
2046 	for (i = 0; i <= sc->rx_big.mask; i++) {
2047 		map = sc->rx_big.info[i].map;
2048 		err = myri10ge_get_buf_big(sc, map, i);
2049 		if (err) {
2050 			device_printf(sc->dev, "alloced %d/%d bigs\n",
2051 				      i, sc->rx_big.mask + 1);
2052 			goto abort;
2053 		}
2054 	}
2055 
2056 	/* Give the firmware the mtu and the big and small buffer
2057 	   sizes.  The firmware wants the big buf size to be a power
2058 	   of two. Luckily, FreeBSD's clusters are powers of two */
2059 	cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN;
2060 	err = myri10ge_send_cmd(sc, MYRI10GE_MCP_CMD_SET_MTU, &cmd);
2061 	cmd.data0 = MHLEN;
2062 	err |= myri10ge_send_cmd(sc,
2063 				 MYRI10GE_MCP_CMD_SET_SMALL_BUFFER_SIZE,
2064 				 &cmd);
2065 	cmd.data0 = sc->big_bytes;
2066 	err  |= myri10ge_send_cmd(sc,
2067 				  MYRI10GE_MCP_CMD_SET_BIG_BUFFER_SIZE,
2068 				  &cmd);
2069 	/* Now give him the pointer to the stats block */
2070 	cmd.data0 = MYRI10GE_LOWPART_TO_U32(sc->fw_stats_dma.bus_addr);
2071 	cmd.data1 = MYRI10GE_HIGHPART_TO_U32(sc->fw_stats_dma.bus_addr);
2072 	err = myri10ge_send_cmd(sc, MYRI10GE_MCP_CMD_SET_STATS_DMA, &cmd);
2073 
2074 	if (err != 0) {
2075 		device_printf(sc->dev, "failed to setup params\n");
2076 		goto abort;
2077 	}
2078 
2079 	/* Finally, start the firmware running */
2080 	err = myri10ge_send_cmd(sc, MYRI10GE_MCP_CMD_ETHERNET_UP, &cmd);
2081 	if (err) {
2082 		device_printf(sc->dev, "Couldn't bring up link\n");
2083 		goto abort;
2084 	}
2085 	sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
2086 	sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2087 
2088 	return 0;
2089 
2090 
2091 abort:
2092 	myri10ge_free_mbufs(sc);
2093 abort_with_irq:
2094 	bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
2095 abort_with_rings:
2096 	myri10ge_free_rings(sc);
2097 	return err;
2098 }
2099 
2100 static int
2101 myri10ge_close(myri10ge_softc_t *sc)
2102 {
2103 	myri10ge_cmd_t cmd;
2104 	int err, old_down_cnt;
2105 
2106 	sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2107 	old_down_cnt = sc->down_cnt;
2108 	mb();
2109 	err = myri10ge_send_cmd(sc, MYRI10GE_MCP_CMD_ETHERNET_DOWN, &cmd);
2110 	if (err) {
2111 		device_printf(sc->dev, "Couldn't bring down link\n");
2112 	}
2113 	if (old_down_cnt == sc->down_cnt) {
2114 		/* wait for down irq */
2115 		(void)tsleep(&sc->down_cnt, PWAIT, "down myri10ge", hz);
2116 	}
2117 	if (old_down_cnt == sc->down_cnt) {
2118 		device_printf(sc->dev, "never got down irq\n");
2119 	}
2120 	if (sc->ih != NULL)
2121 		bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
2122 	myri10ge_free_mbufs(sc);
2123 	myri10ge_free_rings(sc);
2124 	return 0;
2125 }
2126 
2127 
/*
 * Media-change callback handed to ifmedia_init().  Every request to
 * change the media is refused with EINVAL.
 */
static int
myri10ge_media_change(struct ifnet *ifp)
{
	(void)ifp;	/* the interface is not consulted */

	return (EINVAL);
}
2133 
2134 static int
2135 myri10ge_change_mtu(myri10ge_softc_t *sc, int mtu)
2136 {
2137 	struct ifnet *ifp = sc->ifp;
2138 	int real_mtu, old_mtu;
2139 	int err = 0;
2140 
2141 
2142 	real_mtu = mtu + ETHER_HDR_LEN;
2143 	if ((real_mtu > MYRI10GE_MAX_ETHER_MTU) ||
2144 	    real_mtu < 60)
2145 		return EINVAL;
2146 	sx_xlock(&sc->driver_lock);
2147 	old_mtu = ifp->if_mtu;
2148 	ifp->if_mtu = mtu;
2149 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2150 		myri10ge_close(sc);
2151 		err = myri10ge_open(sc);
2152 		if (err != 0) {
2153 			ifp->if_mtu = old_mtu;
2154 			myri10ge_close(sc);
2155 			(void) myri10ge_open(sc);
2156 		}
2157 	}
2158 	sx_xunlock(&sc->driver_lock);
2159 	return err;
2160 }
2161 
2162 static void
2163 myri10ge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2164 {
2165 	myri10ge_softc_t *sc = ifp->if_softc;
2166 
2167 
2168 	if (sc == NULL)
2169 		return;
2170 	ifmr->ifm_status = IFM_AVALID;
2171 	ifmr->ifm_status |= sc->fw_stats->link_up ? IFM_ACTIVE : 0;
2172 	ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
2173 	ifmr->ifm_active |= sc->fw_stats->link_up ? IFM_FDX : 0;
2174 }
2175 
2176 static int
2177 myri10ge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
2178 {
2179 	myri10ge_softc_t *sc = ifp->if_softc;
2180 	struct ifreq *ifr = (struct ifreq *)data;
2181 	int err, mask;
2182 
2183 	err = 0;
2184 	switch (command) {
2185 	case SIOCSIFADDR:
2186 	case SIOCGIFADDR:
2187 		err = ether_ioctl(ifp, command, data);
2188 		break;
2189 
2190 	case SIOCSIFMTU:
2191 		err = myri10ge_change_mtu(sc, ifr->ifr_mtu);
2192 		break;
2193 
2194 	case SIOCSIFFLAGS:
2195 		sx_xlock(&sc->driver_lock);
2196 		if (ifp->if_flags & IFF_UP) {
2197 			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
2198 				err = myri10ge_open(sc);
2199 		} else {
2200 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2201 				myri10ge_close(sc);
2202 		}
2203 		sx_xunlock(&sc->driver_lock);
2204 		break;
2205 
2206 	case SIOCADDMULTI:
2207 	case SIOCDELMULTI:
2208 		err = 0;
2209 		break;
2210 
2211 	case SIOCSIFCAP:
2212 		sx_xlock(&sc->driver_lock);
2213 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2214 		if (mask & IFCAP_TXCSUM) {
2215 			if (IFCAP_TXCSUM & ifp->if_capenable) {
2216 				ifp->if_capenable &= ~IFCAP_TXCSUM;
2217 				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
2218 			} else {
2219 				ifp->if_capenable |= IFCAP_TXCSUM;
2220 				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
2221 			}
2222 		} else if (mask & IFCAP_RXCSUM) {
2223 			if (IFCAP_RXCSUM & ifp->if_capenable) {
2224 				ifp->if_capenable &= ~IFCAP_RXCSUM;
2225 				sc->csum_flag &= ~MYRI10GE_MCP_ETHER_FLAGS_CKSUM;
2226 			} else {
2227 				ifp->if_capenable |= IFCAP_RXCSUM;
2228 				sc->csum_flag |= MYRI10GE_MCP_ETHER_FLAGS_CKSUM;
2229 			}
2230 		}
2231 		sx_xunlock(&sc->driver_lock);
2232 		break;
2233 
2234 	case SIOCGIFMEDIA:
2235 		err = ifmedia_ioctl(ifp, (struct ifreq *)data,
2236 				    &sc->media, command);
2237                 break;
2238 
2239 	default:
2240 		err = ENOTTY;
2241         }
2242 	return err;
2243 }
2244 
2245 static void
2246 myri10ge_fetch_tunables(myri10ge_softc_t *sc)
2247 {
2248 
2249 	TUNABLE_INT_FETCH("hw.myri10ge.flow_control_enabled",
2250 			  &myri10ge_flow_control);
2251 	TUNABLE_INT_FETCH("hw.myri10ge.intr_coal_delay",
2252 			  &myri10ge_intr_coal_delay);
2253 	TUNABLE_INT_FETCH("hw.myri10ge.nvidia_ecrc_enable",
2254 			  &myri10ge_nvidia_ecrc_enable);
2255 	TUNABLE_INT_FETCH("hw.myri10ge.skip_pio_read",
2256 			  &myri10ge_skip_pio_read);
2257 
2258 	if (myri10ge_intr_coal_delay < 0 ||
2259 	    myri10ge_intr_coal_delay > 10*1000)
2260 		myri10ge_intr_coal_delay = 30;
2261 	sc->pause = myri10ge_flow_control;
2262 }
2263 
2264 static int
2265 myri10ge_attach(device_t dev)
2266 {
2267 	myri10ge_softc_t *sc = device_get_softc(dev);
2268 	struct ifnet *ifp;
2269 	size_t bytes;
2270 	int rid, err, i;
2271 	uint16_t cmd;
2272 
2273 	sc->dev = dev;
2274 	myri10ge_fetch_tunables(sc);
2275 
2276 	err = bus_dma_tag_create(NULL,			/* parent */
2277 				 1,			/* alignment */
2278 				 4096,			/* boundary */
2279 				 BUS_SPACE_MAXADDR,	/* low */
2280 				 BUS_SPACE_MAXADDR,	/* high */
2281 				 NULL, NULL,		/* filter */
2282 				 MYRI10GE_MAX_ETHER_MTU,/* maxsize */
2283 				 MYRI10GE_MCP_ETHER_MAX_SEND_DESC, /* num segs */
2284 				 4096,			/* maxsegsize */
2285 				 0,			/* flags */
2286 				 NULL, NULL,		/* lock */
2287 				 &sc->parent_dmat);	/* tag */
2288 
2289 	if (err != 0) {
2290 		device_printf(sc->dev, "Err %d allocating parent dmat\n",
2291 			      err);
2292 		goto abort_with_nothing;
2293 	}
2294 
2295 	ifp = sc->ifp = if_alloc(IFT_ETHER);
2296 	if (ifp == NULL) {
2297 		device_printf(dev, "can not if_alloc()\n");
2298 		err = ENOSPC;
2299 		goto abort_with_parent_dmat;
2300 	}
2301 	mtx_init(&sc->cmd_lock, NULL,
2302 		 MTX_NETWORK_LOCK, MTX_DEF);
2303 	mtx_init(&sc->tx_lock, device_get_nameunit(dev),
2304 		 MTX_NETWORK_LOCK, MTX_DEF);
2305 	sx_init(&sc->driver_lock, device_get_nameunit(dev));
2306 
2307 	/* Enable DMA and Memory space access */
2308 	pci_enable_busmaster(dev);
2309 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2310 	cmd |= PCIM_CMD_MEMEN;
2311 	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2312 
2313 	/* Map the board into the kernel */
2314 	rid = PCIR_BARS;
2315 	sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
2316 					 ~0, 1, RF_ACTIVE);
2317 	if (sc->mem_res == NULL) {
2318 		device_printf(dev, "could not map memory\n");
2319 		err = ENXIO;
2320 		goto abort_with_lock;
2321 	}
2322 	sc->sram = rman_get_virtual(sc->mem_res);
2323 	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
2324 	if (sc->sram_size > rman_get_size(sc->mem_res)) {
2325 		device_printf(dev, "impossible memory region size %ld\n",
2326 			      rman_get_size(sc->mem_res));
2327 		err = ENXIO;
2328 		goto abort_with_mem_res;
2329 	}
2330 
2331 	/* make NULL terminated copy of the EEPROM strings section of
2332 	   lanai SRAM */
2333 	bzero(sc->eeprom_strings, MYRI10GE_EEPROM_STRINGS_SIZE);
2334 	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
2335 				rman_get_bushandle(sc->mem_res),
2336 				sc->sram_size - MYRI10GE_EEPROM_STRINGS_SIZE,
2337 				sc->eeprom_strings,
2338 				MYRI10GE_EEPROM_STRINGS_SIZE - 2);
2339 	err = myri10ge_parse_strings(sc);
2340 	if (err != 0)
2341 		goto abort_with_mem_res;
2342 
2343 	/* Enable write combining for efficient use of PCIe bus */
2344 	myri10ge_enable_wc(sc);
2345 
2346 	/* Allocate the out of band dma memory */
2347 	err = myri10ge_dma_alloc(sc, &sc->cmd_dma,
2348 				 sizeof (myri10ge_cmd_t), 64);
2349 	if (err != 0)
2350 		goto abort_with_mem_res;
2351 	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
2352 	err = myri10ge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
2353 	if (err != 0)
2354 		goto abort_with_cmd_dma;
2355 
2356 	err = myri10ge_dma_alloc(sc, &sc->fw_stats_dma,
2357 				 sizeof (*sc->fw_stats), 64);
2358 	if (err != 0)
2359 		goto abort_with_zeropad_dma;
2360 	sc->fw_stats = (mcp_stats_t *)sc->fw_stats_dma.addr;
2361 
2362 
2363 	/* allocate interrupt queues */
2364 	bytes = myri10ge_max_intr_slots * sizeof (*sc->intr.q[0]);
2365 	for (i = 0; i < MYRI10GE_NUM_INTRQS; i++) {
2366 		err = myri10ge_dma_alloc(sc, &sc->intr.dma[i],
2367 					 bytes, 4096);
2368 		if (err != 0)
2369 			goto abort_with_intrq;
2370 		sc->intr.q[i] = (mcp_slot_t *)sc->intr.dma[i].addr;
2371 	}
2372 
2373 	/* Add our ithread  */
2374 	rid = 0;
2375 	sc->irq_res = bus_alloc_resource(dev, SYS_RES_IRQ, &rid, 0, ~0,
2376 					 1, RF_SHAREABLE | RF_ACTIVE);
2377 	if (sc->irq_res == NULL) {
2378 		device_printf(dev, "could not alloc interrupt\n");
2379 		goto abort_with_intrq;
2380 	}
2381 
2382 	/* load the firmware */
2383 	myri10ge_select_firmware(sc);
2384 
2385 	err = myri10ge_load_firmware(sc);
2386 	if (err != 0)
2387 		goto abort_with_irq_res;
2388 	err = myri10ge_reset(sc);
2389 	if (err != 0)
2390 		goto abort_with_irq_res;
2391 
2392 	/* hook into the network stack */
2393 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2394 	ifp->if_baudrate = 100000000;
2395 	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM;
2396 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP;
2397 	ifp->if_capenable = ifp->if_capabilities;
2398 	sc->csum_flag |= MYRI10GE_MCP_ETHER_FLAGS_CKSUM;
2399         ifp->if_init = myri10ge_init;
2400         ifp->if_softc = sc;
2401         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2402         ifp->if_ioctl = myri10ge_ioctl;
2403         ifp->if_start = myri10ge_start;
2404 	ifp->if_watchdog = myri10ge_watchdog;
2405 	ether_ifattach(ifp, sc->mac_addr);
2406 	/* ether_ifattach sets mtu to 1500 */
2407 	ifp->if_mtu = MYRI10GE_MAX_ETHER_MTU - ETHER_HDR_LEN;
2408 
2409 	/* Initialise the ifmedia structure */
2410 	ifmedia_init(&sc->media, 0, myri10ge_media_change,
2411 		     myri10ge_media_status);
2412 	ifmedia_add(&sc->media, IFM_ETHER|IFM_AUTO, 0, NULL);
2413 	myri10ge_add_sysctls(sc);
2414 	return 0;
2415 
2416 abort_with_irq_res:
2417 	bus_release_resource(dev, SYS_RES_IRQ, 0, sc->irq_res);
2418 abort_with_intrq:
2419 	for (i = 0;  i < MYRI10GE_NUM_INTRQS; i++) {
2420 		if (sc->intr.q[i] == NULL)
2421 			continue;
2422 		sc->intr.q[i] = NULL;
2423 		myri10ge_dma_free(&sc->intr.dma[i]);
2424 	}
2425 	myri10ge_dma_free(&sc->fw_stats_dma);
2426 abort_with_zeropad_dma:
2427 	myri10ge_dma_free(&sc->zeropad_dma);
2428 abort_with_cmd_dma:
2429 	myri10ge_dma_free(&sc->cmd_dma);
2430 abort_with_mem_res:
2431 	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
2432 abort_with_lock:
2433 	pci_disable_busmaster(dev);
2434 	mtx_destroy(&sc->cmd_lock);
2435 	mtx_destroy(&sc->tx_lock);
2436 	sx_destroy(&sc->driver_lock);
2437 	if_free(ifp);
2438 abort_with_parent_dmat:
2439 	bus_dma_tag_destroy(sc->parent_dmat);
2440 
2441 abort_with_nothing:
2442 	return err;
2443 }
2444 
2445 static int
2446 myri10ge_detach(device_t dev)
2447 {
2448 	myri10ge_softc_t *sc = device_get_softc(dev);
2449 	int i;
2450 
2451 	sx_xlock(&sc->driver_lock);
2452 	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
2453 		myri10ge_close(sc);
2454 	sx_xunlock(&sc->driver_lock);
2455 	ether_ifdetach(sc->ifp);
2456 	bus_release_resource(dev, SYS_RES_IRQ, 0, sc->irq_res);
2457 	for (i = 0;  i < MYRI10GE_NUM_INTRQS; i++) {
2458 		if (sc->intr.q[i] == NULL)
2459 			continue;
2460 		sc->intr.q[i] = NULL;
2461 		myri10ge_dma_free(&sc->intr.dma[i]);
2462 	}
2463 	myri10ge_dma_free(&sc->fw_stats_dma);
2464 	myri10ge_dma_free(&sc->zeropad_dma);
2465 	myri10ge_dma_free(&sc->cmd_dma);
2466 	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
2467 	pci_disable_busmaster(dev);
2468 	mtx_destroy(&sc->cmd_lock);
2469 	mtx_destroy(&sc->tx_lock);
2470 	sx_destroy(&sc->driver_lock);
2471 	if_free(sc->ifp);
2472 	bus_dma_tag_destroy(sc->parent_dmat);
2473 	return 0;
2474 }
2475 
2476 static int
2477 myri10ge_shutdown(device_t dev)
2478 {
2479 	return 0;
2480 }
2481 
2482 /*
2483   This file uses Myri10GE driver indentation.
2484 
2485   Local Variables:
2486   c-file-style:"linux"
2487   tab-width:8
2488   End:
2489 */
2490