xref: /freebsd/sys/dev/mxge/if_mxge.c (revision 66c14b21d3ab0b18376563ba643ddb49b4fd33dd)
1 /*******************************************************************************
2 
3 Copyright (c) 2006, Myricom Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Redistributions in binary form must reproduce the above copyright
13     notice, this list of conditions and the following disclaimer in the
14     documentation and/or other materials provided with the distribution.
15 
16  3. Neither the name of the Myricom Inc, nor the names of its
17     contributors may be used to endorse or promote products derived from
18     this software without specific prior written permission.
19 
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31 
32 ***************************************************************************/
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/linker.h>
40 #include <sys/firmware.h>
41 #include <sys/endian.h>
42 #include <sys/sockio.h>
43 #include <sys/mbuf.h>
44 #include <sys/malloc.h>
45 #include <sys/kdb.h>
46 #include <sys/kernel.h>
47 #include <sys/module.h>
48 #include <sys/memrange.h>
49 #include <sys/socket.h>
50 #include <sys/sysctl.h>
51 #include <sys/sx.h>
52 
53 #include <net/if.h>
54 #include <net/if_arp.h>
55 #include <net/ethernet.h>
56 #include <net/if_dl.h>
57 #include <net/if_media.h>
58 
59 #include <net/bpf.h>
60 
61 #include <net/if_types.h>
62 #include <net/if_vlan_var.h>
63 #include <net/zlib.h>
64 
65 #include <netinet/in_systm.h>
66 #include <netinet/in.h>
67 #include <netinet/ip.h>
68 
69 #include <machine/bus.h>
70 #include <machine/resource.h>
71 #include <sys/bus.h>
72 #include <sys/rman.h>
73 
74 #include <dev/pci/pcireg.h>
75 #include <dev/pci/pcivar.h>
76 
77 #include <vm/vm.h>		/* for pmap_mapdev() */
78 #include <vm/pmap.h>
79 
80 #include <dev/myri10ge/myri10ge_mcp.h>
81 #include <dev/myri10ge/mcp_gen_header.h>
82 #include <dev/myri10ge/if_myri10ge_var.h>
83 
/*
 * Driver-wide tunable parameters.
 */

/* non-zero: myri10ge_select_firmware() may try to enable ECRC on an
   upstream Nvidia bridge */
static int myri10ge_nvidia_ecrc_enable = 1;

/* slot count used to size the interrupt queue reported to the firmware
   in myri10ge_reset() */
static int myri10ge_max_intr_slots = 128;

/* interrupt coalescing delay programmed by myri10ge_reset(); the sysctl
   that exposes it describes the unit as usecs */
static int myri10ge_intr_coal_delay = 30;

/* exported read/write through the "skip_pio_read" sysctl */
static int myri10ge_skip_pio_read = 0;

/* NOTE(review): presumably the initial flow control setting; the code
   that consumes it is not in this part of the file */
static int myri10ge_flow_control = 1;

/* firmware image names chosen by myri10ge_select_firmware() */
static char *myri10ge_fw_unaligned = "myri10ge_ethp_z8e";
static char *myri10ge_fw_aligned = "myri10ge_eth_z8e";
92 
93 static int myri10ge_probe(device_t dev);
94 static int myri10ge_attach(device_t dev);
95 static int myri10ge_detach(device_t dev);
96 static int myri10ge_shutdown(device_t dev);
97 static void myri10ge_intr(void *arg);
98 
99 static device_method_t myri10ge_methods[] =
100 {
101   /* Device interface */
102   DEVMETHOD(device_probe, myri10ge_probe),
103   DEVMETHOD(device_attach, myri10ge_attach),
104   DEVMETHOD(device_detach, myri10ge_detach),
105   DEVMETHOD(device_shutdown, myri10ge_shutdown),
106   {0, 0}
107 };
108 
109 static driver_t myri10ge_driver =
110 {
111   "myri10ge",
112   myri10ge_methods,
113   sizeof(myri10ge_softc_t),
114 };
115 
116 static devclass_t myri10ge_devclass;
117 
118 /* Declare ourselves to be a child of the PCI bus.*/
119 DRIVER_MODULE(myri10ge, pci, myri10ge_driver, myri10ge_devclass, 0, 0);
120 MODULE_DEPEND(myri10ge, firmware, 1, 1, 1);
121 
122 static int
123 myri10ge_probe(device_t dev)
124 {
125   if ((pci_get_vendor(dev) == MYRI10GE_PCI_VENDOR_MYRICOM) &&
126       (pci_get_device(dev) == MYRI10GE_PCI_DEVICE_Z8E)) {
127 	  device_set_desc(dev, "Myri10G-PCIE-8A");
128 	  return 0;
129   }
130   return ENXIO;
131 }
132 
133 static void
134 myri10ge_enable_wc(myri10ge_softc_t *sc)
135 {
136 	struct mem_range_desc mrdesc;
137 	vm_paddr_t pa;
138 	vm_offset_t len;
139 	int err, action;
140 
141 	pa = rman_get_start(sc->mem_res);
142 	len = rman_get_size(sc->mem_res);
143 	mrdesc.mr_base = pa;
144 	mrdesc.mr_len = len;
145 	mrdesc.mr_flags = MDF_WRITECOMBINE;
146 	action = MEMRANGE_SET_UPDATE;
147 	strcpy((char *)&mrdesc.mr_owner, "myri10ge");
148 	err = mem_range_attr_set(&mrdesc, &action);
149 	if (err != 0) {
150 		device_printf(sc->dev,
151 			      "w/c failed for pa 0x%lx, len 0x%lx, err = %d\n",
152 			      (unsigned long)pa, (unsigned long)len, err);
153 	} else {
154 		sc->wc = 1;
155 	}
156 }
157 
158 
159 /* callback to get our DMA address */
160 static void
161 myri10ge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
162 			 int error)
163 {
164 	if (error == 0) {
165 		*(bus_addr_t *) arg = segs->ds_addr;
166 	}
167 }
168 
169 static int
170 myri10ge_dma_alloc(myri10ge_softc_t *sc, myri10ge_dma_t *dma, size_t bytes,
171 		   bus_size_t alignment)
172 {
173 	int err;
174 	device_t dev = sc->dev;
175 
176 	/* allocate DMAable memory tags */
177 	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
178 				 alignment,		/* alignment */
179 				 4096,			/* boundary */
180 				 BUS_SPACE_MAXADDR,	/* low */
181 				 BUS_SPACE_MAXADDR,	/* high */
182 				 NULL, NULL,		/* filter */
183 				 bytes,			/* maxsize */
184 				 1,			/* num segs */
185 				 4096,			/* maxsegsize */
186 				 BUS_DMA_COHERENT,	/* flags */
187 				 NULL, NULL,		/* lock */
188 				 &dma->dmat);		/* tag */
189 	if (err != 0) {
190 		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
191 		return err;
192 	}
193 
194 	/* allocate DMAable memory & map */
195 	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
196 			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
197 				| BUS_DMA_ZERO),  &dma->map);
198 	if (err != 0) {
199 		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
200 		goto abort_with_dmat;
201 	}
202 
203 	/* load the memory */
204 	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
205 			      myri10ge_dmamap_callback,
206 			      (void *)&dma->bus_addr, 0);
207 	if (err != 0) {
208 		device_printf(dev, "couldn't load map (err = %d)\n", err);
209 		goto abort_with_mem;
210 	}
211 	return 0;
212 
213 abort_with_mem:
214 	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
215 abort_with_dmat:
216 	(void)bus_dma_tag_destroy(dma->dmat);
217 	return err;
218 }
219 
220 
221 static void
222 myri10ge_dma_free(myri10ge_dma_t *dma)
223 {
224 	bus_dmamap_unload(dma->dmat, dma->map);
225 	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
226 	(void)bus_dma_tag_destroy(dma->dmat);
227 }
228 
229 /*
230  * The eeprom strings on the lanaiX have the format
231  * SN=x\0
232  * MAC=x:x:x:x:x:x\0
233  * PC=text\0
234  */
235 
236 static int
237 myri10ge_parse_strings(myri10ge_softc_t *sc)
238 {
239 #define MYRI10GE_NEXT_STRING(p) while(ptr < limit && *ptr++)
240 
241 	char *ptr, *limit;
242 	int i, found_mac;
243 
244 	ptr = sc->eeprom_strings;
245 	limit = sc->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE;
246 	found_mac = 0;
247 	while (ptr < limit && *ptr != '\0') {
248 		if (memcmp(ptr, "MAC=", 4) == 0) {
249 			ptr+=4;
250 			sc->mac_addr_string = ptr;
251 			for (i = 0; i < 6; i++) {
252 				if ((ptr + 2) > limit)
253 					goto abort;
254 				sc->mac_addr[i] = strtoul(ptr, NULL, 16);
255 				found_mac = 1;
256 				ptr += 3;
257 			}
258 		} else if (memcmp(ptr, "PC=", 4) == 0) {
259 			sc->product_code_string = ptr;
260 		}
261 		MYRI10GE_NEXT_STRING(ptr);
262 	}
263 
264 	if (found_mac)
265 		return 0;
266 
267  abort:
268 	device_printf(sc->dev, "failed to parse eeprom_strings\n");
269 
270 	return ENXIO;
271 }
272 
273 #if #cpu(i386) || defined __i386 || defined i386 || defined __i386__ || #cpu(x86_64) || defined __x86_64__
274 static int
275 myri10ge_enable_nvidia_ecrc(myri10ge_softc_t *sc, device_t pdev)
276 {
277 	uint32_t val;
278 	unsigned long off;
279 	char *va, *cfgptr;
280 	uint16_t vendor_id, device_id;
281 	uintptr_t bus, slot, func, ivend, idev;
282 	uint32_t *ptr32;
283 
284 	/* XXXX
285 	   Test below is commented because it is believed that doing
286 	   config read/write beyond 0xff will access the config space
287 	   for the next larger function.  Uncomment this and remove
288 	   the hacky pmap_mapdev() way of accessing config space when
289 	   FreeBSD grows support for extended pcie config space access
290 	*/
291 #if 0
292 	/* See if we can, by some miracle, access the extended
293 	   config space */
294 	val = pci_read_config(pdev, 0x178, 4);
295 	if (val != 0xffffffff) {
296 		val |= 0x40;
297 		pci_write_config(pdev, 0x178, val, 4);
298 		return 0;
299 	}
300 #endif
301 	/* Rather than using normal pci config space writes, we must
302 	 * map the Nvidia config space ourselves.  This is because on
303 	 * opteron/nvidia class machine the 0xe000000 mapping is
304 	 * handled by the nvidia chipset, that means the internal PCI
305 	 * device (the on-chip northbridge), or the amd-8131 bridge
306 	 * and things behind them are not visible by this method.
307 	 */
308 
309 	BUS_READ_IVAR(device_get_parent(pdev), pdev,
310 		      PCI_IVAR_BUS, &bus);
311 	BUS_READ_IVAR(device_get_parent(pdev), pdev,
312 		      PCI_IVAR_SLOT, &slot);
313 	BUS_READ_IVAR(device_get_parent(pdev), pdev,
314 		      PCI_IVAR_FUNCTION, &func);
315 	BUS_READ_IVAR(device_get_parent(pdev), pdev,
316 		      PCI_IVAR_VENDOR, &ivend);
317 	BUS_READ_IVAR(device_get_parent(pdev), pdev,
318 		      PCI_IVAR_DEVICE, &idev);
319 
320 	off =  0xe0000000UL
321 		+ 0x00100000UL * (unsigned long)bus
322 		+ 0x00001000UL * (unsigned long)(func
323 						 + 8 * slot);
324 
325 	/* map it into the kernel */
326 	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
327 
328 
329 	if (va == NULL) {
330 		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
331 		return EIO;
332 	}
333 	/* get a pointer to the config space mapped into the kernel */
334 	cfgptr = va + (off & PAGE_MASK);
335 
336 	/* make sure that we can really access it */
337 	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
338 	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
339 	if (! (vendor_id == ivend && device_id == idev)) {
340 		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
341 			      vendor_id, device_id);
342 		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
343 		return EIO;
344 	}
345 
346 	ptr32 = (uint32_t*)(cfgptr + 0x178);
347 	val = *ptr32;
348 
349 	if (val == 0xffffffff) {
350 		device_printf(sc->dev, "extended mapping failed\n");
351 		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
352 		return EIO;
353 	}
354 	*ptr32 = val | 0x40;
355 	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
356 	device_printf(sc->dev,
357 		      "Enabled ECRC on upstream Nvidia bridge at %d:%d:%d\n",
358 		      (int)bus, (int)slot, (int)func);
359 	return 0;
360 }
361 #else
362 static int
363 myri10ge_enable_nvidia_ecrc(myri10ge_softc_t *sc, device_t pdev)
364 {
365 	device_printf(sc->dev,
366 		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
367 	return ENXIO;
368 }
369 #endif
370 /*
371  * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
372  * when the PCI-E Completion packets are aligned on an 8-byte
373  * boundary.  Some PCI-E chip sets always align Completion packets; on
374  * the ones that do not, the alignment can be enforced by enabling
375  * ECRC generation (if supported).
376  *
377  * When PCI-E Completion packets are not aligned, it is actually more
378  * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
379  *
380  * If the driver can neither enable ECRC nor verify that it has
381  * already been enabled, then it must use a firmware image which works
382  * around unaligned completion packets (ethp_z8e.dat), and it should
383  * also ensure that it never gives the device a Read-DMA which is
384  * larger than 2KB by setting the tx.boundary to 2KB.  If ECRC is
385  * enabled, then the driver should use the aligned (eth_z8e.dat)
386  * firmware image, and set tx.boundary to 4KB.
387  */
388 
389 static void
390 myri10ge_select_firmware(myri10ge_softc_t *sc)
391 {
392 	int err, aligned = 0;
393 	device_t pdev;
394 	uint16_t pvend, pdid;
395 
396 	pdev = device_get_parent(device_get_parent(sc->dev));
397 	if (pdev == NULL) {
398 		device_printf(sc->dev, "could not find parent?\n");
399 		goto abort;
400 	}
401 	pvend = pci_read_config(pdev, PCIR_VENDOR, 2);
402 	pdid = pci_read_config(pdev, PCIR_DEVICE, 2);
403 
404 	/* see if we can enable ECRC's on an upstream
405 	   Nvidia bridge */
406 	if (myri10ge_nvidia_ecrc_enable &&
407 	    (pvend == 0x10de && pdid == 0x005d)) {
408 		err = myri10ge_enable_nvidia_ecrc(sc, pdev);
409 		if (err == 0) {
410 			aligned = 1;
411 			device_printf(sc->dev,
412 				      "Assuming aligned completions (ECRC)\n");
413 		}
414 	}
415 	/* see if the upstream bridge is known to
416 	   provided aligned completions */
417 	if (/* HT2000  */ (pvend == 0x1166 && pdid == 0x0132) ||
418 	    /* Ontario */ (pvend == 0x10b5 && pdid == 0x8532)) {
419 		device_printf(sc->dev,
420 			      "Assuming aligned completions (0x%x:0x%x)\n",
421 			      pvend, pdid);
422 	}
423 
424 abort:
425 	if (aligned) {
426 		sc->fw_name = myri10ge_fw_aligned;
427 		sc->tx.boundary = 4096;
428 	} else {
429 		sc->fw_name = myri10ge_fw_unaligned;
430 		sc->tx.boundary = 2048;
431 	}
432 }
433 
/*
 * Two views of the same character pointer, one const-qualified and one
 * not.  myri10ge_load_firmware_helper() stores through ro_char and
 * reads back rw_char to pass read-only firmware data to a function
 * that takes a non-const pointer.
 */
union qualhack {
	const char	*ro_char;	/* written by the user of the union */
	char		*rw_char;	/* same pointer, qualifier dropped */
};
439 
440 
441 static int
442 myri10ge_load_firmware_helper(myri10ge_softc_t *sc, uint32_t *limit)
443 {
444 	struct firmware *fw;
445 	const mcp_gen_header_t *hdr;
446 	unsigned hdr_offset;
447 	const char *fw_data;
448 	union qualhack hack;
449 	int status;
450 
451 
452 	fw = firmware_get(sc->fw_name);
453 
454 	if (fw == NULL) {
455 		device_printf(sc->dev, "Could not find firmware image %s\n",
456 			      sc->fw_name);
457 		return ENOENT;
458 	}
459 	if (fw->datasize > *limit ||
460 	    fw->datasize < MCP_HEADER_PTR_OFFSET + 4) {
461 		device_printf(sc->dev, "Firmware image %s too large (%d/%d)\n",
462 			      sc->fw_name, (int)fw->datasize, (int) *limit);
463 		status = ENOSPC;
464 		goto abort_with_fw;
465 	}
466 	*limit = fw->datasize;
467 
468 	/* check id */
469 	fw_data = (const char *)fw->data;
470 	hdr_offset = htobe32(*(const uint32_t *)
471 			     (fw_data + MCP_HEADER_PTR_OFFSET));
472 	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw->datasize) {
473 		device_printf(sc->dev, "Bad firmware file");
474 		status = EIO;
475 		goto abort_with_fw;
476 	}
477 	hdr = (const void*)(fw_data + hdr_offset);
478 	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
479 		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
480 			      be32toh(hdr->mcp_type));
481 		status = EIO;
482 		goto abort_with_fw;
483 	}
484 
485 	/* save firmware version for sysctl */
486 	strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
487 	device_printf(sc->dev, "firmware id: %s\n", hdr->version);
488 
489 	hack.ro_char = fw_data;
490 	/* Copy the inflated firmware to NIC SRAM. */
491 	myri10ge_pio_copy(&sc->sram[MYRI10GE_FW_OFFSET],
492 			  hack.rw_char,  *limit);
493 
494 	status = 0;
495 abort_with_fw:
496 	firmware_put(fw, FIRMWARE_UNLOAD);
497 	return status;
498 }
499 
500 /*
501  * Enable or disable periodic RDMAs from the host to make certain
502  * chipsets resend dropped PCIe messages
503  */
504 
505 static void
506 myri10ge_dummy_rdma(myri10ge_softc_t *sc, int enable)
507 {
508 	char buf_bytes[72];
509 	volatile uint32_t *confirm;
510 	volatile char *submit;
511 	uint32_t *buf, dma_low, dma_high;
512 	int i;
513 
514 	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
515 
516 	/* clear confirmation addr */
517 	confirm = (volatile uint32_t *)sc->cmd;
518 	*confirm = 0;
519 	mb();
520 
521 	/* send an rdma command to the PCIe engine, and wait for the
522 	   response in the confirmation address.  The firmware should
523 	   write a -1 there to indicate it is alive and well
524 	*/
525 
526 	dma_low = MYRI10GE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
527 	dma_high = MYRI10GE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
528 	buf[0] = htobe32(dma_high);		/* confirm addr MSW */
529 	buf[1] = htobe32(dma_low);		/* confirm addr LSW */
530 	buf[2] = htobe32(0xffffffff);		/* confirm data */
531 	dma_low = MYRI10GE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
532 	dma_high = MYRI10GE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
533 	buf[3] = htobe32(dma_high); 		/* dummy addr MSW */
534 	buf[4] = htobe32(dma_low); 		/* dummy addr LSW */
535 	buf[5] = htobe32(enable);			/* enable? */
536 
537 
538 	submit = (volatile char *)(sc->sram + 0xfc01c0);
539 
540 	myri10ge_pio_copy(submit, buf, 64);
541 	mb();
542 	DELAY(1000);
543 	mb();
544 	i = 0;
545 	while (*confirm != 0xffffffff && i < 20) {
546 		DELAY(1000);
547 		i++;
548 	}
549 	if (*confirm != 0xffffffff) {
550 		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
551 			      (enable ? "enable" : "disable"), confirm,
552 			      *confirm);
553 	}
554 	return;
555 }
556 
557 static int
558 myri10ge_send_cmd(myri10ge_softc_t *sc, uint32_t cmd,
559 		  myri10ge_cmd_t *data)
560 {
561 	mcp_cmd_t *buf;
562 	char buf_bytes[sizeof(*buf) + 8];
563 	volatile mcp_cmd_response_t *response = sc->cmd;
564 	volatile char *cmd_addr = sc->sram + MYRI10GE_MCP_CMD_OFFSET;
565 	uint32_t dma_low, dma_high;
566 	int sleep_total = 0;
567 
568 	/* ensure buf is aligned to 8 bytes */
569 	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
570 
571 	buf->data0 = htobe32(data->data0);
572 	buf->data1 = htobe32(data->data1);
573 	buf->data2 = htobe32(data->data2);
574 	buf->cmd = htobe32(cmd);
575 	dma_low = MYRI10GE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
576 	dma_high = MYRI10GE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
577 
578 	buf->response_addr.low = htobe32(dma_low);
579 	buf->response_addr.high = htobe32(dma_high);
580 	mtx_lock(&sc->cmd_lock);
581 	response->result = 0xffffffff;
582 	mb();
583 	myri10ge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
584 
585 	/* wait up to 2 seconds */
586 	for (sleep_total = 0; sleep_total <  (2 * 1000); sleep_total += 10) {
587 		bus_dmamap_sync(sc->cmd_dma.dmat,
588 				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
589 		mb();
590 		if (response->result != 0xffffffff) {
591 			if (response->result == 0) {
592 				data->data0 = be32toh(response->data);
593 				mtx_unlock(&sc->cmd_lock);
594 				return 0;
595 			} else {
596 				device_printf(sc->dev,
597 					      "myri10ge: command %d "
598 					      "failed, result = %d\n",
599 					      cmd, be32toh(response->result));
600 				mtx_unlock(&sc->cmd_lock);
601 				return ENXIO;
602 			}
603 		}
604 		DELAY(1000 * 10);
605 	}
606 	mtx_unlock(&sc->cmd_lock);
607 	device_printf(sc->dev, "myri10ge: command %d timed out"
608 		      "result = %d\n",
609 		      cmd, be32toh(response->result));
610 	return EAGAIN;
611 }
612 
613 
614 static int
615 myri10ge_load_firmware(myri10ge_softc_t *sc)
616 {
617 	volatile uint32_t *confirm;
618 	volatile char *submit;
619 	char buf_bytes[72];
620 	uint32_t *buf, size, dma_low, dma_high;
621 	int status, i;
622 
623 	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
624 
625 	size = sc->sram_size;
626 	status = myri10ge_load_firmware_helper(sc, &size);
627 	if (status) {
628 		device_printf(sc->dev, "firmware loading failed\n");
629 		return status;
630 	}
631 	/* clear confirmation addr */
632 	confirm = (volatile uint32_t *)sc->cmd;
633 	*confirm = 0;
634 	mb();
635 	/* send a reload command to the bootstrap MCP, and wait for the
636 	   response in the confirmation address.  The firmware should
637 	   write a -1 there to indicate it is alive and well
638 	*/
639 
640 	dma_low = MYRI10GE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
641 	dma_high = MYRI10GE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
642 
643 	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
644 	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
645 	buf[2] = htobe32(0xffffffff);	/* confirm data */
646 
647 	/* FIX: All newest firmware should un-protect the bottom of
648 	   the sram before handoff. However, the very first interfaces
649 	   do not. Therefore the handoff copy must skip the first 8 bytes
650 	*/
651 					/* where the code starts*/
652 	buf[3] = htobe32(MYRI10GE_FW_OFFSET + 8);
653 	buf[4] = htobe32(size - 8); 	/* length of code */
654 	buf[5] = htobe32(8);		/* where to copy to */
655 	buf[6] = htobe32(0);		/* where to jump to */
656 
657 	submit = (volatile char *)(sc->sram + 0xfc0000);
658 	myri10ge_pio_copy(submit, buf, 64);
659 	mb();
660 	DELAY(1000);
661 	mb();
662 	i = 0;
663 	while (*confirm != 0xffffffff && i < 20) {
664 		DELAY(1000*10);
665 		i++;
666 		bus_dmamap_sync(sc->cmd_dma.dmat,
667 				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
668 	}
669 	if (*confirm != 0xffffffff) {
670 		device_printf(sc->dev,"handoff failed (%p = 0x%x)",
671 			confirm, *confirm);
672 
673 		return ENXIO;
674 	}
675 	myri10ge_dummy_rdma(sc, 1);
676 	return 0;
677 }
678 
679 static int
680 myri10ge_update_mac_address(myri10ge_softc_t *sc)
681 {
682 	myri10ge_cmd_t cmd;
683 	uint8_t *addr = sc->mac_addr;
684 	int status;
685 
686 
687 	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
688 		     | (addr[2] << 8) | addr[3]);
689 
690 	cmd.data1 = ((addr[4] << 8) | (addr[5]));
691 
692 	status = myri10ge_send_cmd(sc, MYRI10GE_MCP_SET_MAC_ADDRESS, &cmd);
693 	return status;
694 }
695 
696 static int
697 myri10ge_change_pause(myri10ge_softc_t *sc, int pause)
698 {
699 	myri10ge_cmd_t cmd;
700 	int status;
701 
702 	if (pause)
703 		status = myri10ge_send_cmd(sc,
704 					   MYRI10GE_MCP_ENABLE_FLOW_CONTROL,
705 					   &cmd);
706 	else
707 		status = myri10ge_send_cmd(sc,
708 					   MYRI10GE_MCP_DISABLE_FLOW_CONTROL,
709 					   &cmd);
710 
711 	if (status) {
712 		device_printf(sc->dev, "Failed to set flow control mode\n");
713 		return ENXIO;
714 	}
715 	sc->pause = pause;
716 	return 0;
717 }
718 
719 static void
720 myri10ge_change_promisc(myri10ge_softc_t *sc, int promisc)
721 {
722 	myri10ge_cmd_t cmd;
723 	int status;
724 
725 	if (promisc)
726 		status = myri10ge_send_cmd(sc,
727 					   MYRI10GE_MCP_ENABLE_PROMISC,
728 					   &cmd);
729 	else
730 		status = myri10ge_send_cmd(sc,
731 					   MYRI10GE_MCP_DISABLE_PROMISC,
732 					   &cmd);
733 
734 	if (status) {
735 		device_printf(sc->dev, "Failed to set promisc mode\n");
736 	}
737 }
738 
739 static int
740 myri10ge_reset(myri10ge_softc_t *sc)
741 {
742 
743 	myri10ge_cmd_t cmd;
744 	int status, i;
745 
746 	/* try to send a reset command to the card to see if it
747 	   is alive */
748 	memset(&cmd, 0, sizeof (cmd));
749 	status = myri10ge_send_cmd(sc, MYRI10GE_MCP_CMD_RESET, &cmd);
750 	if (status != 0) {
751 		device_printf(sc->dev, "failed reset\n");
752 		return ENXIO;
753 	}
754 
755 	/* Now exchange information about interrupts  */
756 
757 	cmd.data0 = (uint32_t)
758 		(myri10ge_max_intr_slots * sizeof (*sc->intr.q[0]));
759 	status = myri10ge_send_cmd(sc, MYRI10GE_MCP_CMD_SET_INTRQ_SIZE, &cmd);
760 	for (i = 0; (status == 0) && (i < MYRI10GE_NUM_INTRQS); i++) {
761 		cmd.data0 = MYRI10GE_LOWPART_TO_U32(sc->intr.dma[i].bus_addr);
762 		cmd.data1 = MYRI10GE_HIGHPART_TO_U32(sc->intr.dma[i].bus_addr);
763 		status |=
764 			myri10ge_send_cmd(sc, (i +
765 					       MYRI10GE_MCP_CMD_SET_INTRQ0_DMA),
766 					  &cmd);
767 	}
768 
769 	cmd.data0 = sc->intr_coal_delay = myri10ge_intr_coal_delay;
770 	status |= myri10ge_send_cmd(sc,
771 				    MYRI10GE_MCP_CMD_SET_INTR_COAL_DELAY, &cmd);
772 
773 	if (sc->msi_enabled) {
774 		status |= myri10ge_send_cmd
775 			(sc,  MYRI10GE_MCP_CMD_GET_IRQ_ACK_OFFSET, &cmd);
776 	} else {
777 		status |= myri10ge_send_cmd
778 			(sc,  MYRI10GE_MCP_CMD_GET_IRQ_ACK_DEASSERT_OFFSET,
779 			 &cmd);
780 	}
781 	if (status != 0) {
782 		device_printf(sc->dev, "failed set interrupt parameters\n");
783 		return status;
784 	}
785 	sc->irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
786 
787 	/* reset mcp/driver shared state back to 0 */
788 	sc->intr.seqnum = 0;
789 	sc->intr.intrq = 0;
790 	sc->intr.slot = 0;
791 	sc->tx.req = 0;
792 	sc->tx.done = 0;
793 	sc->rx_big.cnt = 0;
794 	sc->rx_small.cnt = 0;
795 	sc->rdma_tags_available = 15;
796 	status = myri10ge_update_mac_address(sc);
797 	myri10ge_change_promisc(sc, 0);
798 	myri10ge_change_pause(sc, sc->pause);
799 	return status;
800 }
801 
802 static int
803 myri10ge_change_intr_coal(SYSCTL_HANDLER_ARGS)
804 {
805         myri10ge_cmd_t cmd;
806         myri10ge_softc_t *sc;
807         unsigned int intr_coal_delay;
808         int err;
809 
810         sc = arg1;
811         intr_coal_delay = sc->intr_coal_delay;
812         err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
813         if (err != 0) {
814                 return err;
815         }
816         if (intr_coal_delay == sc->intr_coal_delay)
817                 return 0;
818 
819         if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
820                 return EINVAL;
821 
822 	sx_xlock(&sc->driver_lock);
823         cmd.data0 = intr_coal_delay;
824         err = myri10ge_send_cmd(sc, MYRI10GE_MCP_CMD_SET_INTR_COAL_DELAY,
825 				  &cmd);
826         if (err == 0) {
827 		sc->intr_coal_delay = intr_coal_delay;
828 	}
829 	sx_xunlock(&sc->driver_lock);
830         return err;
831 }
832 
833 static int
834 myri10ge_change_flow_control(SYSCTL_HANDLER_ARGS)
835 {
836         myri10ge_softc_t *sc;
837         unsigned int enabled;
838         int err;
839 
840         sc = arg1;
841         enabled = sc->pause;
842         err = sysctl_handle_int(oidp, &enabled, arg2, req);
843         if (err != 0) {
844                 return err;
845         }
846         if (enabled == sc->pause)
847                 return 0;
848 
849 	sx_xlock(&sc->driver_lock);
850 	err = myri10ge_change_pause(sc, enabled);
851 	sx_xunlock(&sc->driver_lock);
852         return err;
853 }
854 
855 static int
856 myri10ge_handle_be32(SYSCTL_HANDLER_ARGS)
857 {
858         int err;
859 
860         if (arg1 == NULL)
861                 return EFAULT;
862         arg2 = be32toh(*(int *)arg1);
863         arg1 = NULL;
864         err = sysctl_handle_int(oidp, arg1, arg2, req);
865 
866         return err;
867 }
868 
869 static void
870 myri10ge_add_sysctls(myri10ge_softc_t *sc)
871 {
872 	struct sysctl_ctx_list *ctx;
873 	struct sysctl_oid_list *children;
874 	mcp_stats_t *fw;
875 
876 	ctx = device_get_sysctl_ctx(sc->dev);
877 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
878 	fw = sc->fw_stats;
879 
880 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
881 			"intr_coal_delay",
882 			CTLTYPE_INT|CTLFLAG_RW, sc,
883 			0, myri10ge_change_intr_coal,
884 			"I", "interrupt coalescing delay in usecs");
885 
886 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
887 			"flow_control_enabled",
888 			CTLTYPE_INT|CTLFLAG_RW, sc,
889 			0, myri10ge_change_flow_control,
890 			"I", "interrupt coalescing delay in usecs");
891 
892 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
893 		       "skip_pio_read",
894 		       CTLFLAG_RW, &myri10ge_skip_pio_read,
895 		       0, "Skip pio read in interrupt handler");
896 
897 	/* stats block from firmware is in network byte order.
898 	   Need to swap it */
899 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
900 			"link_up",
901 			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
902 			0, myri10ge_handle_be32,
903 			"I", "link up");
904 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
905 			"rdma_tags_available",
906 			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
907 			0, myri10ge_handle_be32,
908 			"I", "rdma_tags_available");
909 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
910 			"dropped_link_overflow",
911 			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
912 			0, myri10ge_handle_be32,
913 			"I", "dropped_link_overflow");
914 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
915 			"dropped_link_error_or_filtered",
916 			CTLTYPE_INT|CTLFLAG_RD,
917 			&fw->dropped_link_error_or_filtered,
918 			0, myri10ge_handle_be32,
919 			"I", "dropped_link_error_or_filtered");
920 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
921 			"dropped_runt",
922 			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
923 			0, myri10ge_handle_be32,
924 			"I", "dropped_runt");
925 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
926 			"dropped_overrun",
927 			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
928 			0, myri10ge_handle_be32,
929 			"I", "dropped_overrun");
930 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
931 			"dropped_no_small_buffer",
932 			CTLTYPE_INT|CTLFLAG_RD,
933 			&fw->dropped_no_small_buffer,
934 			0, myri10ge_handle_be32,
935 			"I", "dropped_no_small_buffer");
936 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
937 			"dropped_no_big_buffer",
938 			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
939 			0, myri10ge_handle_be32,
940 			"I", "dropped_no_big_buffer");
941 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
942 			"dropped_interrupt_busy",
943 			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_interrupt_busy,
944 			0, myri10ge_handle_be32,
945 			"I", "dropped_interrupt_busy");
946 
947 	/* host counters exported for debugging */
948 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
949 		       "tx_req",
950 		       CTLFLAG_RD, &sc->tx.req,
951 		       0, "tx_req");
952 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
953 		       "tx_done",
954 		       CTLFLAG_RD, &sc->tx.done,
955 		       0, "tx_done");
956 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
957 		       "rx_small_cnt",
958 		       CTLFLAG_RD, &sc->rx_small.cnt,
959 		       0, "rx_small_cnt");
960 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
961 		       "rx_big_cnt",
962 		       CTLFLAG_RD, &sc->rx_big.cnt,
963 		       0, "rx_small_cnt");
964 
965 }
966 
967 /* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
968    backwards one at a time and handle ring wraps */
969 
970 static inline void
971 myri10ge_submit_req_backwards(myri10ge_tx_buf_t *tx,
972 			    mcp_kreq_ether_send_t *src, int cnt)
973 {
974         int idx, starting_slot;
975         starting_slot = tx->req;
976         while (cnt > 1) {
977                 cnt--;
978                 idx = (starting_slot + cnt) & tx->mask;
979                 myri10ge_pio_copy(&tx->lanai[idx],
980 				&src[cnt], sizeof(*src));
981                 mb();
982         }
983 }
984 
985 /*
986  * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
987  * at most 32 bytes at a time, so as to avoid involving the software
988  * pio handler in the nic.   We re-write the first segment's flags
989  * to mark them valid only after writing the entire chain
990  */
991 
992 static inline void
993 myri10ge_submit_req(myri10ge_tx_buf_t *tx, mcp_kreq_ether_send_t *src,
994                   int cnt)
995 {
996         int idx, i;
997         uint32_t *src_ints;
998 	volatile uint32_t *dst_ints;
999         mcp_kreq_ether_send_t *srcp;
1000 	volatile mcp_kreq_ether_send_t *dstp, *dst;
1001 
1002 
1003         idx = tx->req & tx->mask;
1004 
1005         src->flags &= ~(htobe16(MYRI10GE_MCP_ETHER_FLAGS_VALID));
1006         mb();
1007         dst = dstp = &tx->lanai[idx];
1008         srcp = src;
1009 
1010         if ((idx + cnt) < tx->mask) {
1011                 for (i = 0; i < (cnt - 1); i += 2) {
1012                         myri10ge_pio_copy(dstp, srcp, 2 * sizeof(*src));
1013                         mb(); /* force write every 32 bytes */
1014                         srcp += 2;
1015                         dstp += 2;
1016                 }
1017         } else {
1018                 /* submit all but the first request, and ensure
1019                    that it is submitted below */
1020                 myri10ge_submit_req_backwards(tx, src, cnt);
1021                 i = 0;
1022         }
1023         if (i < cnt) {
1024                 /* submit the first request */
1025                 myri10ge_pio_copy(dstp, srcp, sizeof(*src));
1026                 mb(); /* barrier before setting valid flag */
1027         }
1028 
1029         /* re-write the last 32-bits with the valid flags */
1030         src->flags |= htobe16(MYRI10GE_MCP_ETHER_FLAGS_VALID);
1031         src_ints = (uint32_t *)src;
1032         src_ints+=3;
1033         dst_ints = (volatile uint32_t *)dst;
1034         dst_ints+=3;
1035         *dst_ints =  *src_ints;
1036         tx->req += cnt;
1037         mb();
1038 }
1039 
1040 static inline void
1041 myri10ge_submit_req_wc(myri10ge_tx_buf_t *tx,
1042 		     mcp_kreq_ether_send_t *src, int cnt)
1043 {
1044     tx->req += cnt;
1045     mb();
1046     while (cnt >= 4) {
1047 	    myri10ge_pio_copy((volatile char *)tx->wc_fifo, src, 64);
1048 	    mb();
1049 	    src += 4;
1050 	    cnt -= 4;
1051     }
1052     if (cnt > 0) {
1053 	    /* pad it to 64 bytes.  The src is 64 bytes bigger than it
1054 	       needs to be so that we don't overrun it */
1055 	    myri10ge_pio_copy(tx->wc_fifo + (cnt<<18), src, 64);
1056 	    mb();
1057     }
1058 }
1059 
1060 static void
1061 myri10ge_encap(myri10ge_softc_t *sc, struct mbuf *m)
1062 {
1063 	mcp_kreq_ether_send_t *req;
1064 	bus_dma_segment_t seg_list[MYRI10GE_MCP_ETHER_MAX_SEND_DESC];
1065 	bus_dma_segment_t *seg;
1066 	struct mbuf *m_tmp;
1067 	struct ifnet *ifp;
1068 	myri10ge_tx_buf_t *tx;
1069 	struct ether_header *eh;
1070 	struct ip *ip;
1071 	int cnt, cum_len, err, i, idx;
1072 	uint16_t flags, pseudo_hdr_offset;
1073         uint8_t cksum_offset;
1074 
1075 
1076 
1077 	ifp = sc->ifp;
1078 	tx = &sc->tx;
1079 
1080 	/* (try to) map the frame for DMA */
1081 	idx = tx->req & tx->mask;
1082 	err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
1083 				      m, seg_list, &cnt,
1084 				      BUS_DMA_NOWAIT);
1085 	if (err == EFBIG) {
1086 		/* Too many segments in the chain.  Try
1087 		   to defrag */
1088 		m_tmp = m_defrag(m, M_NOWAIT);
1089 		if (m_tmp == NULL) {
1090 			goto drop;
1091 		}
1092 		m = m_tmp;
1093 		err = bus_dmamap_load_mbuf_sg(tx->dmat,
1094 					      tx->info[idx].map,
1095 					      m, seg_list, &cnt,
1096 					      BUS_DMA_NOWAIT);
1097 	}
1098 	if (err != 0) {
1099 		device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d\n",
1100 			      err);
1101 		goto drop;
1102 	}
1103 	bus_dmamap_sync(tx->dmat, tx->info[idx].map,
1104 			BUS_DMASYNC_PREWRITE);
1105 
1106 	req = tx->req_list;
1107 	cksum_offset = 0;
1108 	flags = htobe16(MYRI10GE_MCP_ETHER_FLAGS_VALID |
1109 			MYRI10GE_MCP_ETHER_FLAGS_NOT_LAST);
1110 
1111 	/* checksum offloading? */
1112 	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
1113 		eh = mtod(m, struct ether_header *);
1114 		ip = (struct ip *) (eh + 1);
1115 		cksum_offset = sizeof(*eh) + (ip->ip_hl << 2);
1116 		pseudo_hdr_offset = cksum_offset +  m->m_pkthdr.csum_data;
1117 		req->pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
1118 		req->cksum_offset = cksum_offset;
1119 		flags |= htobe16(MYRI10GE_MCP_ETHER_FLAGS_CKSUM);
1120 	}
1121 	if (m->m_pkthdr.len < 512)
1122 		req->flags = htobe16(MYRI10GE_MCP_ETHER_FLAGS_FIRST |
1123 				     MYRI10GE_MCP_ETHER_FLAGS_SMALL);
1124 	else
1125 		req->flags = htobe16(MYRI10GE_MCP_ETHER_FLAGS_FIRST);
1126 
1127 	/* convert segments into a request list */
1128 	cum_len = 0;
1129 	seg = seg_list;
1130 	for (i = 0; i < cnt; i++) {
1131 		req->addr_low =
1132 			htobe32(MYRI10GE_LOWPART_TO_U32(seg->ds_addr));
1133 		req->addr_high =
1134 			htobe32(MYRI10GE_HIGHPART_TO_U32(seg->ds_addr));
1135 		req->length = htobe16(seg->ds_len);
1136 		req->cksum_offset = cksum_offset;
1137 		if (cksum_offset > seg->ds_len)
1138 			cksum_offset -= seg->ds_len;
1139 		else
1140 			cksum_offset = 0;
1141 		req->flags |= flags | ((cum_len & 1) *
1142 				       htobe16(MYRI10GE_MCP_ETHER_FLAGS_ALIGN_ODD));
1143 		cum_len += seg->ds_len;
1144 		seg++;
1145 		req++;
1146 		req->flags = 0;
1147 	}
1148 	req--;
1149 	/* pad runts to 60 bytes */
1150 	if (cum_len < 60) {
1151 		req++;
1152 		req->addr_low =
1153 			htobe32(MYRI10GE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
1154 		req->addr_high =
1155 			htobe32(MYRI10GE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
1156 		req->length = htobe16(60 - cum_len);
1157 		req->cksum_offset = cksum_offset;
1158 		req->flags |= flags | ((cum_len & 1) *
1159                                        htobe16(MYRI10GE_MCP_ETHER_FLAGS_ALIGN_ODD));
1160 		cnt++;
1161 	}
1162 	req->flags &= ~(htobe16(MYRI10GE_MCP_ETHER_FLAGS_NOT_LAST));
1163 	tx->info[idx].m = m;
1164 	if (tx->wc_fifo == NULL)
1165 		myri10ge_submit_req(tx, tx->req_list, cnt);
1166 	else
1167 		myri10ge_submit_req_wc(tx, tx->req_list, cnt);
1168 	return;
1169 
1170 drop:
1171 	m_freem(m);
1172 	ifp->if_oerrors++;
1173 	return;
1174 }
1175 
1176 
1177 static void
1178 myri10ge_start_locked(myri10ge_softc_t *sc)
1179 {
1180 	int avail;
1181 	struct mbuf *m;
1182 	struct ifnet *ifp;
1183 
1184 
1185 	ifp = sc->ifp;
1186 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
1187 		 /* dequeue the packet */
1188 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
1189 
1190 		/* let BPF see it */
1191 		BPF_MTAP(ifp, m);
1192 
1193 		/* give it to the nic */
1194 		myri10ge_encap(sc, m);
1195 
1196 		/* leave an extra slot keep the ring from wrapping */
1197 		avail = sc->tx.mask - (sc->tx.req - sc->tx.done);
1198 		if (avail < MYRI10GE_MCP_ETHER_MAX_SEND_DESC) {
1199 			sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1200 			return;
1201 		}
1202 	}
1203 }
1204 
1205 static void
1206 myri10ge_start(struct ifnet *ifp)
1207 {
1208 	myri10ge_softc_t *sc = ifp->if_softc;
1209 
1210 
1211 	mtx_lock(&sc->tx_lock);
1212 	myri10ge_start_locked(sc);
1213 	mtx_unlock(&sc->tx_lock);
1214 }
1215 
1216 static int
1217 myri10ge_get_buf_small(myri10ge_softc_t *sc, bus_dmamap_t map, int idx)
1218 {
1219 	bus_dma_segment_t seg;
1220 	struct mbuf *m;
1221 	myri10ge_rx_buf_t *rx = &sc->rx_small;
1222 	int cnt, err;
1223 
1224 	m = m_gethdr(M_DONTWAIT, MT_DATA);
1225 	if (m == NULL) {
1226 		rx->alloc_fail++;
1227 		err = ENOBUFS;
1228 		goto done;
1229 	}
1230 	m->m_len = MHLEN;
1231 	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
1232 				      &seg, &cnt, BUS_DMA_NOWAIT);
1233 	if (err != 0) {
1234 		m_free(m);
1235 		goto done;
1236 	}
1237 	rx->info[idx].m = m;
1238 	rx->shadow[idx].addr_low =
1239 		htobe32(MYRI10GE_LOWPART_TO_U32(seg.ds_addr));
1240 	rx->shadow[idx].addr_high =
1241 		htobe32(MYRI10GE_HIGHPART_TO_U32(seg.ds_addr));
1242 
1243 done:
1244 	if ((idx & 7) == 7) {
1245                 myri10ge_pio_copy(&rx->lanai[idx - 7],
1246 				  &rx->shadow[idx - 7],
1247                                   8 * sizeof (*rx->lanai));
1248                 mb();
1249         }
1250 	return err;
1251 }
1252 
1253 static int
1254 myri10ge_get_buf_big(myri10ge_softc_t *sc, bus_dmamap_t map, int idx)
1255 {
1256 	bus_dma_segment_t seg;
1257 	struct mbuf *m;
1258 	myri10ge_rx_buf_t *rx = &sc->rx_big;
1259 	int cnt, err;
1260 
1261 	m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, sc->big_bytes);
1262 	if (m == NULL) {
1263 		rx->alloc_fail++;
1264 		err = ENOBUFS;
1265 		goto done;
1266 	}
1267 	m->m_len = sc->big_bytes;
1268 	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
1269 				      &seg, &cnt, BUS_DMA_NOWAIT);
1270 	if (err != 0) {
1271 		m_free(m);
1272 		goto done;
1273 	}
1274 	rx->info[idx].m = m;
1275 	rx->shadow[idx].addr_low =
1276 		htobe32(MYRI10GE_LOWPART_TO_U32(seg.ds_addr));
1277 	rx->shadow[idx].addr_high =
1278 		htobe32(MYRI10GE_HIGHPART_TO_U32(seg.ds_addr));
1279 
1280 done:
1281 	if ((idx & 7) == 7) {
1282                 myri10ge_pio_copy(&rx->lanai[idx - 7],
1283 				  &rx->shadow[idx - 7],
1284                                   8 * sizeof (*rx->lanai));
1285                 mb();
1286         }
1287 	return err;
1288 }
1289 
1290 static inline void
1291 myri10ge_rx_done_big(myri10ge_softc_t *sc, int len, int csum, int flags)
1292 {
1293 	struct ifnet *ifp;
1294 	struct mbuf *m = 0; 		/* -Wunitialized */
1295 	struct mbuf *m_prev = 0;	/* -Wunitialized */
1296 	struct mbuf *m_head = 0;
1297 	bus_dmamap_t old_map;
1298 	myri10ge_rx_buf_t *rx;
1299 	int idx;
1300 
1301 
1302 	rx = &sc->rx_big;
1303 	ifp = sc->ifp;
1304 	while (len > 0) {
1305 		idx = rx->cnt & rx->mask;
1306                 rx->cnt++;
1307 		/* save a pointer to the received mbuf */
1308 		m = rx->info[idx].m;
1309 		/* try to replace the received mbuf */
1310 		if (myri10ge_get_buf_big(sc, rx->extra_map, idx)) {
1311 			goto drop;
1312 		}
1313 		/* unmap the received buffer */
1314 		old_map = rx->info[idx].map;
1315 		bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
1316 		bus_dmamap_unload(rx->dmat, old_map);
1317 
1318 		/* swap the bus_dmamap_t's */
1319 		rx->info[idx].map = rx->extra_map;
1320 		rx->extra_map = old_map;
1321 
1322 		/* chain multiple segments together */
1323 		if (!m_head) {
1324 			m_head = m;
1325 			/* mcp implicitly skips 1st bytes so that
1326 			 * packet is properly aligned */
1327 			m->m_data += MYRI10GE_MCP_ETHER_PAD;
1328 			m->m_pkthdr.len = len;
1329 			m->m_len = sc->big_bytes - MYRI10GE_MCP_ETHER_PAD;
1330 		} else {
1331 			m->m_len = sc->big_bytes;
1332 			m->m_flags &= ~M_PKTHDR;
1333 			m_prev->m_next = m;
1334 		}
1335 		len -= m->m_len;
1336 		m_prev = m;
1337 	}
1338 
1339 	/* trim trailing garbage from the last mbuf in the chain.  If
1340 	 * there is any garbage, len will be negative */
1341 	m->m_len += len;
1342 
1343 	/* if the checksum is valid, mark it in the mbuf header */
1344 	if (sc->csum_flag & flags) {
1345 		m_head->m_pkthdr.csum_data = csum;
1346 		m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID;
1347 	}
1348 
1349 	/* pass the frame up the stack */
1350 	m_head->m_pkthdr.rcvif = ifp;
1351 	ifp->if_ipackets++;
1352 	(*ifp->if_input)(ifp, m_head);
1353 	return;
1354 
1355 drop:
1356 	/* drop the frame -- the old mbuf(s) are re-cycled by running
1357 	   every slot through the allocator */
1358         if (m_head) {
1359                 len -= sc->big_bytes;
1360                 m_freem(m_head);
1361         } else {
1362                 len -= (sc->big_bytes + MYRI10GE_MCP_ETHER_PAD);
1363         }
1364         while ((int)len > 0) {
1365                 idx = rx->cnt & rx->mask;
1366                 rx->cnt++;
1367                 m = rx->info[idx].m;
1368                 if (0 == (myri10ge_get_buf_big(sc, rx->extra_map, idx))) {
1369 			m_freem(m);
1370 			/* unmap the received buffer */
1371 			old_map = rx->info[idx].map;
1372 			bus_dmamap_sync(rx->dmat, old_map,
1373 					BUS_DMASYNC_POSTREAD);
1374 			bus_dmamap_unload(rx->dmat, old_map);
1375 
1376 			/* swap the bus_dmamap_t's */
1377 			rx->info[idx].map = rx->extra_map;
1378 			rx->extra_map = old_map;
1379 		}
1380                 len -= sc->big_bytes;
1381         }
1382 
1383 	ifp->if_ierrors++;
1384 
1385 }
1386 
1387 
1388 static inline void
1389 myri10ge_rx_done_small(myri10ge_softc_t *sc, uint32_t len,
1390                        uint32_t csum, uint32_t flags)
1391 {
1392 	struct ifnet *ifp;
1393 	struct mbuf *m;
1394 	myri10ge_rx_buf_t *rx;
1395 	bus_dmamap_t old_map;
1396 	int idx;
1397 
1398 	ifp = sc->ifp;
1399 	rx = &sc->rx_small;
1400 	idx = rx->cnt & rx->mask;
1401 	rx->cnt++;
1402 	/* save a pointer to the received mbuf */
1403 	m = rx->info[idx].m;
1404 	/* try to replace the received mbuf */
1405 	if (myri10ge_get_buf_small(sc, rx->extra_map, idx)) {
1406 		/* drop the frame -- the old mbuf is re-cycled */
1407 		ifp->if_ierrors++;
1408 		return;
1409 	}
1410 
1411 	/* unmap the received buffer */
1412 	old_map = rx->info[idx].map;
1413 	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
1414 	bus_dmamap_unload(rx->dmat, old_map);
1415 
1416 	/* swap the bus_dmamap_t's */
1417 	rx->info[idx].map = rx->extra_map;
1418 	rx->extra_map = old_map;
1419 
1420 	/* mcp implicitly skips 1st 2 bytes so that packet is properly
1421 	 * aligned */
1422 	m->m_data += MYRI10GE_MCP_ETHER_PAD;
1423 
1424 	/* if the checksum is valid, mark it in the mbuf header */
1425 	if (sc->csum_flag & flags) {
1426 		m->m_pkthdr.csum_data = csum;
1427 		m->m_pkthdr.csum_flags = CSUM_DATA_VALID;
1428 	}
1429 
1430 	/* pass the frame up the stack */
1431 	m->m_pkthdr.rcvif = ifp;
1432 	m->m_len = m->m_pkthdr.len = len;
1433 	ifp->if_ipackets++;
1434 	(*ifp->if_input)(ifp, m);
1435 }
1436 
1437 static inline void
1438 myri10ge_tx_done(myri10ge_softc_t *sc, uint32_t mcp_idx)
1439 {
1440 	struct ifnet *ifp;
1441 	myri10ge_tx_buf_t *tx;
1442 	struct mbuf *m;
1443 	bus_dmamap_t map;
1444 	int idx;
1445 
1446 	tx = &sc->tx;
1447 	ifp = sc->ifp;
1448 	while (tx->done != mcp_idx) {
1449 		idx = tx->done & tx->mask;
1450 		tx->done++;
1451 		m = tx->info[idx].m;
1452 		/* mbuf and DMA map only attached to the first
1453 		   segment per-mbuf */
1454 		if (m != NULL) {
1455 			ifp->if_opackets++;
1456 			tx->info[idx].m = NULL;
1457 			map = tx->info[idx].map;
1458 			bus_dmamap_unload(tx->dmat, map);
1459 			m_freem(m);
1460 		}
1461 	}
1462 
1463 	/* If we have space, clear IFF_OACTIVE to tell the stack that
1464            its OK to send packets */
1465 
1466 	if (ifp->if_drv_flags & IFF_DRV_OACTIVE &&
1467 	    tx->req - tx->done < (tx->mask + 1)/4) {
1468 		mtx_lock(&sc->tx_lock);
1469 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1470 		myri10ge_start_locked(sc);
1471 		mtx_unlock(&sc->tx_lock);
1472 	}
1473 }
1474 
1475 static void
1476 myri10ge_dump_interrupt_queues(myri10ge_softc_t *sc, int maxslot)
1477 {
1478   int intrq, slot, type;
1479   static int call_cnt = 0;
1480 
1481   /* only do it a few times to avoid filling the message buffer */
1482   if (call_cnt > 10)
1483     return;
1484 
1485   call_cnt++;
1486 
1487   device_printf(sc->dev, "--------- Dumping interrupt queue state ----- \n");
1488   device_printf(sc->dev, "currently expecting interrupts on queue %d\n",
1489 		sc->intr.intrq);
1490   device_printf(sc->dev, " q  slot  status \n");
1491   device_printf(sc->dev, "--- ---- -------- \n");
1492   for (intrq = 0; intrq < 2; intrq++) {
1493 	  for (slot = 0; slot <= maxslot; slot++) {
1494       type = sc->intr.q[intrq][slot].type;
1495 #if 0
1496       if (type == 0 && slot != 0)
1497         continue;
1498 #endif
1499       device_printf(sc->dev, "[%d]:[%d]: type   = 0x%x\n", intrq, slot,
1500 		    type);
1501       device_printf(sc->dev, "[%d]:[%d]: flag    = 0x%x\n", intrq, slot,
1502                 sc->intr.q[intrq][slot].flag);
1503       device_printf(sc->dev, "[%d]:[%d]: index  = 0x%x\n", intrq, slot,
1504                 be16toh(sc->intr.q[intrq][slot].index));
1505       device_printf(sc->dev, "[%d]:[%d]: seqnum = 0x%x\n", intrq, slot,
1506                 (unsigned int)be32toh(sc->intr.q[intrq][slot].seqnum));
1507       device_printf(sc->dev, "[%d]:[%d]: data0  = 0x%x\n", intrq, slot,
1508                 (unsigned int)be32toh(sc->intr.q[intrq][slot].data0));
1509       device_printf(sc->dev, "[%d]:[%d]: data1  = 0x%x\n", intrq, slot,
1510                 (unsigned int)be32toh(sc->intr.q[intrq][slot].data1));
1511 
1512     }
1513   }
1514 
1515 }
1516 
1517 static inline void
1518 myri10ge_claim_irq(myri10ge_softc_t *sc)
1519 {
1520 	volatile uint32_t dontcare;
1521 
1522 
1523 	*sc->irq_claim = 0;
1524 	mb();
1525 
1526 	/* do a PIO read to ensure that PIO write to claim the irq has
1527 	   hit the nic before we exit the interrupt handler */
1528 	if (!myri10ge_skip_pio_read) {
1529 		dontcare = *(volatile uint32_t *)sc->sram;
1530 		mb();
1531 	}
1532 }
1533 
1534 static void
1535 myri10ge_intr(void *arg)
1536 {
1537 	myri10ge_softc_t *sc = arg;
1538 	int intrq, claimed, flags, count, length, ip_csum;
1539         uint32_t raw, slot;
1540 	uint8_t type;
1541 
1542 
1543 	intrq = sc->intr.intrq;
1544 	claimed = 0;
1545 	bus_dmamap_sync(sc->intr.dma[intrq].dmat,
1546 			sc->intr.dma[intrq].map, BUS_DMASYNC_POSTREAD);
1547 	if (sc->msi_enabled) {
1548 		/* We know we can immediately claim the interrupt */
1549 		myri10ge_claim_irq(sc);
1550 		claimed = 1;
1551 	} else {
1552 		/* Check to see if we have the last event in the queue
1553 		   ready.  If so, ack it as early as possible.  This
1554 		   allows more time to get the interrupt line
1555 		   de-asserted prior to the EOI and reduces the chance
1556 		   of seeing a spurious irq caused by the interrupt
1557 		   line remaining high after EOI */
1558 
1559 		slot = be16toh(sc->intr.q[intrq][0].index) - 1;
1560 		if (slot < myri10ge_max_intr_slots &&
1561 		    sc->intr.q[intrq][slot].type  != 0 &&
1562 		    sc->intr.q[intrq][slot].flag != 0) {
1563 			myri10ge_claim_irq(sc);
1564 			claimed = 1;
1565 		}
1566 	}
1567 
1568 	/* walk each slot in the current queue, processing events until
1569 	   we reach an event with a zero type */
1570 	for (slot = sc->intr.slot; slot < myri10ge_max_intr_slots; slot++) {
1571 		type = sc->intr.q[intrq][slot].type;
1572 
1573 		/* check for partially completed DMA of events when
1574 		   using non-MSI interrupts */
1575 		if (__predict_false(!claimed)) {
1576 			mb();
1577 			/* look if there is somscing in the queue */
1578 			if (type == 0) {
1579 				/* save the current slot for the next
1580 				 * time we (re-)enter this routine */
1581 				if (sc->intr.slot == slot) {
1582 					sc->intr.spurious++;
1583 				}
1584 				sc->intr.slot = slot;
1585 				return;
1586 			}
1587 		}
1588 		if (__predict_false(htobe32(sc->intr.q[intrq][slot].seqnum) !=
1589 			     sc->intr.seqnum++)) {
1590 			device_printf(sc->dev, "Bad interrupt!\n");
1591 			device_printf(sc->dev,
1592 				      "bad irq seqno"
1593 				      "(got 0x%x, expected 0x%x) \n",
1594 				      (unsigned int)htobe32(sc->intr.q[intrq][slot].seqnum),
1595 				      sc->intr.seqnum);
1596 			device_printf(sc->dev, "intrq = %d, slot = %d\n",
1597 				      intrq, slot);
1598 			myri10ge_dump_interrupt_queues(sc, slot);
1599 			device_printf(sc->dev,
1600 				      "Disabling futher interrupt handling\n");
1601 			bus_teardown_intr(sc->dev, sc->irq_res,
1602 					  sc->ih);
1603 			sc->ih = NULL;
1604 			return;
1605 		}
1606 
1607 		switch (type) {
1608 		case MYRI10GE_MCP_INTR_ETHER_SEND_DONE:
1609 			myri10ge_tx_done(sc, be32toh(sc->intr.q[intrq][slot].data0));
1610 
1611 			if (__predict_true(sc->intr.q[intrq][slot].data1 == 0))
1612 				break;
1613 
1614 			/* check the link state.  Don't bother to
1615 			 * byteswap, since it can just be 0 or 1 */
1616 			if (sc->link_state != sc->fw_stats->link_up) {
1617 				sc->link_state = sc->fw_stats->link_up;
1618 				if (sc->link_state) {
1619 					if_link_state_change(sc->ifp,
1620 							     LINK_STATE_UP);
1621 					device_printf(sc->dev,
1622 						      "link up\n");
1623 				} else {
1624 					if_link_state_change(sc->ifp,
1625 							     LINK_STATE_DOWN);
1626 					device_printf(sc->dev,
1627 						      "link down\n");
1628 				}
1629 			}
1630 			if (sc->rdma_tags_available !=
1631 			    be32toh(sc->fw_stats->rdma_tags_available)) {
1632 				sc->rdma_tags_available =
1633 					be32toh(sc->fw_stats->rdma_tags_available);
1634 				device_printf(sc->dev, "RDMA timed out!"
1635 					      " %d tags left\n",
1636 					      sc->rdma_tags_available);
1637 			}
1638 
1639 			break;
1640 
1641 
1642 		case MYRI10GE_MCP_INTR_ETHER_RECV_SMALL:
1643 			raw = be32toh(sc->intr.q[intrq][slot].data0);
1644 			count = 0xff & raw;
1645 			flags = raw >> 8;
1646 			raw = be32toh(sc->intr.q[intrq][slot].data1);
1647 			ip_csum = raw >> 16;
1648 			length = 0xffff & raw;
1649 			myri10ge_rx_done_small(sc, length, ip_csum,
1650 					       flags);
1651 			break;
1652 
1653 		case MYRI10GE_MCP_INTR_ETHER_RECV_BIG:
1654 			raw = be32toh(sc->intr.q[intrq][slot].data0);
1655 			count = 0xff & raw;
1656 			flags = raw >> 8;
1657 			raw = be32toh(sc->intr.q[intrq][slot].data1);
1658 			ip_csum = raw >> 16;
1659 			length = 0xffff & raw;
1660 			myri10ge_rx_done_big(sc, length, ip_csum,
1661 					     flags);
1662 
1663 			break;
1664 
1665 		case MYRI10GE_MCP_INTR_LINK_CHANGE:
1666 			/* not yet implemented in firmware */
1667 			break;
1668 
1669 		case MYRI10GE_MCP_INTR_ETHER_DOWN:
1670 			sc->down_cnt++;
1671 			wakeup(&sc->down_cnt);
1672 			break;
1673 
1674 		default:
1675 			device_printf(sc->dev, "Unknown interrupt type %d\n",
1676 				      type);
1677 		}
1678 		sc->intr.q[intrq][slot].type = 0;
1679 		if (sc->intr.q[intrq][slot].flag != 0) {
1680 			if (!claimed) {
1681 				myri10ge_claim_irq(sc);
1682 			}
1683 			sc->intr.slot = 0;
1684 			sc->intr.q[intrq][slot].flag = 0;
1685 			sc->intr.intrq = ((intrq + 1) & 1);
1686 			return;
1687 		}
1688 	}
1689 
1690 	/* we should never be here unless we're on a shared irq and we have
1691 	   not finished setting up the device */
1692 	return;
1693 }
1694 
/*
 * Watchdog callback: currently only reports that it was invoked.
 */
static void
myri10ge_watchdog(struct ifnet *ifp)
{
	printf("%s called\n", __func__);
}
1700 
/*
 * Init callback: intentionally a no-op; arg is not used.
 */
static void
myri10ge_init(void *arg)
{
	(void)arg;
}
1705 
1706 
1707 
1708 static void
1709 myri10ge_free_mbufs(myri10ge_softc_t *sc)
1710 {
1711 	int i;
1712 
1713 	for (i = 0; i <= sc->rx_big.mask; i++) {
1714 		if (sc->rx_big.info[i].m == NULL)
1715 			continue;
1716 		bus_dmamap_unload(sc->rx_big.dmat,
1717 				  sc->rx_big.info[i].map);
1718 		m_freem(sc->rx_big.info[i].m);
1719 		sc->rx_big.info[i].m = NULL;
1720 	}
1721 
1722 	for (i = 0; i <= sc->rx_big.mask; i++) {
1723 		if (sc->rx_big.info[i].m == NULL)
1724 			continue;
1725 		bus_dmamap_unload(sc->rx_big.dmat,
1726 				  sc->rx_big.info[i].map);
1727 		m_freem(sc->rx_big.info[i].m);
1728 		sc->rx_big.info[i].m = NULL;
1729 	}
1730 
1731 	for (i = 0; i <= sc->tx.mask; i++) {
1732 		if (sc->tx.info[i].m == NULL)
1733 			continue;
1734 		bus_dmamap_unload(sc->tx.dmat,
1735 				  sc->tx.info[i].map);
1736 		m_freem(sc->tx.info[i].m);
1737 		sc->tx.info[i].m = NULL;
1738 	}
1739 }
1740 
1741 static void
1742 myri10ge_free_rings(myri10ge_softc_t *sc)
1743 {
1744 	int i;
1745 
1746 	if (sc->tx.req_bytes != NULL) {
1747 		free(sc->tx.req_bytes, M_DEVBUF);
1748 	}
1749 	if (sc->rx_small.shadow != NULL)
1750 		free(sc->rx_small.shadow, M_DEVBUF);
1751 	if (sc->rx_big.shadow != NULL)
1752 		free(sc->rx_big.shadow, M_DEVBUF);
1753 	if (sc->tx.info != NULL) {
1754 		for (i = 0; i <= sc->tx.mask; i++) {
1755 			if (sc->tx.info[i].map != NULL)
1756 				bus_dmamap_destroy(sc->tx.dmat,
1757 						   sc->tx.info[i].map);
1758 		}
1759 		free(sc->tx.info, M_DEVBUF);
1760 	}
1761 	if (sc->rx_small.info != NULL) {
1762 		for (i = 0; i <= sc->rx_small.mask; i++) {
1763 			if (sc->rx_small.info[i].map != NULL)
1764 				bus_dmamap_destroy(sc->rx_small.dmat,
1765 						   sc->rx_small.info[i].map);
1766 		}
1767 		free(sc->rx_small.info, M_DEVBUF);
1768 	}
1769 	if (sc->rx_big.info != NULL) {
1770 		for (i = 0; i <= sc->rx_big.mask; i++) {
1771 			if (sc->rx_big.info[i].map != NULL)
1772 				bus_dmamap_destroy(sc->rx_big.dmat,
1773 						   sc->rx_big.info[i].map);
1774 		}
1775 		free(sc->rx_big.info, M_DEVBUF);
1776 	}
1777 	if (sc->rx_big.extra_map != NULL)
1778 		bus_dmamap_destroy(sc->rx_big.dmat,
1779 				   sc->rx_big.extra_map);
1780 	if (sc->rx_small.extra_map != NULL)
1781 		bus_dmamap_destroy(sc->rx_small.dmat,
1782 				   sc->rx_small.extra_map);
1783 	if (sc->tx.dmat != NULL)
1784 		bus_dma_tag_destroy(sc->tx.dmat);
1785 	if (sc->rx_small.dmat != NULL)
1786 		bus_dma_tag_destroy(sc->rx_small.dmat);
1787 	if (sc->rx_big.dmat != NULL)
1788 		bus_dma_tag_destroy(sc->rx_big.dmat);
1789 }
1790 
1791 static int
1792 myri10ge_alloc_rings(myri10ge_softc_t *sc)
1793 {
1794 	myri10ge_cmd_t cmd;
1795 	int tx_ring_size, rx_ring_size;
1796 	int tx_ring_entries, rx_ring_entries;
1797 	int i, err;
1798 	unsigned long bytes;
1799 
1800 	/* get ring sizes */
1801 	err = myri10ge_send_cmd(sc,
1802 				MYRI10GE_MCP_CMD_GET_SEND_RING_SIZE,
1803 				&cmd);
1804 	tx_ring_size = cmd.data0;
1805 	err |= myri10ge_send_cmd(sc,
1806 				 MYRI10GE_MCP_CMD_GET_RX_RING_SIZE,
1807 				 &cmd);
1808 	if (err != 0) {
1809 		device_printf(sc->dev, "Cannot determine ring sizes\n");
1810 		goto abort_with_nothing;
1811 	}
1812 
1813 	rx_ring_size = cmd.data0;
1814 
1815 	tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
1816 	rx_ring_entries = rx_ring_size / sizeof (mcp_dma_addr_t);
1817 	sc->ifp->if_snd.ifq_maxlen = tx_ring_entries - 1;
1818 	sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
1819 
1820 	sc->tx.mask = tx_ring_entries - 1;
1821 	sc->rx_small.mask = sc->rx_big.mask = rx_ring_entries - 1;
1822 
1823 	err = ENOMEM;
1824 
1825 	/* allocate the tx request copy block */
1826 	bytes = 8 +
1827 		sizeof (*sc->tx.req_list) * (MYRI10GE_MCP_ETHER_MAX_SEND_DESC + 4);
1828 	sc->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
1829 	if (sc->tx.req_bytes == NULL)
1830 		goto abort_with_nothing;
1831 	/* ensure req_list entries are aligned to 8 bytes */
1832 	sc->tx.req_list = (mcp_kreq_ether_send_t *)
1833 		((unsigned long)(sc->tx.req_bytes + 7) & ~7UL);
1834 
1835 	/* allocate the rx shadow rings */
1836 	bytes = rx_ring_entries * sizeof (*sc->rx_small.shadow);
1837 	sc->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
1838 	if (sc->rx_small.shadow == NULL)
1839 		goto abort_with_alloc;
1840 
1841 	bytes = rx_ring_entries * sizeof (*sc->rx_big.shadow);
1842 	sc->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
1843 	if (sc->rx_big.shadow == NULL)
1844 		goto abort_with_alloc;
1845 
1846 	/* allocate the host info rings */
1847 	bytes = tx_ring_entries * sizeof (*sc->tx.info);
1848 	sc->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
1849 	if (sc->tx.info == NULL)
1850 		goto abort_with_alloc;
1851 
1852 	bytes = rx_ring_entries * sizeof (*sc->rx_small.info);
1853 	sc->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
1854 	if (sc->rx_small.info == NULL)
1855 		goto abort_with_alloc;
1856 
1857 	bytes = rx_ring_entries * sizeof (*sc->rx_big.info);
1858 	sc->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
1859 	if (sc->rx_big.info == NULL)
1860 		goto abort_with_alloc;
1861 
1862 	/* allocate the busdma resources */
1863 	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
1864 				 1,			/* alignment */
1865 				 sc->tx.boundary,	/* boundary */
1866 				 BUS_SPACE_MAXADDR,	/* low */
1867 				 BUS_SPACE_MAXADDR,	/* high */
1868 				 NULL, NULL,		/* filter */
1869 				 MYRI10GE_MAX_ETHER_MTU,/* maxsize */
1870 				 MYRI10GE_MCP_ETHER_MAX_SEND_DESC,/* num segs */
1871 				 sc->tx.boundary,	/* maxsegsize */
1872 				 BUS_DMA_ALLOCNOW,	/* flags */
1873 				 NULL, NULL,		/* lock */
1874 				 &sc->tx.dmat);		/* tag */
1875 
1876 	if (err != 0) {
1877 		device_printf(sc->dev, "Err %d allocating tx dmat\n",
1878 			      err);
1879 		goto abort_with_alloc;
1880 	}
1881 
1882 	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
1883 				 1,			/* alignment */
1884 				 4096,			/* boundary */
1885 				 BUS_SPACE_MAXADDR,	/* low */
1886 				 BUS_SPACE_MAXADDR,	/* high */
1887 				 NULL, NULL,		/* filter */
1888 				 MHLEN,			/* maxsize */
1889 				 1,			/* num segs */
1890 				 MHLEN,			/* maxsegsize */
1891 				 BUS_DMA_ALLOCNOW,	/* flags */
1892 				 NULL, NULL,		/* lock */
1893 				 &sc->rx_small.dmat);	/* tag */
1894 	if (err != 0) {
1895 		device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
1896 			      err);
1897 		goto abort_with_alloc;
1898 	}
1899 
1900 	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
1901 				 1,			/* alignment */
1902 				 4096,			/* boundary */
1903 				 BUS_SPACE_MAXADDR,	/* low */
1904 				 BUS_SPACE_MAXADDR,	/* high */
1905 				 NULL, NULL,		/* filter */
1906 				 4096,			/* maxsize */
1907 				 1,			/* num segs */
1908 				 4096,			/* maxsegsize */
1909 				 BUS_DMA_ALLOCNOW,	/* flags */
1910 				 NULL, NULL,		/* lock */
1911 				 &sc->rx_big.dmat);	/* tag */
1912 	if (err != 0) {
1913 		device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
1914 			      err);
1915 		goto abort_with_alloc;
1916 	}
1917 
1918 	/* now use these tags to setup dmamaps for each slot
1919 	   in each ring */
1920 	for (i = 0; i <= sc->tx.mask; i++) {
1921 		err = bus_dmamap_create(sc->tx.dmat, 0,
1922 					&sc->tx.info[i].map);
1923 		if (err != 0) {
1924 			device_printf(sc->dev, "Err %d  tx dmamap\n",
1925 			      err);
1926 			goto abort_with_alloc;
1927 		}
1928 	}
1929 	for (i = 0; i <= sc->rx_small.mask; i++) {
1930 		err = bus_dmamap_create(sc->rx_small.dmat, 0,
1931 					&sc->rx_small.info[i].map);
1932 		if (err != 0) {
1933 			device_printf(sc->dev, "Err %d  rx_small dmamap\n",
1934 			      err);
1935 			goto abort_with_alloc;
1936 		}
1937 	}
1938 	err = bus_dmamap_create(sc->rx_small.dmat, 0,
1939 				&sc->rx_small.extra_map);
1940 	if (err != 0) {
1941 		device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
1942 			      err);
1943 			goto abort_with_alloc;
1944 	}
1945 
1946 	for (i = 0; i <= sc->rx_big.mask; i++) {
1947 		err = bus_dmamap_create(sc->rx_big.dmat, 0,
1948 					&sc->rx_big.info[i].map);
1949 		if (err != 0) {
1950 			device_printf(sc->dev, "Err %d  rx_big dmamap\n",
1951 			      err);
1952 			goto abort_with_alloc;
1953 		}
1954 	}
1955 	err = bus_dmamap_create(sc->rx_big.dmat, 0,
1956 				&sc->rx_big.extra_map);
1957 	if (err != 0) {
1958 		device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
1959 			      err);
1960 			goto abort_with_alloc;
1961 	}
1962 	return 0;
1963 
1964 abort_with_alloc:
1965 	myri10ge_free_rings(sc);
1966 
1967 abort_with_nothing:
1968 	return err;
1969 }
1970 
1971 static int
1972 myri10ge_open(myri10ge_softc_t *sc)
1973 {
1974 	myri10ge_cmd_t cmd;
1975 	int i, err;
1976 	bus_dmamap_t map;
1977 
1978 
1979 	err = myri10ge_reset(sc);
1980 	if (err != 0) {
1981 		device_printf(sc->dev, "failed to reset\n");
1982 		return EIO;
1983 	}
1984 
1985 	if (MCLBYTES >=
1986 	    sc->ifp->if_mtu + ETHER_HDR_LEN + MYRI10GE_MCP_ETHER_PAD)
1987 		sc->big_bytes = MCLBYTES;
1988 	else
1989 		sc->big_bytes = MJUMPAGESIZE;
1990 
1991 	err = myri10ge_alloc_rings(sc);
1992 	if (err != 0) {
1993 		device_printf(sc->dev, "failed to allocate rings\n");
1994 		return err;
1995 	}
1996 
1997 	err = bus_setup_intr(sc->dev, sc->irq_res,
1998 			     INTR_TYPE_NET | INTR_MPSAFE,
1999 			     myri10ge_intr, sc, &sc->ih);
2000 	if (err != 0) {
2001 		goto abort_with_rings;
2002 	}
2003 
2004 	/* get the lanai pointers to the send and receive rings */
2005 
2006 	err = myri10ge_send_cmd(sc, MYRI10GE_MCP_CMD_GET_SEND_OFFSET, &cmd);
2007 	sc->tx.lanai =
2008 		(volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
2009 	err |= myri10ge_send_cmd(sc,
2010 				 MYRI10GE_MCP_CMD_GET_SMALL_RX_OFFSET, &cmd);
2011 	sc->rx_small.lanai =
2012 		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
2013 	err |= myri10ge_send_cmd(sc, MYRI10GE_MCP_CMD_GET_BIG_RX_OFFSET, &cmd);
2014 	sc->rx_big.lanai =
2015 		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
2016 
2017 	if (err != 0) {
2018 		device_printf(sc->dev,
2019 			      "failed to get ring sizes or locations\n");
2020 		err = EIO;
2021 		goto abort_with_irq;
2022 	}
2023 
2024 	if (sc->wc) {
2025 		sc->tx.wc_fifo = sc->sram + 0x200000;
2026 		sc->rx_small.wc_fifo = sc->sram + 0x300000;
2027 		sc->rx_big.wc_fifo = sc->sram + 0x340000;
2028 	} else {
2029 		sc->tx.wc_fifo = 0;
2030 		sc->rx_small.wc_fifo = 0;
2031 		sc->rx_big.wc_fifo = 0;
2032 	}
2033 
2034 
2035 	/* stock receive rings */
2036 	for (i = 0; i <= sc->rx_small.mask; i++) {
2037 		map = sc->rx_small.info[i].map;
2038 		err = myri10ge_get_buf_small(sc, map, i);
2039 		if (err) {
2040 			device_printf(sc->dev, "alloced %d/%d smalls\n",
2041 				      i, sc->rx_small.mask + 1);
2042 			goto abort;
2043 		}
2044 	}
2045 	for (i = 0; i <= sc->rx_big.mask; i++) {
2046 		map = sc->rx_big.info[i].map;
2047 		err = myri10ge_get_buf_big(sc, map, i);
2048 		if (err) {
2049 			device_printf(sc->dev, "alloced %d/%d bigs\n",
2050 				      i, sc->rx_big.mask + 1);
2051 			goto abort;
2052 		}
2053 	}
2054 
2055 	/* Give the firmware the mtu and the big and small buffer
2056 	   sizes.  The firmware wants the big buf size to be a power
2057 	   of two. Luckily, FreeBSD's clusters are powers of two */
2058 	cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN;
2059 	err = myri10ge_send_cmd(sc, MYRI10GE_MCP_CMD_SET_MTU, &cmd);
2060 	cmd.data0 = MHLEN;
2061 	err |= myri10ge_send_cmd(sc,
2062 				 MYRI10GE_MCP_CMD_SET_SMALL_BUFFER_SIZE,
2063 				 &cmd);
2064 	cmd.data0 = sc->big_bytes;
2065 	err  |= myri10ge_send_cmd(sc,
2066 				  MYRI10GE_MCP_CMD_SET_BIG_BUFFER_SIZE,
2067 				  &cmd);
2068 	/* Now give him the pointer to the stats block */
2069 	cmd.data0 = MYRI10GE_LOWPART_TO_U32(sc->fw_stats_dma.bus_addr);
2070 	cmd.data1 = MYRI10GE_HIGHPART_TO_U32(sc->fw_stats_dma.bus_addr);
2071 	err = myri10ge_send_cmd(sc, MYRI10GE_MCP_CMD_SET_STATS_DMA, &cmd);
2072 
2073 	if (err != 0) {
2074 		device_printf(sc->dev, "failed to setup params\n");
2075 		goto abort;
2076 	}
2077 
2078 	/* Finally, start the firmware running */
2079 	err = myri10ge_send_cmd(sc, MYRI10GE_MCP_CMD_ETHERNET_UP, &cmd);
2080 	if (err) {
2081 		device_printf(sc->dev, "Couldn't bring up link\n");
2082 		goto abort;
2083 	}
2084 	sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
2085 	sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2086 
2087 	return 0;
2088 
2089 
2090 abort:
2091 	myri10ge_free_mbufs(sc);
2092 abort_with_irq:
2093 	bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
2094 abort_with_rings:
2095 	myri10ge_free_rings(sc);
2096 	return err;
2097 }
2098 
2099 static int
2100 myri10ge_close(myri10ge_softc_t *sc)
2101 {
2102 	myri10ge_cmd_t cmd;
2103 	int err, old_down_cnt;
2104 
2105 	sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2106 	old_down_cnt = sc->down_cnt;
2107 	mb();
2108 	err = myri10ge_send_cmd(sc, MYRI10GE_MCP_CMD_ETHERNET_DOWN, &cmd);
2109 	if (err) {
2110 		device_printf(sc->dev, "Couldn't bring down link\n");
2111 	}
2112 	if (old_down_cnt == sc->down_cnt) {
2113 		/* wait for down irq */
2114 		(void)tsleep(&sc->down_cnt, PWAIT, "down myri10ge", hz);
2115 	}
2116 	if (old_down_cnt == sc->down_cnt) {
2117 		device_printf(sc->dev, "never got down irq\n");
2118 	}
2119 	if (sc->ih != NULL)
2120 		bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
2121 	myri10ge_free_mbufs(sc);
2122 	myri10ge_free_rings(sc);
2123 	return 0;
2124 }
2125 
2126 
/*
 * ifmedia "change" callback.  No media selection is performed here:
 * every request is rejected with EINVAL.
 */
static int
myri10ge_media_change(struct ifnet *ifp)
{
	(void)ifp;	/* argument is not consulted */

	return (EINVAL);
}
2132 
2133 static int
2134 myri10ge_change_mtu(myri10ge_softc_t *sc, int mtu)
2135 {
2136 	struct ifnet *ifp = sc->ifp;
2137 	int real_mtu, old_mtu;
2138 	int err = 0;
2139 
2140 
2141 	real_mtu = mtu + ETHER_HDR_LEN;
2142 	if ((real_mtu > MYRI10GE_MAX_ETHER_MTU) ||
2143 	    real_mtu < 60)
2144 		return EINVAL;
2145 	sx_xlock(&sc->driver_lock);
2146 	old_mtu = ifp->if_mtu;
2147 	ifp->if_mtu = mtu;
2148 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2149 		myri10ge_close(sc);
2150 		err = myri10ge_open(sc);
2151 		if (err != 0) {
2152 			ifp->if_mtu = old_mtu;
2153 			myri10ge_close(sc);
2154 			(void) myri10ge_open(sc);
2155 		}
2156 	}
2157 	sx_xunlock(&sc->driver_lock);
2158 	return err;
2159 }
2160 
2161 static void
2162 myri10ge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2163 {
2164 	myri10ge_softc_t *sc = ifp->if_softc;
2165 
2166 
2167 	if (sc == NULL)
2168 		return;
2169 	ifmr->ifm_status = IFM_AVALID;
2170 	ifmr->ifm_status |= sc->fw_stats->link_up ? IFM_ACTIVE : 0;
2171 	ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
2172 	ifmr->ifm_active |= sc->fw_stats->link_up ? IFM_FDX : 0;
2173 }
2174 
2175 static int
2176 myri10ge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
2177 {
2178 	myri10ge_softc_t *sc = ifp->if_softc;
2179 	struct ifreq *ifr = (struct ifreq *)data;
2180 	int err, mask;
2181 
2182 	err = 0;
2183 	switch (command) {
2184 	case SIOCSIFADDR:
2185 	case SIOCGIFADDR:
2186 		err = ether_ioctl(ifp, command, data);
2187 		break;
2188 
2189 	case SIOCSIFMTU:
2190 		err = myri10ge_change_mtu(sc, ifr->ifr_mtu);
2191 		break;
2192 
2193 	case SIOCSIFFLAGS:
2194 		sx_xlock(&sc->driver_lock);
2195 		if (ifp->if_flags & IFF_UP) {
2196 			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
2197 				err = myri10ge_open(sc);
2198 		} else {
2199 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2200 				myri10ge_close(sc);
2201 		}
2202 		sx_xunlock(&sc->driver_lock);
2203 		break;
2204 
2205 	case SIOCADDMULTI:
2206 	case SIOCDELMULTI:
2207 		err = 0;
2208 		break;
2209 
2210 	case SIOCSIFCAP:
2211 		sx_xlock(&sc->driver_lock);
2212 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2213 		if (mask & IFCAP_TXCSUM) {
2214 			if (IFCAP_TXCSUM & ifp->if_capenable) {
2215 				ifp->if_capenable &= ~IFCAP_TXCSUM;
2216 				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
2217 			} else {
2218 				ifp->if_capenable |= IFCAP_TXCSUM;
2219 				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
2220 			}
2221 		} else if (mask & IFCAP_RXCSUM) {
2222 			if (IFCAP_RXCSUM & ifp->if_capenable) {
2223 				ifp->if_capenable &= ~IFCAP_RXCSUM;
2224 				sc->csum_flag &= ~MYRI10GE_MCP_ETHER_FLAGS_CKSUM;
2225 			} else {
2226 				ifp->if_capenable |= IFCAP_RXCSUM;
2227 				sc->csum_flag |= MYRI10GE_MCP_ETHER_FLAGS_CKSUM;
2228 			}
2229 		}
2230 		sx_xunlock(&sc->driver_lock);
2231 		break;
2232 
2233 	case SIOCGIFMEDIA:
2234 		err = ifmedia_ioctl(ifp, (struct ifreq *)data,
2235 				    &sc->media, command);
2236                 break;
2237 
2238 	default:
2239 		err = ENOTTY;
2240         }
2241 	return err;
2242 }
2243 
2244 static void
2245 myri10ge_fetch_tunables(myri10ge_softc_t *sc)
2246 {
2247 
2248 	TUNABLE_INT_FETCH("hw.myri10ge.flow_control_enabled",
2249 			  &myri10ge_flow_control);
2250 	TUNABLE_INT_FETCH("hw.myri10ge.intr_coal_delay",
2251 			  &myri10ge_intr_coal_delay);
2252 	TUNABLE_INT_FETCH("hw.myri10ge.nvidia_ecrc_enable",
2253 			  &myri10ge_nvidia_ecrc_enable);
2254 	TUNABLE_INT_FETCH("hw.myri10ge.skip_pio_read",
2255 			  &myri10ge_skip_pio_read);
2256 
2257 	if (myri10ge_intr_coal_delay < 0 ||
2258 	    myri10ge_intr_coal_delay > 10*1000)
2259 		myri10ge_intr_coal_delay = 30;
2260 	sc->pause = myri10ge_flow_control;
2261 }
2262 
2263 static int
2264 myri10ge_attach(device_t dev)
2265 {
2266 	myri10ge_softc_t *sc = device_get_softc(dev);
2267 	struct ifnet *ifp;
2268 	size_t bytes;
2269 	int rid, err, i;
2270 	uint16_t cmd;
2271 
2272 	sc->dev = dev;
2273 	myri10ge_fetch_tunables(sc);
2274 
2275 	err = bus_dma_tag_create(NULL,			/* parent */
2276 				 1,			/* alignment */
2277 				 4096,			/* boundary */
2278 				 BUS_SPACE_MAXADDR,	/* low */
2279 				 BUS_SPACE_MAXADDR,	/* high */
2280 				 NULL, NULL,		/* filter */
2281 				 MYRI10GE_MAX_ETHER_MTU,/* maxsize */
2282 				 MYRI10GE_MCP_ETHER_MAX_SEND_DESC, /* num segs */
2283 				 4096,			/* maxsegsize */
2284 				 0,			/* flags */
2285 				 NULL, NULL,		/* lock */
2286 				 &sc->parent_dmat);	/* tag */
2287 
2288 	if (err != 0) {
2289 		device_printf(sc->dev, "Err %d allocating parent dmat\n",
2290 			      err);
2291 		goto abort_with_nothing;
2292 	}
2293 
2294 	ifp = sc->ifp = if_alloc(IFT_ETHER);
2295 	if (ifp == NULL) {
2296 		device_printf(dev, "can not if_alloc()\n");
2297 		err = ENOSPC;
2298 		goto abort_with_parent_dmat;
2299 	}
2300 	mtx_init(&sc->cmd_lock, NULL,
2301 		 MTX_NETWORK_LOCK, MTX_DEF);
2302 	mtx_init(&sc->tx_lock, device_get_nameunit(dev),
2303 		 MTX_NETWORK_LOCK, MTX_DEF);
2304 	sx_init(&sc->driver_lock, device_get_nameunit(dev));
2305 
2306 	/* Enable DMA and Memory space access */
2307 	pci_enable_busmaster(dev);
2308 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2309 	cmd |= PCIM_CMD_MEMEN;
2310 	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2311 
2312 	/* Map the board into the kernel */
2313 	rid = PCIR_BARS;
2314 	sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
2315 					 ~0, 1, RF_ACTIVE);
2316 	if (sc->mem_res == NULL) {
2317 		device_printf(dev, "could not map memory\n");
2318 		err = ENXIO;
2319 		goto abort_with_lock;
2320 	}
2321 	sc->sram = rman_get_virtual(sc->mem_res);
2322 	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
2323 	if (sc->sram_size > rman_get_size(sc->mem_res)) {
2324 		device_printf(dev, "impossible memory region size %ld\n",
2325 			      rman_get_size(sc->mem_res));
2326 		err = ENXIO;
2327 		goto abort_with_mem_res;
2328 	}
2329 
2330 	/* make NULL terminated copy of the EEPROM strings section of
2331 	   lanai SRAM */
2332 	bzero(sc->eeprom_strings, MYRI10GE_EEPROM_STRINGS_SIZE);
2333 	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
2334 				rman_get_bushandle(sc->mem_res),
2335 				sc->sram_size - MYRI10GE_EEPROM_STRINGS_SIZE,
2336 				sc->eeprom_strings,
2337 				MYRI10GE_EEPROM_STRINGS_SIZE - 2);
2338 	err = myri10ge_parse_strings(sc);
2339 	if (err != 0)
2340 		goto abort_with_mem_res;
2341 
2342 	/* Enable write combining for efficient use of PCIe bus */
2343 	myri10ge_enable_wc(sc);
2344 
2345 	/* Allocate the out of band dma memory */
2346 	err = myri10ge_dma_alloc(sc, &sc->cmd_dma,
2347 				 sizeof (myri10ge_cmd_t), 64);
2348 	if (err != 0)
2349 		goto abort_with_mem_res;
2350 	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
2351 	err = myri10ge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
2352 	if (err != 0)
2353 		goto abort_with_cmd_dma;
2354 
2355 	err = myri10ge_dma_alloc(sc, &sc->fw_stats_dma,
2356 				 sizeof (*sc->fw_stats), 64);
2357 	if (err != 0)
2358 		goto abort_with_zeropad_dma;
2359 	sc->fw_stats = (mcp_stats_t *)sc->fw_stats_dma.addr;
2360 
2361 
2362 	/* allocate interrupt queues */
2363 	bytes = myri10ge_max_intr_slots * sizeof (*sc->intr.q[0]);
2364 	for (i = 0; i < MYRI10GE_NUM_INTRQS; i++) {
2365 		err = myri10ge_dma_alloc(sc, &sc->intr.dma[i],
2366 					 bytes, 4096);
2367 		if (err != 0)
2368 			goto abort_with_intrq;
2369 		sc->intr.q[i] = (mcp_slot_t *)sc->intr.dma[i].addr;
2370 	}
2371 
2372 	/* Add our ithread  */
2373 	rid = 0;
2374 	sc->irq_res = bus_alloc_resource(dev, SYS_RES_IRQ, &rid, 0, ~0,
2375 					 1, RF_SHAREABLE | RF_ACTIVE);
2376 	if (sc->irq_res == NULL) {
2377 		device_printf(dev, "could not alloc interrupt\n");
2378 		goto abort_with_intrq;
2379 	}
2380 
2381 	/* load the firmware */
2382 	myri10ge_select_firmware(sc);
2383 
2384 	err = myri10ge_load_firmware(sc);
2385 	if (err != 0)
2386 		goto abort_with_irq_res;
2387 	err = myri10ge_reset(sc);
2388 	if (err != 0)
2389 		goto abort_with_irq_res;
2390 
2391 	/* hook into the network stack */
2392 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2393 	ifp->if_baudrate = 100000000;
2394 	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM;
2395 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP;
2396 	ifp->if_capenable = ifp->if_capabilities;
2397 	sc->csum_flag |= MYRI10GE_MCP_ETHER_FLAGS_CKSUM;
2398         ifp->if_init = myri10ge_init;
2399         ifp->if_softc = sc;
2400         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2401         ifp->if_ioctl = myri10ge_ioctl;
2402         ifp->if_start = myri10ge_start;
2403 	ifp->if_watchdog = myri10ge_watchdog;
2404 	ether_ifattach(ifp, sc->mac_addr);
2405 	/* ether_ifattach sets mtu to 1500 */
2406 	ifp->if_mtu = MYRI10GE_MAX_ETHER_MTU - ETHER_HDR_LEN;
2407 
2408 	/* Initialise the ifmedia structure */
2409 	ifmedia_init(&sc->media, 0, myri10ge_media_change,
2410 		     myri10ge_media_status);
2411 	ifmedia_add(&sc->media, IFM_ETHER|IFM_AUTO, 0, NULL);
2412 	myri10ge_add_sysctls(sc);
2413 	return 0;
2414 
2415 abort_with_irq_res:
2416 	bus_release_resource(dev, SYS_RES_IRQ, 0, sc->irq_res);
2417 abort_with_intrq:
2418 	for (i = 0;  i < MYRI10GE_NUM_INTRQS; i++) {
2419 		if (sc->intr.q[i] == NULL)
2420 			continue;
2421 		sc->intr.q[i] = NULL;
2422 		myri10ge_dma_free(&sc->intr.dma[i]);
2423 	}
2424 	myri10ge_dma_free(&sc->fw_stats_dma);
2425 abort_with_zeropad_dma:
2426 	myri10ge_dma_free(&sc->zeropad_dma);
2427 abort_with_cmd_dma:
2428 	myri10ge_dma_free(&sc->cmd_dma);
2429 abort_with_mem_res:
2430 	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
2431 abort_with_lock:
2432 	pci_disable_busmaster(dev);
2433 	mtx_destroy(&sc->cmd_lock);
2434 	mtx_destroy(&sc->tx_lock);
2435 	sx_destroy(&sc->driver_lock);
2436 	if_free(ifp);
2437 abort_with_parent_dmat:
2438 	bus_dma_tag_destroy(sc->parent_dmat);
2439 
2440 abort_with_nothing:
2441 	return err;
2442 }
2443 
2444 static int
2445 myri10ge_detach(device_t dev)
2446 {
2447 	myri10ge_softc_t *sc = device_get_softc(dev);
2448 	int i;
2449 
2450 	sx_xlock(&sc->driver_lock);
2451 	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
2452 		myri10ge_close(sc);
2453 	sx_xunlock(&sc->driver_lock);
2454 	ether_ifdetach(sc->ifp);
2455 	bus_release_resource(dev, SYS_RES_IRQ, 0, sc->irq_res);
2456 	for (i = 0;  i < MYRI10GE_NUM_INTRQS; i++) {
2457 		if (sc->intr.q[i] == NULL)
2458 			continue;
2459 		sc->intr.q[i] = NULL;
2460 		myri10ge_dma_free(&sc->intr.dma[i]);
2461 	}
2462 	myri10ge_dma_free(&sc->fw_stats_dma);
2463 	myri10ge_dma_free(&sc->zeropad_dma);
2464 	myri10ge_dma_free(&sc->cmd_dma);
2465 	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
2466 	pci_disable_busmaster(dev);
2467 	mtx_destroy(&sc->cmd_lock);
2468 	mtx_destroy(&sc->tx_lock);
2469 	sx_destroy(&sc->driver_lock);
2470 	if_free(sc->ifp);
2471 	bus_dma_tag_destroy(sc->parent_dmat);
2472 	return 0;
2473 }
2474 
2475 static int
2476 myri10ge_shutdown(device_t dev)
2477 {
2478 	return 0;
2479 }
2480 
2481 /*
2482   This file uses Myri10GE driver indentation.
2483 
2484   Local Variables:
2485   c-file-style:"linux"
2486   tab-width:8
2487   End:
2488 */
2489