xref: /freebsd/sys/dev/cxgb/cxgb_main.c (revision fc5a2e51fb49ec3cbe8f670d6c1000abecf6df27)
1 /**************************************************************************
2 
3 Copyright (c) 2007, Chelsio Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12 2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27 
28 ***************************************************************************/
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/bus.h>
37 #include <sys/module.h>
38 #include <sys/pciio.h>
39 #include <sys/conf.h>
40 #include <machine/bus.h>
41 #include <machine/resource.h>
42 #include <sys/bus_dma.h>
43 #include <sys/rman.h>
44 #include <sys/ioccom.h>
45 #include <sys/mbuf.h>
46 #include <sys/linker.h>
47 #include <sys/syslog.h>
48 #include <sys/firmware.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/smp.h>
52 #include <sys/sysctl.h>
53 #include <sys/queue.h>
54 #include <sys/taskqueue.h>
55 
56 #include <net/bpf.h>
57 #include <net/ethernet.h>
58 #include <net/if.h>
59 #include <net/if_arp.h>
60 #include <net/if_dl.h>
61 #include <net/if_media.h>
62 #include <net/if_types.h>
63 
64 #include <netinet/in_systm.h>
65 #include <netinet/in.h>
66 #include <netinet/if_ether.h>
67 #include <netinet/ip.h>
69 #include <netinet/tcp.h>
70 #include <netinet/udp.h>
71 
72 #include <dev/pci/pcireg.h>
73 #include <dev/pci/pcivar.h>
74 #include <dev/pci/pci_private.h>
75 
76 
77 #include <vm/vm.h>
78 #include <vm/vm_page.h>
79 #include <vm/vm_map.h>
80 
81 #ifdef CONFIG_DEFINED
82 #include <cxgb_include.h>
83 #include <sys/mvec.h>
84 #else
85 #include <dev/cxgb/cxgb_include.h>
86 #include <dev/cxgb/sys/mvec.h>
87 #endif
88 
89 #ifdef PRIV_SUPPORTED
90 #include <sys/priv.h>
91 #endif
92 
93 static int cxgb_setup_msix(adapter_t *, int);
94 static void cxgb_teardown_msix(adapter_t *);
95 static void cxgb_init(void *);
96 static void cxgb_init_locked(struct port_info *);
97 static void cxgb_stop_locked(struct port_info *);
98 static void cxgb_set_rxmode(struct port_info *);
99 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
100 static void cxgb_start(struct ifnet *);
101 static void cxgb_start_proc(void *, int ncount);
102 static int cxgb_media_change(struct ifnet *);
103 static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
104 static int setup_sge_qsets(adapter_t *);
105 static void cxgb_async_intr(void *);
106 static void cxgb_ext_intr_handler(void *, int);
107 static void cxgb_tick_handler(void *, int);
108 static void cxgb_down_locked(struct adapter *sc);
109 static void cxgb_tick(void *);
110 static void setup_rss(adapter_t *sc);
111 
112 /* Attachment glue for the PCI controller end of the device.  Each port of
113  * the device is attached separately, as defined later.
114  */
115 static int cxgb_controller_probe(device_t);
116 static int cxgb_controller_attach(device_t);
117 static int cxgb_controller_detach(device_t);
118 static void cxgb_free(struct adapter *);
119 static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
120     unsigned int end);
121 static void cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf);
122 static int cxgb_get_regs_len(void);
123 static int offload_open(struct port_info *pi);
124 static void touch_bars(device_t dev);
125 
126 #ifdef notyet
127 static int offload_close(struct t3cdev *tdev);
128 #endif
129 
130 
131 static device_method_t cxgb_controller_methods[] = {
132 	DEVMETHOD(device_probe,		cxgb_controller_probe),
133 	DEVMETHOD(device_attach,	cxgb_controller_attach),
134 	DEVMETHOD(device_detach,	cxgb_controller_detach),
135 
136 	/* bus interface */
137 	DEVMETHOD(bus_print_child,	bus_generic_print_child),
138 	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
139 
140 	{ 0, 0 }
141 };
142 
143 static driver_t cxgb_controller_driver = {
144 	"cxgbc",
145 	cxgb_controller_methods,
146 	sizeof(struct adapter)
147 };
148 
149 static devclass_t	cxgb_controller_devclass;
150 DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
151 
152 /*
153  * Attachment glue for the ports.  Attachment is done directly to the
154  * controller device.
155  */
156 static int cxgb_port_probe(device_t);
157 static int cxgb_port_attach(device_t);
158 static int cxgb_port_detach(device_t);
159 
160 static device_method_t cxgb_port_methods[] = {
161 	DEVMETHOD(device_probe,		cxgb_port_probe),
162 	DEVMETHOD(device_attach,	cxgb_port_attach),
163 	DEVMETHOD(device_detach,	cxgb_port_detach),
164 	{ 0, 0 }
165 };
166 
167 static driver_t cxgb_port_driver = {
168 	"cxgb",
169 	cxgb_port_methods,
170 	0
171 };
172 
173 static d_ioctl_t cxgb_extension_ioctl;
174 static d_open_t cxgb_extension_open;
175 static d_close_t cxgb_extension_close;
176 
177 static struct cdevsw cxgb_cdevsw = {
178        .d_version =    D_VERSION,
179        .d_flags =      0,
180        .d_open =       cxgb_extension_open,
181        .d_close =      cxgb_extension_close,
182        .d_ioctl =      cxgb_extension_ioctl,
183        .d_name =       "cxgb",
184 };
185 
186 static devclass_t	cxgb_port_devclass;
187 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
188 
189 #define SGE_MSIX_COUNT (SGE_QSETS + 1)
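
/*
 * With MSI-X the driver asks for one vector per queue set plus one extra
 * vector for the slow path (link changes and error interrupts); see
 * cxgb_setup_msix() below, where rid 1 carries the async interrupt and
 * rids 2 and up map to the individual queue sets.
 */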
190 
191 extern int collapse_mbufs;
192 /*
193  * The driver uses the best interrupt scheme available on a platform in the
194  * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
195  * of these schemes the driver may consider as follows:
196  *
197  * msi_allowed = 2: choose from among all three options
198  * msi_allowed = 1: only consider MSI and pin interrupts
199  * msi_allowed = 0: force pin interrupts
200  */
201 static int msi_allowed = 2;
202 
203 TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
204 SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
205 SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
206     "MSI-X, MSI, INTx selector");
207 
208 /*
209  * The driver enables offload by default.
210  * To disable it, set ofld_disable = 1.
211  */
212 static int ofld_disable = 0;
213 TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
214 SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
215     "disable ULP offload");
216 
217 /*
218  * The driver defaults to a single queue-set per port (singleq = 1).
219  * Set singleq = 0 to allow multiple queue-sets per port when MSI-X is in use.
220  */
221 static int singleq = 1;
222 TUNABLE_INT("hw.cxgb.singleq", &singleq);
223 SYSCTL_UINT(_hw_cxgb, OID_AUTO, singleq, CTLFLAG_RDTUN, &singleq, 0,
224     "use a single queue-set per port");
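
/*
 * Illustrative /boot/loader.conf settings for the tunables registered
 * above (the values shown are examples, not recommendations):
 *
 *   hw.cxgb.msi_allowed="1"
 *   hw.cxgb.ofld_disable="1"
 *   hw.cxgb.singleq="1"
 */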
225 
226 enum {
227 	MAX_TXQ_ENTRIES      = 16384,
228 	MAX_CTRL_TXQ_ENTRIES = 1024,
229 	MAX_RSPQ_ENTRIES     = 16384,
230 	MAX_RX_BUFFERS       = 16384,
231 	MAX_RX_JUMBO_BUFFERS = 16384,
232 	MIN_TXQ_ENTRIES      = 4,
233 	MIN_CTRL_TXQ_ENTRIES = 4,
234 	MIN_RSPQ_ENTRIES     = 32,
235 	MIN_FL_ENTRIES       = 32,
236 	MIN_FL_JUMBO_ENTRIES = 32
237 };
238 
239 struct filter_info {
240 	u32 sip;
241 	u32 sip_mask;
242 	u32 dip;
243 	u16 sport;
244 	u16 dport;
245 	u32 vlan:12;
246 	u32 vlan_prio:3;
247 	u32 mac_hit:1;
248 	u32 mac_idx:4;
249 	u32 mac_vld:1;
250 	u32 pkt_type:2;
251 	u32 report_filter_id:1;
252 	u32 pass:1;
253 	u32 rss:1;
254 	u32 qset:3;
255 	u32 locked:1;
256 	u32 valid:1;
257 };
258 
259 enum { FILTER_NO_VLAN_PRI = 7 };
260 
261 #define PORT_MASK ((1 << MAX_NPORTS) - 1)
262 
263 /* Table for probing the cards.  The desc field isn't actually used */
264 struct cxgb_ident {
265 	uint16_t	vendor;
266 	uint16_t	device;
267 	int		index;
268 	char		*desc;
269 } cxgb_identifiers[] = {
270 	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
271 	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
272 	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
273 	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
274 	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
275 	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
276 	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
277 	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
278 	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
279 	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
280 	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
281 	{0, 0, 0, NULL}
282 };
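
/*
 * The index field selects the entry handed to t3_get_adapter_info() in
 * cxgb_get_adapter_info() below; only desc is unused.
 */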
283 
284 
285 static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
286 
287 static inline char
288 t3rev2char(struct adapter *adapter)
289 {
290 	char rev = 'z';
291 
292 	switch(adapter->params.rev) {
293 	case T3_REV_A:
294 		rev = 'a';
295 		break;
296 	case T3_REV_B:
297 	case T3_REV_B2:
298 		rev = 'b';
299 		break;
300 	case T3_REV_C:
301 		rev = 'c';
302 		break;
303 	}
304 	return rev;
305 }
306 
307 static struct cxgb_ident *
308 cxgb_get_ident(device_t dev)
309 {
310 	struct cxgb_ident *id;
311 
312 	for (id = cxgb_identifiers; id->desc != NULL; id++) {
313 		if ((id->vendor == pci_get_vendor(dev)) &&
314 		    (id->device == pci_get_device(dev))) {
315 			return (id);
316 		}
317 	}
318 	return (NULL);
319 }
320 
321 static const struct adapter_info *
322 cxgb_get_adapter_info(device_t dev)
323 {
324 	struct cxgb_ident *id;
325 	const struct adapter_info *ai;
326 
327 	id = cxgb_get_ident(dev);
328 	if (id == NULL)
329 		return (NULL);
330 
331 	ai = t3_get_adapter_info(id->index);
332 
333 	return (ai);
334 }
335 
336 static int
337 cxgb_controller_probe(device_t dev)
338 {
339 	const struct adapter_info *ai;
340 	char *ports, buf[80];
341 	int nports;
342 
343 	ai = cxgb_get_adapter_info(dev);
344 	if (ai == NULL)
345 		return (ENXIO);
346 
347 	nports = ai->nports0 + ai->nports1;
348 	if (nports == 1)
349 		ports = "port";
350 	else
351 		ports = "ports";
352 
353 	snprintf(buf, sizeof(buf), "%s RNIC, %d %s", ai->desc, nports, ports);
354 	device_set_desc_copy(dev, buf);
355 	return (BUS_PROBE_DEFAULT);
356 }
357 
358 #define FW_FNAME "t3fw%d%d%d"
359 #define TPEEPROM_NAME "t3%ctpe%d%d%d"
360 #define TPSRAM_NAME "t3%cps%d%d%d"
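
/*
 * These formats name the firmware(9) images requested at run time.  As a
 * purely hypothetical example, with FW_VERSION 4.7.0, TP_VERSION 1.1.0 and
 * a rev-B adapter the driver would ask firmware_get() for "t3fw470",
 * "t3btpe110" and "t3bps110"; the real numbers come from the FW_VERSION_*
 * and TP_VERSION_* macros.
 */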
361 
362 static int
363 upgrade_fw(adapter_t *sc)
364 {
365 	char buf[32];
366 #ifdef FIRMWARE_LATEST
367 	const struct firmware *fw;
368 #else
369 	struct firmware *fw;
370 #endif
371 	int status;
372 
373 	snprintf(&buf[0], sizeof(buf), FW_FNAME,  FW_VERSION_MAJOR,
374 	    FW_VERSION_MINOR, FW_VERSION_MICRO);
375 
376 	fw = firmware_get(buf);
377 
378 	if (fw == NULL) {
379 		device_printf(sc->dev, "Could not find firmware image %s\n", buf);
380 		return (ENOENT);
381 	} else
382 		device_printf(sc->dev, "updating firmware on card with %s\n", buf);
383 	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
384 
385 	device_printf(sc->dev, "firmware update returned %s %d\n", (status == 0) ? "success" : "fail", status);
386 
387 	firmware_put(fw, FIRMWARE_UNLOAD);
388 
389 	return (status);
390 }
391 
392 static int
393 cxgb_controller_attach(device_t dev)
394 {
395 	device_t child;
396 	const struct adapter_info *ai;
397 	struct adapter *sc;
398 	int i, error = 0;
399 	uint32_t vers;
400 	int port_qsets = 1;
401 #ifdef MSI_SUPPORTED
402 	int msi_needed, reg;
403 #endif
404 	sc = device_get_softc(dev);
405 	sc->dev = dev;
406 	sc->msi_count = 0;
407 	ai = cxgb_get_adapter_info(dev);
408 
409 	/*
410 	 * XXX not really related but a recent addition
411 	 */
412 #ifdef MSI_SUPPORTED
413 	/* Find the PCIe link width and set the max read request size to 4KB. */
414 	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
415 		uint16_t lnk, pectl;
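		/*
		 * In the PCIe capability, offset 0x12 is the Link Status
		 * register (negotiated link width in bits 9:4) and offset
		 * 0x8 is the Device Control register, whose bits 14:12
		 * select the max read request size (5 => 4096 bytes).
		 */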
416 		lnk = pci_read_config(dev, reg + 0x12, 2);
417 		sc->link_width = (lnk >> 4) & 0x3f;
418 
419 		pectl = pci_read_config(dev, reg + 0x8, 2);
420 		pectl = (pectl & ~0x7000) | (5 << 12);
421 		pci_write_config(dev, reg + 0x8, pectl, 2);
422 	}
423 
424 	if (sc->link_width != 0 && sc->link_width <= 4 &&
425 	    (ai->nports0 + ai->nports1) <= 2) {
426 		device_printf(sc->dev,
427 		    "PCIe x%d Link, expect reduced performance\n",
428 		    sc->link_width);
429 	}
430 #endif
431 	touch_bars(dev);
432 	pci_enable_busmaster(dev);
433 	/*
434 	 * Allocate the registers and make them available to the driver.
435 	 * The registers that we care about for NIC mode are in BAR 0
436 	 */
437 	sc->regs_rid = PCIR_BAR(0);
438 	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
439 	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
440 		device_printf(dev, "Cannot allocate BAR\n");
441 		return (ENXIO);
442 	}
443 
444 	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
445 	    device_get_unit(dev));
446 	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
447 
448 	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
449 	    device_get_unit(dev));
450 	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
451 	    device_get_unit(dev));
452 	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
453 	    device_get_unit(dev));
454 
455 	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_DEF);
456 	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
457 	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
458 
459 	sc->bt = rman_get_bustag(sc->regs_res);
460 	sc->bh = rman_get_bushandle(sc->regs_res);
461 	sc->mmio_len = rman_get_size(sc->regs_res);
462 
463 	if (t3_prep_adapter(sc, ai, 1) < 0) {
464 		printf("prep adapter failed\n");
465 		error = ENODEV;
466 		goto out;
467 	}
468 	/* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
469 	 * enough messages for the queue sets.  If that fails, try falling
470 	 * back to MSI.  If that fails, then try falling back to the legacy
471 	 * interrupt pin model.
472 	 */
473 #ifdef MSI_SUPPORTED
474 
475 	sc->msix_regs_rid = 0x20;
476 	if ((msi_allowed >= 2) &&
477 	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
478 	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
479 
480 		msi_needed = sc->msi_count = SGE_MSIX_COUNT;
481 
482 		if (((error = pci_alloc_msix(dev, &sc->msi_count)) != 0) ||
483 		    (sc->msi_count != msi_needed)) {
484 			device_printf(dev, "msix allocation failed - msi_count = %d"
485 			    " msi_needed=%d will try msi err=%d\n", sc->msi_count,
486 			    msi_needed, error);
487 			sc->msi_count = 0;
488 			pci_release_msi(dev);
489 			bus_release_resource(dev, SYS_RES_MEMORY,
490 			    sc->msix_regs_rid, sc->msix_regs_res);
491 			sc->msix_regs_res = NULL;
492 		} else {
493 			sc->flags |= USING_MSIX;
494 			sc->cxgb_intr = t3_intr_msix;
495 		}
496 	}
497 
498 	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
499 		sc->msi_count = 1;
500 		if (pci_alloc_msi(dev, &sc->msi_count)) {
501 			device_printf(dev, "alloc msi failed - will try INTx\n");
502 			sc->msi_count = 0;
503 			pci_release_msi(dev);
504 		} else {
505 			sc->flags |= USING_MSI;
506 			sc->irq_rid = 1;
507 			sc->cxgb_intr = t3_intr_msi;
508 		}
509 	}
510 #endif
511 	if (sc->msi_count == 0) {
512 		device_printf(dev, "using line interrupts\n");
513 		sc->irq_rid = 0;
514 		sc->cxgb_intr = t3b_intr;
515 	}
516 
517 
518 	/* Create a private taskqueue thread for handling driver events */
519 #ifdef TASKQUEUE_CURRENT
520 	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
521 	    taskqueue_thread_enqueue, &sc->tq);
522 #else
523 	sc->tq = taskqueue_create_fast("cxgb_taskq", M_NOWAIT,
524 	    taskqueue_thread_enqueue, &sc->tq);
525 #endif
526 	if (sc->tq == NULL) {
527 		device_printf(dev, "failed to allocate controller task queue\n");
528 		goto out;
529 	}
530 
531 	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
532 	    device_get_nameunit(dev));
533 	TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);
534 	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
535 
536 
537 	/* Create a periodic callout for checking adapter status */
538 	callout_init(&sc->cxgb_tick_ch, TRUE);
539 
540 	if (t3_check_fw_version(sc) != 0) {
541 		/*
542 		 * Warn user that a firmware update will be attempted in init.
543 		 */
544 		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
545 		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
546 		sc->flags &= ~FW_UPTODATE;
547 	} else {
548 		sc->flags |= FW_UPTODATE;
549 	}
550 
551 	if (t3_check_tpsram_version(sc) != 0) {
552 		/*
553 		 * Warn user that a protocol SRAM update will be attempted in init.
554 		 */
555 		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
556 		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
557 		sc->flags &= ~TPS_UPTODATE;
558 	} else {
559 		sc->flags |= TPS_UPTODATE;
560 	}
561 
562 	if ((sc->flags & USING_MSIX) && !singleq)
563 		port_qsets = min((SGE_QSETS/(sc)->params.nports), mp_ncpus);
564 
565 	/*
566 	 * Create a child device for each MAC.  The ethernet attachment
567 	 * will be done in these children.
568 	 */
569 	for (i = 0; i < (sc)->params.nports; i++) {
570 		struct port_info *pi;
571 
572 		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
573 			device_printf(dev, "failed to add child port\n");
574 			error = EINVAL;
575 			goto out;
576 		}
577 		pi = &sc->port[i];
578 		pi->adapter = sc;
579 		pi->nqsets = port_qsets;
580 		pi->first_qset = i*port_qsets;
581 		pi->port_id = i;
582 		pi->tx_chan = i >= ai->nports0;
583 		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
584 		sc->rxpkt_map[pi->txpkt_intf] = i;
585 		sc->portdev[i] = child;
586 		device_set_softc(child, pi);
587 	}
588 	if ((error = bus_generic_attach(dev)) != 0)
589 		goto out;
590 
591 	/*
592 	 * XXX need to poll for link status
593 	 */
594 	sc->params.stats_update_period = 1;
595 
596 	/* initialize sge private state */
597 	t3_sge_init_adapter(sc);
598 
599 	t3_led_ready(sc);
600 
601 	cxgb_offload_init();
602 	if (is_offload(sc)) {
603 		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
604 		cxgb_adapter_ofld(sc);
605 	}
606 	error = t3_get_fw_version(sc, &vers);
607 	if (error)
608 		goto out;
609 
610 	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
611 	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
612 	    G_FW_VERSION_MICRO(vers));
613 
614 	t3_add_sysctls(sc);
615 out:
616 	if (error)
617 		cxgb_free(sc);
618 
619 	return (error);
620 }
621 
622 static int
623 cxgb_controller_detach(device_t dev)
624 {
625 	struct adapter *sc;
626 
627 	sc = device_get_softc(dev);
628 
629 	cxgb_free(sc);
630 
631 	return (0);
632 }
633 
634 static void
635 cxgb_free(struct adapter *sc)
636 {
637 	int i;
638 
639 	ADAPTER_LOCK(sc);
640 	/*
641 	 * drops the lock
642 	 */
643 	cxgb_down_locked(sc);
644 
645 #ifdef MSI_SUPPORTED
646 	if (sc->flags & (USING_MSI | USING_MSIX)) {
647 		device_printf(sc->dev, "releasing msi message(s)\n");
648 		pci_release_msi(sc->dev);
649 	} else {
650 		device_printf(sc->dev, "no msi message to release\n");
651 	}
652 #endif
653 	if (sc->msix_regs_res != NULL) {
654 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
655 		    sc->msix_regs_res);
656 	}
657 
658 	if (sc->tq != NULL) {
659 		taskqueue_drain(sc->tq, &sc->ext_intr_task);
660 		taskqueue_drain(sc->tq, &sc->tick_task);
661 	}
662 	t3_sge_deinit_sw(sc);
663 	/*
664 	 * Wait for last callout
665 	 */
666 
667 	tsleep(&sc, 0, "cxgb unload", 3*hz);
668 
669 	for (i = 0; i < (sc)->params.nports; ++i) {
670 		if (sc->portdev[i] != NULL)
671 			device_delete_child(sc->dev, sc->portdev[i]);
672 	}
673 
674 	bus_generic_detach(sc->dev);
675 	if (sc->tq != NULL)
676 		taskqueue_free(sc->tq);
677 #ifdef notyet
678 	if (is_offload(sc)) {
679 		cxgb_adapter_unofld(sc);
680 		if (isset(&sc->open_device_map,	OFFLOAD_DEVMAP_BIT))
681 			offload_close(&sc->tdev);
682 	}
683 #endif
684 
685 	t3_free_sge_resources(sc);
686 	free(sc->filters, M_DEVBUF);
687 	t3_sge_free(sc);
688 
689 	cxgb_offload_exit();
690 
691 	if (sc->regs_res != NULL)
692 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
693 		    sc->regs_res);
694 
695 	MTX_DESTROY(&sc->mdio_lock);
696 	MTX_DESTROY(&sc->sge.reg_lock);
697 	MTX_DESTROY(&sc->elmer_lock);
698 	ADAPTER_LOCK_DEINIT(sc);
699 
700 	return;
701 }
702 
703 /**
704  *	setup_sge_qsets - configure SGE Tx/Rx/response queues
705  *	@sc: the controller softc
706  *
707  *	Determines how many sets of SGE queues to use and initializes them.
708  *	We support multiple queue sets per port if we have MSI-X, otherwise
709  *	just one queue set per port.
710  */
711 static int
712 setup_sge_qsets(adapter_t *sc)
713 {
714 	int i, j, err, irq_idx = 0, qset_idx = 0;
715 	u_int ntxq = SGE_TXQ_PER_SET;
716 
717 	if ((err = t3_sge_alloc(sc)) != 0) {
718 		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
719 		return (err);
720 	}
721 
722 	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
723 		irq_idx = -1;
724 
725 	for (i = 0; i < (sc)->params.nports; i++) {
726 		struct port_info *pi = &sc->port[i];
727 
728 		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
729 			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
730 			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
731 			    &sc->params.sge.qset[qset_idx], ntxq, pi);
732 			if (err) {
733 				t3_free_sge_resources(sc);
734 				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n",
735 				    err);
736 				return (err);
737 			}
738 		}
739 	}
740 
741 	return (0);
742 }
743 
744 static void
745 cxgb_teardown_msix(adapter_t *sc)
746 {
747 	int i, nqsets;
748 
749 	for (nqsets = i = 0; i < (sc)->params.nports; i++)
750 		nqsets += sc->port[i].nqsets;
751 
752 	for (i = 0; i < nqsets; i++) {
753 		if (sc->msix_intr_tag[i] != NULL) {
754 			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
755 			    sc->msix_intr_tag[i]);
756 			sc->msix_intr_tag[i] = NULL;
757 		}
758 		if (sc->msix_irq_res[i] != NULL) {
759 			bus_release_resource(sc->dev, SYS_RES_IRQ,
760 			    sc->msix_irq_rid[i], sc->msix_irq_res[i]);
761 			sc->msix_irq_res[i] = NULL;
762 		}
763 	}
764 }
765 
766 static int
767 cxgb_setup_msix(adapter_t *sc, int msix_count)
768 {
769 	int i, j, k, nqsets, rid;
770 
771 	/* The first message indicates link changes and error conditions */
772 	sc->irq_rid = 1;
773 	if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
774 	   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
775 		device_printf(sc->dev, "Cannot allocate msix interrupt\n");
776 		return (EINVAL);
777 	}
778 
779 	if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
780 #ifdef INTR_FILTERS
781 		NULL,
782 #endif
783 		cxgb_async_intr, sc, &sc->intr_tag)) {
784 		device_printf(sc->dev, "Cannot set up interrupt\n");
785 		return (EINVAL);
786 	}
787 	for (i = k = 0; i < (sc)->params.nports; i++) {
788 		nqsets = sc->port[i].nqsets;
789 		for (j = 0; j < nqsets; j++, k++) {
790 			struct sge_qset *qs = &sc->sge.qs[k];
791 
792 			rid = k + 2;
793 			if (cxgb_debug)
794 				printf("rid=%d ", rid);
795 			if ((sc->msix_irq_res[k] = bus_alloc_resource_any(
796 			    sc->dev, SYS_RES_IRQ, &rid,
797 			    RF_SHAREABLE | RF_ACTIVE)) == NULL) {
798 				device_printf(sc->dev, "Cannot allocate "
799 				    "interrupt for message %d\n", rid);
800 				return (EINVAL);
801 			}
802 			sc->msix_irq_rid[k] = rid;
803 			printf("setting up interrupt for port=%d\n",
804 			    qs->port->port_id);
805 			if (bus_setup_intr(sc->dev, sc->msix_irq_res[k],
806 			    INTR_MPSAFE|INTR_TYPE_NET,
807 #ifdef INTR_FILTERS
808 				NULL,
809 #endif
810 				t3_intr_msix, qs, &sc->msix_intr_tag[k])) {
811 				device_printf(sc->dev, "Cannot set up "
812 				    "interrupt for message %d\n", rid);
813 				return (EINVAL);
814 			}
815 		}
816 	}
817 
818 
819 	return (0);
820 }
821 
822 static int
823 cxgb_port_probe(device_t dev)
824 {
825 	struct port_info *p;
826 	char buf[80];
827 
828 	p = device_get_softc(dev);
829 
830 	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, p->port_type->desc);
831 	device_set_desc_copy(dev, buf);
832 	return (0);
833 }
834 
835 
836 static int
837 cxgb_makedev(struct port_info *pi)
838 {
839 
840 	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
841 	    UID_ROOT, GID_WHEEL, 0600, if_name(pi->ifp));
842 
843 	if (pi->port_cdev == NULL)
844 		return (ENOMEM);
845 
846 	pi->port_cdev->si_drv1 = (void *)pi;
847 
848 	return (0);
849 }
850 
851 
852 #ifdef TSO_SUPPORTED
853 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU)
854 /* Don't enable TSO6 yet */
855 #define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4 | IFCAP_JUMBO_MTU)
856 #else
857 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
858 /* Don't enable TSO6 yet */
859 #define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM |  IFCAP_JUMBO_MTU)
860 #define IFCAP_TSO4 0x0
861 #define IFCAP_TSO6 0x0
862 #define CSUM_TSO   0x0
863 #endif
864 
865 
866 static int
867 cxgb_port_attach(device_t dev)
868 {
869 	struct port_info *p;
870 	struct ifnet *ifp;
871 	int err, media_flags;
872 
873 	p = device_get_softc(dev);
874 
875 	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
876 	    device_get_unit(device_get_parent(dev)), p->port_id);
877 	PORT_LOCK_INIT(p, p->lockbuf);
878 
879 	/* Allocate an ifnet object and set it up */
880 	ifp = p->ifp = if_alloc(IFT_ETHER);
881 	if (ifp == NULL) {
882 		device_printf(dev, "Cannot allocate ifnet\n");
883 		return (ENOMEM);
884 	}
885 
886 	/*
887 	 * Note that there is currently no watchdog timer.
888 	 */
889 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
890 	ifp->if_init = cxgb_init;
891 	ifp->if_softc = p;
892 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
893 	ifp->if_ioctl = cxgb_ioctl;
894 	ifp->if_start = cxgb_start;
895 	ifp->if_timer = 0;	/* Disable ifnet watchdog */
896 	ifp->if_watchdog = NULL;
897 
898 	ifp->if_snd.ifq_drv_maxlen = TX_ETH_Q_SIZE;
899 	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
900 	IFQ_SET_READY(&ifp->if_snd);
901 
902 	ifp->if_hwassist = ifp->if_capabilities = ifp->if_capenable = 0;
903 	ifp->if_capabilities |= CXGB_CAP;
904 	ifp->if_capenable |= CXGB_CAP_ENABLE;
905 	ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO);
906 	/*
907 	 * disable TSO on 4-port - it isn't supported by the firmware yet
908 	 */
909 	if (p->adapter->params.nports > 2) {
910 		ifp->if_capabilities &= ~(IFCAP_TSO4 | IFCAP_TSO6);
911 		ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_TSO6);
912 		ifp->if_hwassist &= ~CSUM_TSO;
913 	}
914 
915 	ether_ifattach(ifp, p->hw_addr);
916 	/*
917 	 * Only default to jumbo frames on 10GigE
918 	 */
919 	if (p->adapter->params.nports <= 2)
920 		ifp->if_mtu = 9000;
921 	if ((err = cxgb_makedev(p)) != 0) {
922 		printf("makedev failed %d\n", err);
923 		return (err);
924 	}
925 	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
926 	    cxgb_media_status);
927 
928 	if (!strcmp(p->port_type->desc, "10GBASE-CX4")) {
929 		media_flags = IFM_ETHER | IFM_10G_CX4 | IFM_FDX;
930 	} else if (!strcmp(p->port_type->desc, "10GBASE-SR")) {
931 		media_flags = IFM_ETHER | IFM_10G_SR | IFM_FDX;
932 	} else if (!strcmp(p->port_type->desc, "10GBASE-XR")) {
933 		media_flags = IFM_ETHER | IFM_10G_LR | IFM_FDX;
934 	} else if (!strcmp(p->port_type->desc, "10/100/1000BASE-T")) {
935 		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T, 0, NULL);
936 		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T | IFM_FDX,
937 			    0, NULL);
938 		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX,
939 			    0, NULL);
940 		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
941 			    0, NULL);
942 		ifmedia_add(&p->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
943 			    0, NULL);
944 		media_flags = 0;
945 	} else {
946 	        printf("unsupported media type %s\n", p->port_type->desc);
947 		return (ENXIO);
948 	}
949 	if (media_flags) {
950 		ifmedia_add(&p->media, media_flags, 0, NULL);
951 		ifmedia_set(&p->media, media_flags);
952 	} else {
953 		ifmedia_add(&p->media, IFM_ETHER | IFM_AUTO, 0, NULL);
954 		ifmedia_set(&p->media, IFM_ETHER | IFM_AUTO);
955 	}
956 
957 
958 	snprintf(p->taskqbuf, TASKQ_NAME_LEN, "cxgb_port_taskq%d", p->port_id);
959 #ifdef TASKQUEUE_CURRENT
960 	/* Create a taskqueue for handling TX without starvation */
961 	p->tq = taskqueue_create(p->taskqbuf, M_NOWAIT,
962 	    taskqueue_thread_enqueue, &p->tq);
963 #else
964 	/* Create a taskqueue for handling TX without starvation */
965 	p->tq = taskqueue_create_fast(p->taskqbuf, M_NOWAIT,
966 	    taskqueue_thread_enqueue, &p->tq);
967 #endif
968 
969 	if (p->tq == NULL) {
970 		device_printf(dev, "failed to allocate port task queue\n");
971 		return (ENOMEM);
972 	}
973 	taskqueue_start_threads(&p->tq, 1, PI_NET, "%s taskq",
974 	    device_get_nameunit(dev));
975 
976 	TASK_INIT(&p->start_task, 0, cxgb_start_proc, ifp);
977 
978 	t3_sge_init_port(p);
979 
980 	return (0);
981 }
982 
983 static int
984 cxgb_port_detach(device_t dev)
985 {
986 	struct port_info *p;
987 
988 	p = device_get_softc(dev);
989 
990 	PORT_LOCK(p);
991 	if (p->ifp->if_drv_flags & IFF_DRV_RUNNING)
992 		cxgb_stop_locked(p);
993 	PORT_UNLOCK(p);
994 
995 	if (p->tq != NULL) {
996 		taskqueue_drain(p->tq, &p->start_task);
997 		taskqueue_free(p->tq);
998 		p->tq = NULL;
999 	}
1000 
1001 	ether_ifdetach(p->ifp);
1002 	/*
1003 	 * the lock may be acquired in ifdetach
1004 	 */
1005 	PORT_LOCK_DEINIT(p);
1006 	if_free(p->ifp);
1007 
1008 	if (p->port_cdev != NULL)
1009 		destroy_dev(p->port_cdev);
1010 
1011 	return (0);
1012 }
1013 
1014 void
1015 t3_fatal_err(struct adapter *sc)
1016 {
1017 	u_int fw_status[4];
1018 
1019 	if (sc->flags & FULL_INIT_DONE) {
1020 		t3_sge_stop(sc);
1021 		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1022 		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1023 		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1024 		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1025 		t3_intr_disable(sc);
1026 	}
1027 	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1028 	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1029 		device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1030 		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1031 }
1032 
1033 int
1034 t3_os_find_pci_capability(adapter_t *sc, int cap)
1035 {
1036 	device_t dev;
1037 	struct pci_devinfo *dinfo;
1038 	pcicfgregs *cfg;
1039 	uint32_t status;
1040 	uint8_t ptr;
1041 
1042 	dev = sc->dev;
1043 	dinfo = device_get_ivars(dev);
1044 	cfg = &dinfo->cfg;
1045 
1046 	status = pci_read_config(dev, PCIR_STATUS, 2);
1047 	if (!(status & PCIM_STATUS_CAPPRESENT))
1048 		return (0);
1049 
1050 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1051 	case 0:
1052 	case 1:
1053 		ptr = PCIR_CAP_PTR;
1054 		break;
1055 	case 2:
1056 		ptr = PCIR_CAP_PTR_2;
1057 		break;
1058 	default:
1059 		return (0);
1060 		break;
1061 	}
1062 	ptr = pci_read_config(dev, ptr, 1);
1063 
1064 	while (ptr != 0) {
1065 		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1066 			return (ptr);
1067 		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1068 	}
1069 
1070 	return (0);
1071 }
1072 
1073 int
1074 t3_os_pci_save_state(struct adapter *sc)
1075 {
1076 	device_t dev;
1077 	struct pci_devinfo *dinfo;
1078 
1079 	dev = sc->dev;
1080 	dinfo = device_get_ivars(dev);
1081 
1082 	pci_cfg_save(dev, dinfo, 0);
1083 	return (0);
1084 }
1085 
1086 int
1087 t3_os_pci_restore_state(struct adapter *sc)
1088 {
1089 	device_t dev;
1090 	struct pci_devinfo *dinfo;
1091 
1092 	dev = sc->dev;
1093 	dinfo = device_get_ivars(dev);
1094 
1095 	pci_cfg_restore(dev, dinfo);
1096 	return (0);
1097 }
1098 
1099 /**
1100  *	t3_os_link_changed - handle link status changes
1101  *	@adapter: the adapter associated with the link change
1102  *	@port_id: the port index whose link status has changed
1103  *	@link_stat: the new status of the link
1104  *	@speed: the new speed setting
1105  *	@duplex: the new duplex setting
1106  *	@fc: the new flow-control setting
1107  *
1108  *	This is the OS-dependent handler for link status changes.  The OS
1109  *	neutral handler takes care of most of the processing for these events,
1110  *	then calls this handler for any OS-specific processing.
1111  */
1112 void
1113 t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1114      int duplex, int fc)
1115 {
1116 	struct port_info *pi = &adapter->port[port_id];
1117 	struct cmac *mac = &adapter->port[port_id].mac;
1118 
1119 	if ((pi->ifp->if_flags & IFF_UP) == 0)
1120 		return;
1121 
1122 	if (link_status) {
1123 		t3_mac_enable(mac, MAC_DIRECTION_RX);
1124 		if_link_state_change(pi->ifp, LINK_STATE_UP);
1125 	} else {
1126 		if_link_state_change(pi->ifp, LINK_STATE_DOWN);
1127 		pi->phy.ops->power_down(&pi->phy, 1);
1128 		t3_mac_disable(mac, MAC_DIRECTION_RX);
1129 		t3_link_start(&pi->phy, mac, &pi->link_config);
1130 	}
1131 }
1132 
1133 /*
1134  * Interrupt-context handler for external (PHY) interrupts.
1135  */
1136 void
1137 t3_os_ext_intr_handler(adapter_t *sc)
1138 {
1139 	if (cxgb_debug)
1140 		printf("t3_os_ext_intr_handler\n");
1141 	/*
1142 	 * Schedule a task to handle external interrupts as they may be slow
1143 	 * and we use a mutex to protect MDIO registers.  We disable PHY
1144 	 * interrupts in the meantime and let the task reenable them when
1145 	 * it's done.
1146 	 */
1147 	ADAPTER_LOCK(sc);
1148 	if (sc->slow_intr_mask) {
1149 		sc->slow_intr_mask &= ~F_T3DBG;
1150 		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
1151 		taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
1152 	}
1153 	ADAPTER_UNLOCK(sc);
1154 }
1155 
1156 void
1157 t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1158 {
1159 
1160 	/*
1161 	 * The ifnet might not be allocated yet when this is called, since
1162 	 * t3_prep_adapter invokes it early in attach; save the address
1163 	 * in the port structure for later use.
1164 	 */
1165 	if (cxgb_debug)
1166 		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1167 	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1168 }
1169 
1170 /**
1171  *	cxgb_link_start - enable a port
1172  *	@p: the port to enable
1173  *
1174  *	Performs the MAC and PHY actions needed to enable a port.
1175  */
1176 static void
1177 cxgb_link_start(struct port_info *p)
1178 {
1179 	struct ifnet *ifp;
1180 	struct t3_rx_mode rm;
1181 	struct cmac *mac = &p->mac;
1182 
1183 	ifp = p->ifp;
1184 
1185 	t3_init_rx_mode(&rm, p);
1186 	if (!mac->multiport)
1187 		t3_mac_reset(mac);
1188 	t3_mac_set_mtu(mac, ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
1189 	t3_mac_set_address(mac, 0, p->hw_addr);
1190 	t3_mac_set_rx_mode(mac, &rm);
1191 	t3_link_start(&p->phy, mac, &p->link_config);
1192 	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1193 }
1194 
1195 /**
1196  *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1197  *	@adap: the adapter
1198  *
1199  *	Sets up RSS to distribute packets to multiple receive queues.  We
1200  *	configure the RSS CPU lookup table to distribute to the number of HW
1201  *	receive queues, and the response queue lookup table to narrow that
1202  *	down to the response queues actually configured for each port.
1203  *	We always configure the RSS mapping for two ports since the mapping
1204  *	table has plenty of entries.
1205  */
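/*
 * Worked example (illustrative): on a two-port adapter with two queue sets
 * per port, nq[0] = nq[1] = 2, so the first half of rspq_map becomes
 * 0,1,0,1,... and the second half 2,3,2,3,..., steering each channel's
 * traffic onto its own pair of response queues.
 */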
1206 static void
1207 setup_rss(adapter_t *adap)
1208 {
1209 	int i;
1210 	u_int nq[2];
1211 	uint8_t cpus[SGE_QSETS + 1];
1212 	uint16_t rspq_map[RSS_TABLE_SIZE];
1213 
1214 	for (i = 0; i < SGE_QSETS; ++i)
1215 		cpus[i] = i;
1216 	cpus[SGE_QSETS] = 0xff;
1217 
1218 	nq[0] = nq[1] = 0;
1219 	for_each_port(adap, i) {
1220 		const struct port_info *pi = adap2pinfo(adap, i);
1221 
1222 		nq[pi->tx_chan] += pi->nqsets;
1223 	}
1224 	nq[0] = max(nq[0], 1U);
1225 	nq[1] = max(nq[1], 1U);
1226 	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1227 		rspq_map[i] = i % nq[0];
1228 		rspq_map[i + RSS_TABLE_SIZE / 2] = (i % nq[1]) + nq[0];
1229 	}
1230 	/* Calculate the reverse RSS map table */
1231 	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1232 		if (adap->rrss_map[rspq_map[i]] == 0xff)
1233 			adap->rrss_map[rspq_map[i]] = i;
1234 
1235 	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1236 		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1237 		      V_RRCPLCPUSIZE(6), cpus, rspq_map);
1238 
1239 }
1240 
1241 /*
1242  * Sends an mbuf to an offload queue driver
1243  * after dealing with any active network taps.
1244  */
1245 static inline int
1246 offload_tx(struct t3cdev *tdev, struct mbuf *m)
1247 {
1248 	int ret;
1249 
1250 	critical_enter();
1251 	ret = t3_offload_tx(tdev, m);
1252 	critical_exit();
1253 	return (ret);
1254 }
1255 
1256 static int
1257 write_smt_entry(struct adapter *adapter, int idx)
1258 {
1259 	struct port_info *pi = &adapter->port[idx];
1260 	struct cpl_smt_write_req *req;
1261 	struct mbuf *m;
1262 
1263 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1264 		return (ENOMEM);
1265 
1266 	req = mtod(m, struct cpl_smt_write_req *);
1267 	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1268 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1269 	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1270 	req->iff = idx;
1271 	memset(req->src_mac1, 0, sizeof(req->src_mac1));
1272 	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1273 
1274 	m_set_priority(m, 1);
1275 
1276 	offload_tx(&adapter->tdev, m);
1277 
1278 	return (0);
1279 }
1280 
1281 static int
1282 init_smt(struct adapter *adapter)
1283 {
1284 	int i;
1285 
1286 	for_each_port(adapter, i)
1287 		write_smt_entry(adapter, i);
1288 	return 0;
1289 }
1290 
1291 static void
1292 init_port_mtus(adapter_t *adapter)
1293 {
1294 	unsigned int mtus = adapter->port[0].ifp->if_mtu;
1295 
1296 	if (adapter->port[1].ifp)
1297 		mtus |= adapter->port[1].ifp->if_mtu << 16;
1298 	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1299 }
1300 
1301 static void
1302 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1303 			      int hi, int port)
1304 {
1305 	struct mbuf *m;
1306 	struct mngt_pktsched_wr *req;
1307 
1308 	m = m_gethdr(M_DONTWAIT, MT_DATA);
1309 	if (m) {
1310 		req = mtod(m, struct mngt_pktsched_wr *);
1311 		req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1312 		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1313 		req->sched = sched;
1314 		req->idx = qidx;
1315 		req->min = lo;
1316 		req->max = hi;
1317 		req->binding = port;
1318 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1319 		t3_mgmt_tx(adap, m);
1320 	}
1321 }
1322 
1323 static void
1324 bind_qsets(adapter_t *sc)
1325 {
1326 	int i, j;
1327 
1328 	for (i = 0; i < (sc)->params.nports; ++i) {
1329 		const struct port_info *pi = adap2pinfo(sc, i);
1330 
1331 		for (j = 0; j < pi->nqsets; ++j) {
1332 			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1333 					  -1, pi->tx_chan);
1334 
1335 		}
1336 	}
1337 }
1338 
1339 static void
1340 update_tpeeprom(struct adapter *adap)
1341 {
1342 #ifdef FIRMWARE_LATEST
1343 	const struct firmware *tpeeprom;
1344 #else
1345 	struct firmware *tpeeprom;
1346 #endif
1347 
1348 	char buf[64];
1349 	uint32_t version;
1350 	unsigned int major, minor;
1351 	int ret, len;
1352 	char rev;
1353 
1354 	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1355 
1356 	major = G_TP_VERSION_MAJOR(version);
1357 	minor = G_TP_VERSION_MINOR(version);
1358 	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1359 		return;
1360 
1361 	rev = t3rev2char(adap);
1362 
1363 	snprintf(buf, sizeof(buf), TPEEPROM_NAME, rev,
1364 		 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1365 
1366 	tpeeprom = firmware_get(buf);
1367 	if (tpeeprom == NULL) {
1368 		device_printf(adap->dev, "could not load TP EEPROM: unable to load %s\n",
1369 			buf);
1370 		return;
1371 	}
1372 
1373 	len = tpeeprom->datasize - 4;
1374 
1375 	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1376 	if (ret)
1377 		goto release_tpeeprom;
1378 
1379 	if (len != TP_SRAM_LEN) {
1380 		device_printf(adap->dev, "%s length is wrong len=%d expected=%d\n", buf, len, TP_SRAM_LEN);
1381 		return;
1382 	}
1383 
1384 	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1385 	    TP_SRAM_OFFSET);
1386 
1387 	if (!ret) {
1388 		device_printf(adap->dev,
1389 			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1390 			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1391 	} else
1392 		device_printf(adap->dev, "Protocol SRAM image update in EEPROM failed\n");
1393 
1394 release_tpeeprom:
1395 	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1396 
1397 	return;
1398 }
1399 
1400 static int
1401 update_tpsram(struct adapter *adap)
1402 {
1403 #ifdef FIRMWARE_LATEST
1404 	const struct firmware *tpsram;
1405 #else
1406 	struct firmware *tpsram;
1407 #endif
1408 	char buf[64];
1409 	int ret;
1410 	char rev;
1411 
1412 	rev = t3rev2char(adap);
1413 	if (!rev)
1414 		return 0;
1415 
1416 	update_tpeeprom(adap);
1417 
1418 	snprintf(buf, sizeof(buf), TPSRAM_NAME, rev,
1419 		 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1420 
1421 	tpsram = firmware_get(buf);
1422 	if (tpsram == NULL){
1423 		device_printf(adap->dev, "could not load TP SRAM: unable to load %s\n",
1424 			buf);
1425 		return (EINVAL);
1426 	} else
1427 		device_printf(adap->dev, "updating TP SRAM with %s\n", buf);
1428 
1429 	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1430 	if (ret)
1431 		goto release_tpsram;
1432 
1433 	ret = t3_set_proto_sram(adap, tpsram->data);
1434 	if (ret)
1435 		device_printf(adap->dev, "loading protocol SRAM failed\n");
1436 
1437 release_tpsram:
1438 	firmware_put(tpsram, FIRMWARE_UNLOAD);
1439 
1440 	return ret;
1441 }
1442 
1443 /**
1444  *	cxgb_up - enable the adapter
1445  *	@adap: adapter being enabled
1446  *
1447  *	Called when the first port is enabled, this function performs the
1448  *	actions necessary to make an adapter operational, such as completing
1449  *	the initialization of HW modules, and enabling interrupts.
1450  *
1451  */
1452 static int
1453 cxgb_up(struct adapter *sc)
1454 {
1455 	int err = 0;
1456 
1457 	if ((sc->flags & FULL_INIT_DONE) == 0) {
1458 
1459 		if ((sc->flags & FW_UPTODATE) == 0)
1460 			if ((err = upgrade_fw(sc)))
1461 				goto out;
1462 		if ((sc->flags & TPS_UPTODATE) == 0)
1463 			if ((err = update_tpsram(sc)))
1464 				goto out;
1465 		err = t3_init_hw(sc, 0);
1466 		if (err)
1467 			goto out;
1468 
1469 		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1470 
1471 		err = setup_sge_qsets(sc);
1472 		if (err)
1473 			goto out;
1474 
1475 		setup_rss(sc);
1476 		sc->flags |= FULL_INIT_DONE;
1477 	}
1478 
1479 	t3_intr_clear(sc);
1480 
1481 	/* If it's MSI or INTx, allocate a single interrupt for everything */
1482 	if ((sc->flags & USING_MSIX) == 0) {
1483 		if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
1484 		   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
1485 			device_printf(sc->dev, "Cannot allocate interrupt rid=%d\n",
1486 			    sc->irq_rid);
1487 			err = EINVAL;
1488 			goto out;
1489 		}
1490 		device_printf(sc->dev, "allocated irq_res=%p\n", sc->irq_res);
1491 
1492 		if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
1493 #ifdef INTR_FILTERS
1494 			NULL,
1495 #endif
1496 			sc->cxgb_intr, sc, &sc->intr_tag)) {
1497 			device_printf(sc->dev, "Cannot set up interrupt\n");
1498 			err = EINVAL;
1499 			goto irq_err;
1500 		}
1501 	} else {
1502 		cxgb_setup_msix(sc, sc->msi_count);
1503 	}
1504 
1505 	t3_sge_start(sc);
1506 	t3_intr_enable(sc);
1507 
1508 	if (!(sc->flags & QUEUES_BOUND)) {
1509 		printf("bind qsets\n");
1510 		bind_qsets(sc);
1511 		sc->flags |= QUEUES_BOUND;
1512 	}
1513 out:
1514 	return (err);
1515 irq_err:
1516 	CH_ERR(sc, "request_irq failed, err %d\n", err);
1517 	goto out;
1518 }
1519 
1520 
1521 /*
1522  * Release resources when all the ports and offloading have been stopped.
1523  */
1524 static void
1525 cxgb_down_locked(struct adapter *sc)
1526 {
1527 	int i;
1528 
1529 	t3_sge_stop(sc);
1530 	t3_intr_disable(sc);
1531 
1532 	if (sc->intr_tag != NULL) {
1533 		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
1534 		sc->intr_tag = NULL;
1535 	}
1536 	if (sc->irq_res != NULL) {
1537 		device_printf(sc->dev, "de-allocating interrupt irq_rid=%d irq_res=%p\n",
1538 		    sc->irq_rid, sc->irq_res);
1539 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
1540 		    sc->irq_res);
1541 		sc->irq_res = NULL;
1542 	}
1543 
1544 	if (sc->flags & USING_MSIX)
1545 		cxgb_teardown_msix(sc);
1546 	ADAPTER_UNLOCK(sc);
1547 
1548 	callout_drain(&sc->cxgb_tick_ch);
1549 	callout_drain(&sc->sge_timer_ch);
1550 
1551 	if (sc->tq != NULL) {
1552 		taskqueue_drain(sc->tq, &sc->slow_intr_task);
1553 		for (i = 0; i < sc->params.nports; i++)
1554 			taskqueue_drain(sc->tq, &sc->port[i].timer_reclaim_task);
1555 	}
1556 #ifdef notyet
1557 
1558 		if (sc->port[i].tq != NULL)
1559 #endif
1560 
1561 }
1562 
1563 static int
1564 offload_open(struct port_info *pi)
1565 {
1566 	struct adapter *adapter = pi->adapter;
1567 	struct t3cdev *tdev = TOEDEV(pi->ifp);
1568 	int adap_up = adapter->open_device_map & PORT_MASK;
1569 	int err = 0;
1570 
1571 	if (atomic_cmpset_int(&adapter->open_device_map,
1572 		(adapter->open_device_map & ~OFFLOAD_DEVMAP_BIT),
1573 		(adapter->open_device_map | OFFLOAD_DEVMAP_BIT)) == 0)
1574 		return (0);
1575 
1576 	ADAPTER_LOCK(pi->adapter);
1577 	if (!adap_up)
1578 		err = cxgb_up(adapter);
1579 	ADAPTER_UNLOCK(pi->adapter);
1580 	if (err)
1581 		return (err);
1582 
1583 	t3_tp_set_offload_mode(adapter, 1);
1584 	tdev->lldev = adapter->port[0].ifp;
1585 	err = cxgb_offload_activate(adapter);
1586 	if (err)
1587 		goto out;
1588 
1589 	init_port_mtus(adapter);
1590 	t3_load_mtus(adapter, adapter->params.mtus, adapter->params.a_wnd,
1591 		     adapter->params.b_wnd,
1592 		     adapter->params.rev == 0 ?
1593 		       adapter->port[0].ifp->if_mtu : 0xffff);
1594 	init_smt(adapter);
1595 
1596 	/* Call back all registered clients */
1597 	cxgb_add_clients(tdev);
1598 
1599 out:
1600 	/* restore them in case the offload module has changed them */
1601 	if (err) {
1602 		t3_tp_set_offload_mode(adapter, 0);
1603 		clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1604 		cxgb_set_dummy_ops(tdev);
1605 	}
1606 	return (err);
1607 }
1608 #ifdef notyet
1609 static int
1610 offload_close(struct t3cdev *tdev)
1611 {
1612 	struct adapter *adapter = tdev2adap(tdev);
1613 
1614 	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1615 		return (0);
1616 
1617 	/* Call back all registered clients */
1618 	cxgb_remove_clients(tdev);
1619 	tdev->lldev = NULL;
1620 	cxgb_set_dummy_ops(tdev);
1621 	t3_tp_set_offload_mode(adapter, 0);
1622 	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1623 
1624 	if (!adapter->open_device_map)
1625 		cxgb_down(adapter);
1626 
1627 	cxgb_offload_deactivate(adapter);
1628 	return (0);
1629 }
1630 #endif
1631 
1632 static void
1633 cxgb_init(void *arg)
1634 {
1635 	struct port_info *p = arg;
1636 
1637 	PORT_LOCK(p);
1638 	cxgb_init_locked(p);
1639 	PORT_UNLOCK(p);
1640 }
1641 
1642 static void
1643 cxgb_init_locked(struct port_info *p)
1644 {
1645 	struct ifnet *ifp;
1646 	adapter_t *sc = p->adapter;
1647 	int err;
1648 
1649 	PORT_LOCK_ASSERT_OWNED(p);
1650 	ifp = p->ifp;
1651 
1652 	ADAPTER_LOCK(p->adapter);
1653 	if ((sc->open_device_map == 0) && (err = cxgb_up(sc))) {
1654 		ADAPTER_UNLOCK(p->adapter);
1655 		cxgb_stop_locked(p);
1656 		return;
1657 	}
1658 	if (p->adapter->open_device_map == 0) {
1659 		t3_intr_clear(sc);
1660 		t3_sge_init_adapter(sc);
1661 	}
1662 	setbit(&p->adapter->open_device_map, p->port_id);
1663 	ADAPTER_UNLOCK(p->adapter);
1664 
1665 	if (is_offload(sc) && !ofld_disable) {
1666 		err = offload_open(p);
1667 		if (err)
1668 			log(LOG_WARNING,
1669 			    "Could not initialize offload capabilities\n");
1670 	}
1671 	cxgb_link_start(p);
1672 	t3_link_changed(sc, p->port_id);
1673 	ifp->if_baudrate = p->link_config.speed * 1000000;
1674 
1675 	device_printf(sc->dev, "enabling interrupts on port=%d\n", p->port_id);
1676 	t3_port_intr_enable(sc, p->port_id);
1677 
1678 	callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
1679 	    cxgb_tick, sc);
1680 
1681 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1682 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1683 }
1684 
1685 static void
1686 cxgb_set_rxmode(struct port_info *p)
1687 {
1688 	struct t3_rx_mode rm;
1689 	struct cmac *mac = &p->mac;
1690 
1691 	PORT_LOCK_ASSERT_OWNED(p);
1692 
1693 	t3_init_rx_mode(&rm, p);
1694 	t3_mac_set_rx_mode(mac, &rm);
1695 }
1696 
1697 static void
1698 cxgb_stop_locked(struct port_info *p)
1699 {
1700 	struct ifnet *ifp;
1701 
1702 	PORT_LOCK_ASSERT_OWNED(p);
1703 	ADAPTER_LOCK_ASSERT_NOTOWNED(p->adapter);
1704 
1705 	ifp = p->ifp;
1706 
1707 	t3_port_intr_disable(p->adapter, p->port_id);
1708 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1709 	p->phy.ops->power_down(&p->phy, 1);
1710 	t3_mac_disable(&p->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);
1711 
1712 	ADAPTER_LOCK(p->adapter);
1713 	clrbit(&p->adapter->open_device_map, p->port_id);
1714 
1715 
1716 	if (p->adapter->open_device_map == 0) {
1717 		cxgb_down_locked(p->adapter);
1718 	} else
1719 		ADAPTER_UNLOCK(p->adapter);
1720 
1721 }
1722 
1723 static int
1724 cxgb_set_mtu(struct port_info *p, int mtu)
1725 {
1726 	struct ifnet *ifp = p->ifp;
1727 	int error = 0;
1728 
1729 	if ((mtu < ETHERMIN) || (mtu > ETHER_MAX_LEN_JUMBO))
1730 		error = EINVAL;
1731 	else if (ifp->if_mtu != mtu) {
1732 		PORT_LOCK(p);
1733 		ifp->if_mtu = mtu;
1734 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1735 			callout_stop(&p->adapter->cxgb_tick_ch);
1736 			cxgb_stop_locked(p);
1737 			cxgb_init_locked(p);
1738 		}
1739 		PORT_UNLOCK(p);
1740 	}
1741 	return (error);
1742 }
1743 
1744 static int
1745 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1746 {
1747 	struct port_info *p = ifp->if_softc;
1748 	struct ifaddr *ifa = (struct ifaddr *)data;
1749 	struct ifreq *ifr = (struct ifreq *)data;
1750 	int flags, error = 0;
1751 	uint32_t mask;
1752 
1753 	/*
1754 	 * XXX need to check that we aren't in the middle of an unload
1755 	 */
1756 	switch (command) {
1757 	case SIOCSIFMTU:
1758 		error = cxgb_set_mtu(p, ifr->ifr_mtu);
1759 		break;
1760 	case SIOCSIFADDR:
1761 	case SIOCGIFADDR:
1762 		PORT_LOCK(p);
1763 		if (ifa->ifa_addr->sa_family == AF_INET) {
1764 			ifp->if_flags |= IFF_UP;
1765 			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1766 				cxgb_init_locked(p);
1767 			arp_ifinit(ifp, ifa);
1768 		} else
1769 			error = ether_ioctl(ifp, command, data);
1770 		PORT_UNLOCK(p);
1771 		break;
1772 	case SIOCSIFFLAGS:
1773 		callout_drain(&p->adapter->cxgb_tick_ch);
1774 		PORT_LOCK(p);
1775 		if (ifp->if_flags & IFF_UP) {
1776 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1777 				flags = p->if_flags;
1778 				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
1779 				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI))
1780 					cxgb_set_rxmode(p);
1781 			} else
1782 				cxgb_init_locked(p);
1783 			p->if_flags = ifp->if_flags;
1784 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1785 			cxgb_stop_locked(p);
1786 
1787 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1788 			adapter_t *sc = p->adapter;
1789 			callout_reset(&sc->cxgb_tick_ch,
1790 			    sc->params.stats_update_period * hz,
1791 			    cxgb_tick, sc);
1792 		}
1793 		PORT_UNLOCK(p);
1794 		break;
1795 	case SIOCSIFMEDIA:
1796 	case SIOCGIFMEDIA:
1797 		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
1798 		break;
1799 	case SIOCSIFCAP:
1800 		PORT_LOCK(p);
1801 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1802 		if (mask & IFCAP_TXCSUM) {
1803 			if (IFCAP_TXCSUM & ifp->if_capenable) {
1804 				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
1805 				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
1806 				    | CSUM_TSO);
1807 			} else {
1808 				ifp->if_capenable |= IFCAP_TXCSUM;
1809 				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1810 			}
1811 		} else if (mask & IFCAP_RXCSUM) {
1812 			if (IFCAP_RXCSUM & ifp->if_capenable) {
1813 				ifp->if_capenable &= ~IFCAP_RXCSUM;
1814 			} else {
1815 				ifp->if_capenable |= IFCAP_RXCSUM;
1816 			}
1817 		}
1818 		if (mask & IFCAP_TSO4) {
1819 			if (IFCAP_TSO4 & ifp->if_capenable) {
1820 				ifp->if_capenable &= ~IFCAP_TSO4;
1821 				ifp->if_hwassist &= ~CSUM_TSO;
1822 			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
1823 				ifp->if_capenable |= IFCAP_TSO4;
1824 				ifp->if_hwassist |= CSUM_TSO;
1825 			} else {
1826 				if (cxgb_debug)
1827 					printf("cxgb requires tx checksum offload"
1828 					    " be enabled to use TSO\n");
1829 				error = EINVAL;
1830 			}
1831 		}
1832 		PORT_UNLOCK(p);
1833 		break;
1834 	default:
1835 		error = ether_ioctl(ifp, command, data);
1836 		break;
1837 	}
1838 	return (error);
1839 }
1840 
1841 static int
1842 cxgb_start_tx(struct ifnet *ifp, uint32_t txmax)
1843 {
1844 	struct sge_qset *qs;
1845 	struct sge_txq *txq;
1846 	struct port_info *p = ifp->if_softc;
1847 	struct mbuf *m = NULL;
1848 	int err, in_use_init, free;
1849 
1850 	if (!p->link_config.link_ok)
1851 		return (ENXIO);
1852 
1853 	if (IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1854 		return (ENOBUFS);
1855 
1856 	qs = &p->adapter->sge.qs[p->first_qset];
1857 	txq = &qs->txq[TXQ_ETH];
1858 	err = 0;
1859 
1860 	if (txq->flags & TXQ_TRANSMITTING)
1861 		return (EINPROGRESS);
1862 
1863 	mtx_lock(&txq->lock);
1864 	txq->flags |= TXQ_TRANSMITTING;
1865 	in_use_init = txq->in_use;
1866 	while ((txq->in_use - in_use_init < txmax) &&
1867 	    (txq->size > txq->in_use + TX_MAX_DESC)) {
1868 		free = 0;
1869 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
1870 		if (m == NULL)
1871 			break;
1872 		/*
1873 		 * Convert chain to M_IOVEC
1874 		 */
1875 		KASSERT((m->m_flags & M_IOVEC) == 0, ("IOVEC set too early"));
1876 #ifdef notyet
1877 		m0 = m;
1878 		if (collapse_mbufs && m->m_pkthdr.len > MCLBYTES &&
1879 		    m_collapse(m, TX_MAX_SEGS, &m0) == EFBIG) {
1880 			if ((m0 = m_defrag(m, M_NOWAIT)) != NULL) {
1881 				m = m0;
1882 				m_collapse(m, TX_MAX_SEGS, &m0);
1883 			} else
1884 				break;
1885 		}
1886 		m = m0;
1887 #endif
1888 		if ((err = t3_encap(p, &m, &free)) != 0)
1889 			break;
1890 		BPF_MTAP(ifp, m);
1891 		if (free)
1892 			m_freem(m);
1893 	}
1894 	txq->flags &= ~TXQ_TRANSMITTING;
1895 	mtx_unlock(&txq->lock);
1896 
1897 	if (__predict_false(err)) {
1898 		if (err == ENOMEM) {
1899 			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1900 			IFQ_LOCK(&ifp->if_snd);
1901 			IFQ_DRV_PREPEND(&ifp->if_snd, m);
1902 			IFQ_UNLOCK(&ifp->if_snd);
1903 		}
1904 	}
1905 	if (err == 0 && m == NULL)
1906 		err = ENOBUFS;
1907 	else if ((err == 0) &&  (txq->size <= txq->in_use + TX_MAX_DESC) &&
1908 	    (ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
1909 		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1910 		err = ENOSPC;
1911 	}
1912 	return (err);
1913 }
1914 
1915 static void
1916 cxgb_start_proc(void *arg, int ncount)
1917 {
1918 	struct ifnet *ifp = arg;
1919 	struct port_info *pi = ifp->if_softc;
1920 	struct sge_qset *qs;
1921 	struct sge_txq *txq;
1922 	int error;
1923 
1924 	qs = &pi->adapter->sge.qs[pi->first_qset];
1925 	txq = &qs->txq[TXQ_ETH];
1926 
1927 	do {
1928 		if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC >> 2)
1929 			taskqueue_enqueue(pi->tq, &txq->qreclaim_task);
1930 
1931 		error = cxgb_start_tx(ifp, TX_START_MAX_DESC);
1932 	} while (error == 0);
1933 }
1934 
1935 static void
1936 cxgb_start(struct ifnet *ifp)
1937 {
1938 	struct port_info *pi = ifp->if_softc;
1939 	struct sge_qset *qs;
1940 	struct sge_txq *txq;
1941 	int err;
1942 
1943 	qs = &pi->adapter->sge.qs[pi->first_qset];
1944 	txq = &qs->txq[TXQ_ETH];
1945 
1946 	if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC >> 2)
1947 		taskqueue_enqueue(pi->tq,
1948 		    &txq->qreclaim_task);
1949 
1950 	err = cxgb_start_tx(ifp, TX_START_MAX_DESC);
1951 
1952 	if (err == 0)
1953 		taskqueue_enqueue(pi->tq, &pi->start_task);
1954 }
1955 
1956 
1957 static int
1958 cxgb_media_change(struct ifnet *ifp)
1959 {
1960 	if_printf(ifp, "media change not supported\n");
1961 	return (ENXIO);
1962 }
1963 
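/*
 * ifmedia status callback: report link validity, and when the link is up
 * translate the negotiated speed and duplex into IFM_* flags.
 */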
1964 static void
1965 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1966 {
1967 	struct port_info *p = ifp->if_softc;
1968 
1969 	ifmr->ifm_status = IFM_AVALID;
1970 	ifmr->ifm_active = IFM_ETHER;
1971 
1972 	if (!p->link_config.link_ok)
1973 		return;
1974 
1975 	ifmr->ifm_status |= IFM_ACTIVE;
1976 
1977 	switch (p->link_config.speed) {
1978 	case 10:
1979 		ifmr->ifm_active |= IFM_10_T;
1980 		break;
1981 	case 100:
1982 		ifmr->ifm_active |= IFM_100_TX;
1983 		break;
1984 	case 1000:
1985 		ifmr->ifm_active |= IFM_1000_T;
1986 		break;
1987 	}
1988 
1989 	if (p->link_config.duplex)
1990 		ifmr->ifm_active |= IFM_FDX;
1991 	else
1992 		ifmr->ifm_active |= IFM_HDX;
1993 }
1994 
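/*
 * Interrupt handler for slow (asynchronous) adapter events.  The actual
 * handling may sleep, so it is deferred to slow_intr_task on the
 * adapter's taskqueue.
 */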
1995 static void
1996 cxgb_async_intr(void *data)
1997 {
1998 	adapter_t *sc = data;
1999 
2000 	if (cxgb_debug)
2001 		device_printf(sc->dev, "cxgb_async_intr\n");
2002 	/*
2003 	 * May need to sleep - defer to taskqueue
2004 	 */
2005 	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2006 }
2007 
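/*
 * Taskqueue handler for external (PHY) interrupts: run the PHY interrupt
 * handler, then clear and re-enable the T3DBG source in the slow
 * interrupt mask.
 */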
2008 static void
2009 cxgb_ext_intr_handler(void *arg, int count)
2010 {
2011 	adapter_t *sc = (adapter_t *)arg;
2012 
2013 	if (cxgb_debug)
2014 		printf("cxgb_ext_intr_handler\n");
2015 
2016 	t3_phy_intr_handler(sc);
2017 
2018 	/* Now reenable external interrupts */
2019 	ADAPTER_LOCK(sc);
2020 	if (sc->slow_intr_mask) {
2021 		sc->slow_intr_mask |= F_T3DBG;
2022 		t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
2023 		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
2024 	}
2025 	ADAPTER_UNLOCK(sc);
2026 }
2027 
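/*
 * Poll link state on ports whose PHYs cannot report changes by interrupt
 * and refresh every ifnet's baudrate from the current negotiated speed.
 */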
2028 static void
2029 check_link_status(adapter_t *sc)
2030 {
2031 	int i;
2032 
2033 	for (i = 0; i < (sc)->params.nports; ++i) {
2034 		struct port_info *p = &sc->port[i];
2035 
2036 		if (!(p->port_type->caps & SUPPORTED_IRQ))
2037 			t3_link_changed(sc, i);
2038 		p->ifp->if_baudrate = p->link_config.speed * 1000000;
2039 	}
2040 }
2041 
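/*
 * T3 rev B2 MAC workaround: run the MAC watchdog on each running port.
 * A return of 1 is counted as a MAC toggle; 2 means the MAC was reset,
 * so the MTU, MAC address, RX mode and link are reprogrammed and the
 * port interrupt is re-enabled.
 */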
2042 static void
2043 check_t3b2_mac(struct adapter *adapter)
2044 {
2045 	int i;
2046 
2047 	for_each_port(adapter, i) {
2048 		struct port_info *p = &adapter->port[i];
2049 		struct ifnet *ifp = p->ifp;
2050 		int status;
2051 
2052 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2053 			continue;
2054 
2055 		status = 0;
2056 		PORT_LOCK(p);
2057 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING))
2058 			status = t3b2_mac_watchdog_task(&p->mac);
2059 		if (status == 1)
2060 			p->mac.stats.num_toggled++;
2061 		else if (status == 2) {
2062 			struct cmac *mac = &p->mac;
2063 
2064 			t3_mac_set_mtu(mac, ifp->if_mtu + ETHER_HDR_LEN
2065 			    + ETHER_VLAN_ENCAP_LEN);
2066 			t3_mac_set_address(mac, 0, p->hw_addr);
2067 			cxgb_set_rxmode(p);
2068 			t3_link_start(&p->phy, mac, &p->link_config);
2069 			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2070 			t3_port_intr_enable(adapter, p->port_id);
2071 			p->mac.stats.num_resets++;
2072 		}
2073 		PORT_UNLOCK(p);
2074 	}
2075 }
2076 
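/*
 * Periodic callout: defer the work to tick_task and re-arm the callout
 * for the next stats update period while any port is still open.
 */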
2077 static void
2078 cxgb_tick(void *arg)
2079 {
2080 	adapter_t *sc = (adapter_t *)arg;
2081 
2082 	taskqueue_enqueue(sc->tq, &sc->tick_task);
2083 
2084 	if (sc->open_device_map != 0)
2085 		callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
2086 		    cxgb_tick, sc);
2087 }
2088 
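/*
 * Tick task body: poll link state when link polling is configured, then,
 * with the adapter lock dropped (see the lock ordering note below), run
 * the rev B2 MAC watchdog on affected adapters.
 */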
2089 static void
2090 cxgb_tick_handler(void *arg, int count)
2091 {
2092 	adapter_t *sc = (adapter_t *)arg;
2093 	const struct adapter_params *p = &sc->params;
2094 
2095 	ADAPTER_LOCK(sc);
2096 	if (p->linkpoll_period)
2097 		check_link_status(sc);
2098 
2099 	/*
2100 	 * adapter lock can currently only be acquired after the
2101 	 * port lock
2102 	 */
2103 	ADAPTER_UNLOCK(sc);
2104 
2105 	if (p->rev == T3_REV_B2 && p->nports < 4)
2106 		check_t3b2_mac(sc);
2107 }
2108 
2109 static void
2110 touch_bars(device_t dev)
2111 {
2112 	/*
2113 	 * Don't enable yet
2114 	 */
2115 #if !defined(__LP64__) && 0
2116 	u32 v;
2117 
2118 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2119 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2120 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2121 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2122 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2123 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2124 #endif
2125 }
2126 
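/*
 * Write 'len' bytes of 'data' at byte 'offset' into the serial EEPROM.
 * The EEPROM is accessed in 4-byte words, so an unaligned request is
 * widened: the partial words at either end are read into a bounce
 * buffer, the new data is merged in, and the aligned range is written
 * back with write protection lifted for the duration.
 */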
2127 static int
2128 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2129 {
2130 	uint8_t *buf;
2131 	int err = 0;
2132 	u32 aligned_offset, aligned_len, *p;
2133 	struct adapter *adapter = pi->adapter;
2134 
2135 
2136 	aligned_offset = offset & ~3;
2137 	aligned_len = (len + (offset & 3) + 3) & ~3;
2138 
2139 	if (aligned_offset != offset || aligned_len != len) {
2140 		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2141 		if (!buf)
2142 			return (ENOMEM);
2143 		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2144 		if (!err && aligned_len > 4)
2145 			err = t3_seeprom_read(adapter,
2146 					      aligned_offset + aligned_len - 4,
2147 					      (u32 *)&buf[aligned_len - 4]);
2148 		if (err)
2149 			goto out;
2150 		memcpy(buf + (offset & 3), data, len);
2151 	} else
2152 		buf = (uint8_t *)(uintptr_t)data;
2153 
2154 	err = t3_seeprom_wp(adapter, 0);
2155 	if (err)
2156 		goto out;
2157 
2158 	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2159 		err = t3_seeprom_write(adapter, aligned_offset, *p);
2160 		aligned_offset += 4;
2161 	}
2162 
2163 	if (!err)
2164 		err = t3_seeprom_wp(adapter, 1);
2165 out:
2166 	if (buf != data)
2167 		free(buf, M_DEVBUF);
2168 	return (err);
2169 }
2170 
2171 
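/*
 * Range check for ioctl parameters: negative values mean "not specified"
 * and always pass; non-negative values must lie within [lo, hi].
 */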
2172 static int
2173 in_range(int val, int lo, int hi)
2174 {
2175 	return (val < 0 || (val <= hi && val >= lo));
2176 }
2177 
2178 static int
2179 cxgb_extension_open(struct cdev *dev, int flags, int fmp, d_thread_t *td)
2180 {
2181 	return (0);
2182 }
2183 
2184 static int
2185 cxgb_extension_close(struct cdev *dev, int flags, int fmt, d_thread_t *td)
2186 {
2187 	return (0);
2188 }
2189 
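/*
 * ioctl handler for the driver's control character device.  Only
 * privileged users are admitted; the commands cover MDIO register
 * access, adapter register and SGE context/descriptor reads, queue set
 * configuration, the MTU table, memory and TCAM dumps, trace filters,
 * packet scheduling and full register dumps.
 */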
2190 static int
2191 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2192     int fflag, struct thread *td)
2193 {
2194 	int mmd, error = 0;
2195 	struct port_info *pi = dev->si_drv1;
2196 	adapter_t *sc = pi->adapter;
2197 
2198 #ifdef PRIV_SUPPORTED
2199 	if (priv_check(td, PRIV_DRIVER)) {
2200 		if (cxgb_debug)
2201 			printf("user does not have access to privileged ioctls\n");
2202 		return (EPERM);
2203 	}
2204 #else
2205 	if (suser(td)) {
2206 		if (cxgb_debug)
2207 			printf("user does not have access to privileged ioctls\n");
2208 		return (EPERM);
2209 	}
2210 #endif
2211 
2212 	switch (cmd) {
2213 	case SIOCGMIIREG: {
2214 		uint32_t val;
2215 		struct cphy *phy = &pi->phy;
2216 		struct mii_data *mid = (struct mii_data *)data;
2217 
2218 		if (!phy->mdio_read)
2219 			return (EOPNOTSUPP);
2220 		if (is_10G(sc)) {
2221 			mmd = mid->phy_id >> 8;
2222 			if (!mmd)
2223 				mmd = MDIO_DEV_PCS;
2224 			else if (mmd > MDIO_DEV_XGXS)
2225 				return (EINVAL);
2226 
2227 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2228 					     mid->reg_num, &val);
2229 		} else
2230 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2231 					     mid->reg_num & 0x1f, &val);
2232 		if (error == 0)
2233 			mid->val_out = val;
2234 		break;
2235 	}
2236 	case SIOCSMIIREG: {
2237 		struct cphy *phy = &pi->phy;
2238 		struct mii_data *mid = (struct mii_data *)data;
2239 
2240 		if (!phy->mdio_write)
2241 			return (EOPNOTSUPP);
2242 		if (is_10G(sc)) {
2243 			mmd = mid->phy_id >> 8;
2244 			if (!mmd)
2245 				mmd = MDIO_DEV_PCS;
2246 			else if (mmd > MDIO_DEV_XGXS)
2247 				return (EINVAL);
2248 
2249 			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2250 					      mmd, mid->reg_num, mid->val_in);
2251 		} else
2252 			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2253 					      mid->reg_num & 0x1f,
2254 					      mid->val_in);
2255 		break;
2256 	}
2257 	case CHELSIO_SETREG: {
2258 		struct ch_reg *edata = (struct ch_reg *)data;
2259 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2260 			return (EFAULT);
2261 		t3_write_reg(sc, edata->addr, edata->val);
2262 		break;
2263 	}
2264 	case CHELSIO_GETREG: {
2265 		struct ch_reg *edata = (struct ch_reg *)data;
2266 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2267 			return (EFAULT);
2268 		edata->val = t3_read_reg(sc, edata->addr);
2269 		break;
2270 	}
2271 	case CHELSIO_GET_SGE_CONTEXT: {
2272 		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2273 		mtx_lock(&sc->sge.reg_lock);
2274 		switch (ecntxt->cntxt_type) {
2275 		case CNTXT_TYPE_EGRESS:
2276 			error = t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2277 			    ecntxt->data);
2278 			break;
2279 		case CNTXT_TYPE_FL:
2280 			error = t3_sge_read_fl(sc, ecntxt->cntxt_id,
2281 			    ecntxt->data);
2282 			break;
2283 		case CNTXT_TYPE_RSP:
2284 			error = t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2285 			    ecntxt->data);
2286 			break;
2287 		case CNTXT_TYPE_CQ:
2288 			error = t3_sge_read_cq(sc, ecntxt->cntxt_id,
2289 			    ecntxt->data);
2290 			break;
2291 		default:
2292 			error = EINVAL;
2293 			break;
2294 		}
2295 		mtx_unlock(&sc->sge.reg_lock);
2296 		break;
2297 	}
2298 	case CHELSIO_GET_SGE_DESC: {
2299 		struct ch_desc *edesc = (struct ch_desc *)data;
2300 		int ret;
2301 		if (edesc->queue_num >= SGE_QSETS * 6)
2302 			return (EINVAL);
2303 		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2304 		    edesc->queue_num % 6, edesc->idx, edesc->data);
2305 		if (ret < 0)
2306 			return (EINVAL);
2307 		edesc->size = ret;
2308 		break;
2309 	}
2310 	case CHELSIO_SET_QSET_PARAMS: {
2311 		struct qset_params *q;
2312 		struct ch_qset_params *t = (struct ch_qset_params *)data;
2313 
2314 		if (t->qset_idx >= SGE_QSETS)
2315 			return (EINVAL);
2316 		if (!in_range(t->intr_lat, 0, M_NEWTIMER) ||
2317 		    !in_range(t->cong_thres, 0, 255) ||
2318 		    !in_range(t->txq_size[0], MIN_TXQ_ENTRIES,
2319 			      MAX_TXQ_ENTRIES) ||
2320 		    !in_range(t->txq_size[1], MIN_TXQ_ENTRIES,
2321 			      MAX_TXQ_ENTRIES) ||
2322 		    !in_range(t->txq_size[2], MIN_CTRL_TXQ_ENTRIES,
2323 			      MAX_CTRL_TXQ_ENTRIES) ||
2324 		    !in_range(t->fl_size[0], MIN_FL_ENTRIES, MAX_RX_BUFFERS) ||
2325 		    !in_range(t->fl_size[1], MIN_FL_ENTRIES,
2326 			      MAX_RX_JUMBO_BUFFERS) ||
2327 		    !in_range(t->rspq_size, MIN_RSPQ_ENTRIES, MAX_RSPQ_ENTRIES))
2328 			return (EINVAL);
2329 		if ((sc->flags & FULL_INIT_DONE) &&
2330 		    (t->rspq_size >= 0 || t->fl_size[0] >= 0 ||
2331 		     t->fl_size[1] >= 0 || t->txq_size[0] >= 0 ||
2332 		     t->txq_size[1] >= 0 || t->txq_size[2] >= 0 ||
2333 		     t->polling >= 0 || t->cong_thres >= 0))
2334 			return (EBUSY);
2335 
2336 		q = &sc->params.sge.qset[t->qset_idx];
2337 
2338 		if (t->rspq_size >= 0)
2339 			q->rspq_size = t->rspq_size;
2340 		if (t->fl_size[0] >= 0)
2341 			q->fl_size = t->fl_size[0];
2342 		if (t->fl_size[1] >= 0)
2343 			q->jumbo_size = t->fl_size[1];
2344 		if (t->txq_size[0] >= 0)
2345 			q->txq_size[0] = t->txq_size[0];
2346 		if (t->txq_size[1] >= 0)
2347 			q->txq_size[1] = t->txq_size[1];
2348 		if (t->txq_size[2] >= 0)
2349 			q->txq_size[2] = t->txq_size[2];
2350 		if (t->cong_thres >= 0)
2351 			q->cong_thres = t->cong_thres;
2352 		if (t->intr_lat >= 0) {
2353 			struct sge_qset *qs = &sc->sge.qs[t->qset_idx];
2354 
2355 			q->coalesce_nsecs = t->intr_lat*1000;
2356 			t3_update_qset_coalesce(qs, q);
2357 		}
2358 		break;
2359 	}
2360 	case CHELSIO_GET_QSET_PARAMS: {
2361 		struct qset_params *q;
2362 		struct ch_qset_params *t = (struct ch_qset_params *)data;
2363 
2364 		if (t->qset_idx >= SGE_QSETS)
2365 			return (EINVAL);
2366 
2367 		q = &(sc)->params.sge.qset[t->qset_idx];
2368 		t->rspq_size   = q->rspq_size;
2369 		t->txq_size[0] = q->txq_size[0];
2370 		t->txq_size[1] = q->txq_size[1];
2371 		t->txq_size[2] = q->txq_size[2];
2372 		t->fl_size[0]  = q->fl_size;
2373 		t->fl_size[1]  = q->jumbo_size;
2374 		t->polling     = q->polling;
2375 		t->intr_lat    = q->coalesce_nsecs / 1000;
2376 		t->cong_thres  = q->cong_thres;
2377 		break;
2378 	}
2379 	case CHELSIO_SET_QSET_NUM: {
2380 		struct ch_reg *edata = (struct ch_reg *)data;
2381 		unsigned int port_idx = pi->port_id;
2382 
2383 		if (sc->flags & FULL_INIT_DONE)
2384 			return (EBUSY);
2385 		if (edata->val < 1 ||
2386 		    (edata->val > 1 && !(sc->flags & USING_MSIX)))
2387 			return (EINVAL);
2388 		if (edata->val + sc->port[!port_idx].nqsets > SGE_QSETS)
2389 			return (EINVAL);
2390 		sc->port[port_idx].nqsets = edata->val;
2391 		sc->port[0].first_qset = 0;
2392 		/*
2393 		 * XXX hardcode ourselves to 2 ports just like LEEENUX
2394 		 */
2395 		sc->port[1].first_qset = sc->port[0].nqsets;
2396 		break;
2397 	}
2398 	case CHELSIO_GET_QSET_NUM: {
2399 		struct ch_reg *edata = (struct ch_reg *)data;
2400 		edata->val = pi->nqsets;
2401 		break;
2402 	}
2403 #ifdef notyet
2404 	case CHELSIO_LOAD_FW:
2405 	case CHELSIO_GET_PM:
2406 	case CHELSIO_SET_PM:
2407 		return (EOPNOTSUPP);
2408 		break;
2409 #endif
2410 	case CHELSIO_SETMTUTAB: {
2411 		struct ch_mtus *m = (struct ch_mtus *)data;
2412 		int i;
2413 
2414 		if (!is_offload(sc))
2415 			return (EOPNOTSUPP);
2416 		if (offload_running(sc))
2417 			return (EBUSY);
2418 		if (m->nmtus != NMTUS)
2419 			return (EINVAL);
2420 		if (m->mtus[0] < 81)         /* accommodate SACK */
2421 			return (EINVAL);
2422 
2423 		/*
2424 		 * MTUs must be in ascending order
2425 		 */
2426 		for (i = 1; i < NMTUS; ++i)
2427 			if (m->mtus[i] < m->mtus[i - 1])
2428 				return (EINVAL);
2429 
2430 		memcpy(sc->params.mtus, m->mtus,
2431 		       sizeof(sc->params.mtus));
2432 		break;
2433 	}
2434 	case CHELSIO_GETMTUTAB: {
2435 		struct ch_mtus *m = (struct ch_mtus *)data;
2436 
2437 		if (!is_offload(sc))
2438 			return (EOPNOTSUPP);
2439 
2440 		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2441 		m->nmtus = NMTUS;
2442 		break;
2443 	}
2444 	case CHELSIO_DEVUP:
2445 		if (!is_offload(sc))
2446 			return (EOPNOTSUPP);
2447 		return offload_open(pi);
2448 		break;
2449 	case CHELSIO_GET_MEM: {
2450 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2451 		struct mc7 *mem;
2452 		uint8_t *useraddr;
2453 		u64 buf[32];
2454 
2455 		if (!is_offload(sc))
2456 			return (EOPNOTSUPP);
2457 		if (!(sc->flags & FULL_INIT_DONE))
2458 			return (EIO);         /* need the memory controllers */
2459 		if ((t->addr & 0x7) || (t->len & 0x7))
2460 			return (EINVAL);
2461 		if (t->mem_id == MEM_CM)
2462 			mem = &sc->cm;
2463 		else if (t->mem_id == MEM_PMRX)
2464 			mem = &sc->pmrx;
2465 		else if (t->mem_id == MEM_PMTX)
2466 			mem = &sc->pmtx;
2467 		else
2468 			return (EINVAL);
2469 
2470 		/*
2471 		 * Version scheme:
2472 		 * bits 0..9: chip version
2473 		 * bits 10..15: chip revision
2474 		 */
2475 		t->version = 3 | (sc->params.rev << 10);
2476 
2477 		/*
2478 		 * Read 256 bytes at a time as len can be large and we don't
2479 		 * want to use huge intermediate buffers.
2480 		 */
2481 		useraddr = (uint8_t *)(t + 1);   /* advance to start of buffer */
2482 		while (t->len) {
2483 			unsigned int chunk = min(t->len, sizeof(buf));
2484 
2485 			error = t3_mc7_bd_read(mem, t->addr / 8, chunk / 8, buf);
2486 			if (error)
2487 				return (-error);
2488 			if (copyout(buf, useraddr, chunk))
2489 				return (EFAULT);
2490 			useraddr += chunk;
2491 			t->addr += chunk;
2492 			t->len -= chunk;
2493 		}
2494 		break;
2495 	}
2496 	case CHELSIO_READ_TCAM_WORD: {
2497 		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2498 
2499 		if (!is_offload(sc))
2500 			return (EOPNOTSUPP);
2501 		if (!(sc->flags & FULL_INIT_DONE))
2502 			return (EIO);         /* need MC5 */
2503 		return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf);
2504 		break;
2505 	}
2506 	case CHELSIO_SET_TRACE_FILTER: {
2507 		struct ch_trace *t = (struct ch_trace *)data;
2508 		const struct trace_params *tp;
2509 
2510 		tp = (const struct trace_params *)&t->sip;
2511 		if (t->config_tx)
2512 			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2513 					       t->trace_tx);
2514 		if (t->config_rx)
2515 			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2516 					       t->trace_rx);
2517 		break;
2518 	}
2519 	case CHELSIO_SET_PKTSCHED: {
2520 		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2521 		if (sc->open_device_map == 0)
2522 			return (EAGAIN);
2523 		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2524 		    p->binding);
2525 		break;
2526 	}
2527 	case CHELSIO_IFCONF_GETREGS: {
2528 		struct ifconf_regs *regs = (struct ifconf_regs *)data;
2529 		int reglen = cxgb_get_regs_len();
2530 		uint8_t *buf = malloc(REGDUMP_SIZE, M_DEVBUF, M_NOWAIT);
2531 		if (buf == NULL)
2532 			return (ENOMEM);
2533 		if (regs->len > reglen)
2534 			regs->len = reglen;
2535 		else if (regs->len < reglen) {
2536 			error = E2BIG;
2537 			goto done;
2538 		}
2539 		cxgb_get_regs(sc, regs, buf);
2540 		error = copyout(buf, regs->data, reglen);
2541 
2542 		done:
2543 		free(buf, M_DEVBUF);
2544 
2545 		break;
2546 	}
2547 	case CHELSIO_SET_HW_SCHED: {
2548 		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2549 		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2550 
2551 		if ((sc->flags & FULL_INIT_DONE) == 0)
2552 			return (EAGAIN);       /* need TP to be initialized */
2553 		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2554 		    !in_range(t->channel, 0, 1) ||
2555 		    !in_range(t->kbps, 0, 10000000) ||
2556 		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2557 		    !in_range(t->flow_ipg, 0,
2558 			      dack_ticks_to_usec(sc, 0x7ff)))
2559 			return (EINVAL);
2560 
2561 		if (t->kbps >= 0) {
2562 			error = t3_config_sched(sc, t->kbps, t->sched);
2563 			if (error < 0)
2564 				return (-error);
2565 		}
2566 		if (t->class_ipg >= 0)
2567 			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2568 		if (t->flow_ipg >= 0) {
2569 			t->flow_ipg *= 1000;     /* us -> ns */
2570 			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2571 		}
2572 		if (t->mode >= 0) {
2573 			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2574 
2575 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2576 					 bit, t->mode ? bit : 0);
2577 		}
2578 		if (t->channel >= 0)
2579 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2580 					 1 << t->sched, t->channel << t->sched);
2581 		break;
2582 	}
2583 	default:
2584 		return (EOPNOTSUPP);
2585 		break;
2586 	}
2587 
2588 	return (error);
2589 }
2590 
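/*
 * Copy the registers in [start, end] (byte offsets, inclusive) into the
 * dump image; each 32-bit value is stored at its register's byte offset
 * within buf so the dump can be indexed by register address.
 */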
2591 static __inline void
2592 reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
2593     unsigned int end)
2594 {
2595 	uint32_t *p = (uint32_t *)(buf + start);
2596 
2597 	for ( ; start <= end; start += sizeof(uint32_t))
2598 		*p++ = t3_read_reg(ap, start);
2599 }
2600 
2601 #define T3_REGMAP_SIZE (3 * 1024)
2602 static int
2603 cxgb_get_regs_len(void)
2604 {
2605 	return T3_REGMAP_SIZE;
2606 }
2607 #undef T3_REGMAP_SIZE
2608 
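/*
 * Build the register dump returned by CHELSIO_IFCONF_GETREGS: stamp the
 * version word, zero the image and copy out the interesting register
 * blocks, skipping the clear-on-read MAC statistics counters.
 */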
2609 static void
2610 cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf)
2611 {
2612 
2613 	/*
2614 	 * Version scheme:
2615 	 * bits 0..9: chip version
2616 	 * bits 10..15: chip revision
2617 	 * bit 31: set for PCIe cards
2618 	 */
2619 	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
2620 
2621 	/*
2622 	 * We skip the MAC statistics registers because they are clear-on-read.
2623 	 * Also reading multi-register stats would need to synchronize with the
2624 	 * periodic mac stats accumulation.  Hard to justify the complexity.
2625 	 */
2626 	memset(buf, 0, REGDUMP_SIZE);
2627 	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
2628 	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
2629 	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
2630 	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
2631 	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
2632 	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
2633 		       XGM_REG(A_XGM_SERDES_STAT3, 1));
2634 	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
2635 		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
2636 }
2637