1 /**************************************************************************
2 
3 Copyright (c) 2007, Chelsio Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12 2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27 
28 ***************************************************************************/
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/bus.h>
37 #include <sys/module.h>
38 #include <sys/pciio.h>
39 #include <sys/conf.h>
40 #include <machine/bus.h>
41 #include <machine/resource.h>
42 #include <sys/bus_dma.h>
43 #include <sys/rman.h>
44 #include <sys/ioccom.h>
45 #include <sys/mbuf.h>
46 #include <sys/linker.h>
47 #include <sys/firmware.h>
48 #include <sys/socket.h>
49 #include <sys/sockio.h>
50 #include <sys/smp.h>
51 #include <sys/sysctl.h>
52 #include <sys/syslog.h>
53 #include <sys/queue.h>
54 #include <sys/taskqueue.h>
55 #include <sys/proc.h>
56 
57 #include <net/bpf.h>
58 #include <net/ethernet.h>
59 #include <net/if.h>
60 #include <net/if_arp.h>
61 #include <net/if_dl.h>
62 #include <net/if_media.h>
63 #include <net/if_types.h>
64 
65 #include <netinet/in_systm.h>
66 #include <netinet/in.h>
67 #include <netinet/if_ether.h>
68 #include <netinet/ip.h>
70 #include <netinet/tcp.h>
71 #include <netinet/udp.h>
72 
73 #include <dev/pci/pcireg.h>
74 #include <dev/pci/pcivar.h>
75 #include <dev/pci/pci_private.h>
76 
77 #ifdef CONFIG_DEFINED
78 #include <cxgb_include.h>
79 #else
80 #include <dev/cxgb/cxgb_include.h>
81 #endif
82 
83 #ifdef PRIV_SUPPORTED
84 #include <sys/priv.h>
85 #endif
86 
87 #ifdef IFNET_MULTIQUEUE
88 #include <machine/intr_machdep.h>
89 #endif
90 
91 static int cxgb_setup_msix(adapter_t *, int);
92 static void cxgb_teardown_msix(adapter_t *);
93 static void cxgb_init(void *);
94 static void cxgb_init_locked(struct port_info *);
95 static void cxgb_stop_locked(struct port_info *);
96 static void cxgb_set_rxmode(struct port_info *);
97 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
98 static int cxgb_media_change(struct ifnet *);
99 static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
100 static int setup_sge_qsets(adapter_t *);
101 static void cxgb_async_intr(void *);
102 static void cxgb_ext_intr_handler(void *, int);
103 static void cxgb_tick_handler(void *, int);
104 static void cxgb_down_locked(struct adapter *sc);
105 static void cxgb_tick(void *);
106 static void setup_rss(adapter_t *sc);
107 
108 /* Attachment glue for the PCI controller end of the device.  Each port of
109  * the device is attached separately, as defined later.
110  */
111 static int cxgb_controller_probe(device_t);
112 static int cxgb_controller_attach(device_t);
113 static int cxgb_controller_detach(device_t);
114 static void cxgb_free(struct adapter *);
115 static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
116     unsigned int end);
117 static void cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf);
118 static int cxgb_get_regs_len(void);
119 static int offload_open(struct port_info *pi);
120 static void touch_bars(device_t dev);
121 static int offload_close(struct t3cdev *tdev);
122 
123 static device_method_t cxgb_controller_methods[] = {
124 	DEVMETHOD(device_probe,		cxgb_controller_probe),
125 	DEVMETHOD(device_attach,	cxgb_controller_attach),
126 	DEVMETHOD(device_detach,	cxgb_controller_detach),
127 
128 	/* bus interface */
129 	DEVMETHOD(bus_print_child,	bus_generic_print_child),
130 	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
131 
132 	{ 0, 0 }
133 };
134 
135 static driver_t cxgb_controller_driver = {
136 	"cxgbc",
137 	cxgb_controller_methods,
138 	sizeof(struct adapter)
139 };
140 
141 static devclass_t	cxgb_controller_devclass;
142 DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
143 
144 /*
145  * Attachment glue for the ports.  Attachment is done directly to the
146  * controller device.
147  */
148 static int cxgb_port_probe(device_t);
149 static int cxgb_port_attach(device_t);
150 static int cxgb_port_detach(device_t);
151 
152 static device_method_t cxgb_port_methods[] = {
153 	DEVMETHOD(device_probe,		cxgb_port_probe),
154 	DEVMETHOD(device_attach,	cxgb_port_attach),
155 	DEVMETHOD(device_detach,	cxgb_port_detach),
156 	{ 0, 0 }
157 };
158 
159 static driver_t cxgb_port_driver = {
160 	"cxgb",
161 	cxgb_port_methods,
162 	0
163 };
164 
165 static d_ioctl_t cxgb_extension_ioctl;
166 static d_open_t cxgb_extension_open;
167 static d_close_t cxgb_extension_close;
168 
169 static struct cdevsw cxgb_cdevsw = {
170        .d_version =    D_VERSION,
171        .d_flags =      0,
172        .d_open =       cxgb_extension_open,
173        .d_close =      cxgb_extension_close,
174        .d_ioctl =      cxgb_extension_ioctl,
175        .d_name =       "cxgb",
176 };
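
/*
 * Each port registers one of these character devices under the name of
 * its ifnet (see cxgb_makedev() below), e.g. /dev/cxgb0, giving userland
 * access to the extension ioctls.
 */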
177 
178 static devclass_t	cxgb_port_devclass;
179 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
180 
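/*
 * One MSI-X vector per queue set, plus one for the async link/error
 * interrupt (cxgb_setup_msix() uses message 1 for errors and messages
 * 2..n for the queue sets).
 */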
181 #define SGE_MSIX_COUNT (SGE_QSETS + 1)
182 
183 /*
184  * The driver uses the best interrupt scheme available on a platform in the
185  * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
186  * of these schemes the driver may consider as follows:
187  *
188  * msi = 2: choose from among all three options
189  * msi = 1: only consider MSI and pin interrupts
190  * msi = 0: force pin interrupts
191  */
192 static int msi_allowed = 2;
193 
194 TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
195 SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
196 SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
197     "MSI-X, MSI, INTx selector");
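
/*
 * Example (hypothetical value): restrict the driver to MSI/INTx by setting
 * the tunable in /boot/loader.conf before the module loads:
 *
 *	hw.cxgb.msi_allowed="1"
 */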
198 
199 /*
200  * The driver enables offload as a default.
201  * To disable it, use ofld_disable = 1.
202  */
203 static int ofld_disable = 0;
204 TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
205 SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
206     "disable ULP offload");
207 
208 /*
209  * The driver uses an auto-queue algorithm by default.
210  * To disable it and force a single queue-set per port, use singleq = 1.
211  */
212 static int singleq = 0;
213 TUNABLE_INT("hw.cxgb.singleq", &singleq);
214 SYSCTL_UINT(_hw_cxgb, OID_AUTO, singleq, CTLFLAG_RDTUN, &singleq, 0,
215     "use a single queue-set per port");
216 
217 
218 
219 int cxgb_use_16k_clusters = 0;
220 TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
221 SYSCTL_UINT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
222     &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");
223 
224 enum {
225 	MAX_TXQ_ENTRIES      = 16384,
226 	MAX_CTRL_TXQ_ENTRIES = 1024,
227 	MAX_RSPQ_ENTRIES     = 16384,
228 	MAX_RX_BUFFERS       = 16384,
229 	MAX_RX_JUMBO_BUFFERS = 16384,
230 	MIN_TXQ_ENTRIES      = 4,
231 	MIN_CTRL_TXQ_ENTRIES = 4,
232 	MIN_RSPQ_ENTRIES     = 32,
233 	MIN_FL_ENTRIES       = 32,
234 	MIN_FL_JUMBO_ENTRIES = 32
235 };
236 
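/* Software state for one hardware packet-filter (tuple) entry. */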
237 struct filter_info {
238 	u32 sip;
239 	u32 sip_mask;
240 	u32 dip;
241 	u16 sport;
242 	u16 dport;
243 	u32 vlan:12;
244 	u32 vlan_prio:3;
245 	u32 mac_hit:1;
246 	u32 mac_idx:4;
247 	u32 mac_vld:1;
248 	u32 pkt_type:2;
249 	u32 report_filter_id:1;
250 	u32 pass:1;
251 	u32 rss:1;
252 	u32 qset:3;
253 	u32 locked:1;
254 	u32 valid:1;
255 };
256 
257 enum { FILTER_NO_VLAN_PRI = 7 };
258 
259 #define PORT_MASK ((1 << MAX_NPORTS) - 1)
260 
261 /* Table for probing the cards.  The desc field doubles as the list terminator. */
262 struct cxgb_ident {
263 	uint16_t	vendor;
264 	uint16_t	device;
265 	int		index;
266 	char		*desc;
267 } cxgb_identifiers[] = {
268 	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
269 	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
270 	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
271 	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
272 	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
273 	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
274 	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
275 	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
276 	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
277 	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
278 	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
279 	{0, 0, 0, NULL}
280 };
281 
282 static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
283 
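/*
 * Track whether this queue set's Ethernet TX queue is more than three
 * quarters full (txq->size - (txq->size >> 2) descriptors in use), so the
 * transmit path can tell when coalescing packets is worthwhile.
 */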
284 static __inline void
285 check_pkt_coalesce(struct sge_qset *qs)
286 {
287 	struct adapter *sc;
288 	struct sge_txq *txq;
289 
290 	txq = &qs->txq[TXQ_ETH];
291 	sc = qs->port->adapter;
292 
293 	if (sc->tunq_fill[qs->idx] && (txq->in_use < (txq->size - (txq->size>>2))))
294 		sc->tunq_fill[qs->idx] = 0;
295 	else if (!sc->tunq_fill[qs->idx] && (txq->in_use > (txq->size - (txq->size>>2))))
296 		sc->tunq_fill[qs->idx] = 1;
297 }
298 
299 static __inline char
300 t3rev2char(struct adapter *adapter)
301 {
302 	char rev = 'z';
303 
304 	switch(adapter->params.rev) {
305 	case T3_REV_A:
306 		rev = 'a';
307 		break;
308 	case T3_REV_B:
309 	case T3_REV_B2:
310 		rev = 'b';
311 		break;
312 	case T3_REV_C:
313 		rev = 'c';
314 		break;
315 	}
316 	return rev;
317 }
318 
319 static struct cxgb_ident *
320 cxgb_get_ident(device_t dev)
321 {
322 	struct cxgb_ident *id;
323 
324 	for (id = cxgb_identifiers; id->desc != NULL; id++) {
325 		if ((id->vendor == pci_get_vendor(dev)) &&
326 		    (id->device == pci_get_device(dev))) {
327 			return (id);
328 		}
329 	}
330 	return (NULL);
331 }
332 
333 static const struct adapter_info *
334 cxgb_get_adapter_info(device_t dev)
335 {
336 	struct cxgb_ident *id;
337 	const struct adapter_info *ai;
338 
339 	id = cxgb_get_ident(dev);
340 	if (id == NULL)
341 		return (NULL);
342 
343 	ai = t3_get_adapter_info(id->index);
344 
345 	return (ai);
346 }
347 
348 static int
349 cxgb_controller_probe(device_t dev)
350 {
351 	const struct adapter_info *ai;
352 	char *ports, buf[80];
353 	int nports;
354 
355 	ai = cxgb_get_adapter_info(dev);
356 	if (ai == NULL)
357 		return (ENXIO);
358 
359 	nports = ai->nports0 + ai->nports1;
360 	if (nports == 1)
361 		ports = "port";
362 	else
363 		ports = "ports";
364 
365 	snprintf(buf, sizeof(buf), "%s RNIC, %d %s", ai->desc, nports, ports);
366 	device_set_desc_copy(dev, buf);
367 	return (BUS_PROBE_DEFAULT);
368 }
369 
370 #define FW_FNAME "t3fw%d%d%d"
371 #define TPEEPROM_NAME "t3%ctpe%d%d%d"
372 #define TPSRAM_NAME "t3%cps%d%d%d"
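
/*
 * Worked example (hypothetical version numbers): with major.minor.micro
 * 4.1.0 and a rev-B part ('b' from t3rev2char()), the formats above expand
 * to the firmware(9) image names "t3fw410", "t3btpe410" and "t3bps410".
 */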
373 
374 static int
375 upgrade_fw(adapter_t *sc)
376 {
377 	char buf[32];
378 #ifdef FIRMWARE_LATEST
379 	const struct firmware *fw;
380 #else
381 	struct firmware *fw;
382 #endif
383 	int status;
384 
385 	snprintf(&buf[0], sizeof(buf), FW_FNAME,  FW_VERSION_MAJOR,
386 	    FW_VERSION_MINOR, FW_VERSION_MICRO);
387 
388 	fw = firmware_get(buf);
389 
390 	if (fw == NULL) {
391 		device_printf(sc->dev, "Could not find firmware image %s\n", buf);
392 		return (ENOENT);
393 	} else
394 		device_printf(sc->dev, "updating firmware on card with %s\n", buf);
395 	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
396 
397 	device_printf(sc->dev, "firmware update returned %s %d\n", (status == 0) ? "success" : "fail", status);
398 
399 	firmware_put(fw, FIRMWARE_UNLOAD);
400 
401 	return (status);
402 }
403 
404 static int
405 cxgb_controller_attach(device_t dev)
406 {
407 	device_t child;
408 	const struct adapter_info *ai;
409 	struct adapter *sc;
410 	int i, error = 0;
411 	uint32_t vers;
412 	int port_qsets = 1;
413 #ifdef MSI_SUPPORTED
414 	int msi_needed, reg;
415 #endif
416 	sc = device_get_softc(dev);
417 	sc->dev = dev;
418 	sc->msi_count = 0;
419 	ai = cxgb_get_adapter_info(dev);
420 
421 	/*
422 	 * XXX not really related but a recent addition
423 	 */
424 #ifdef MSI_SUPPORTED
425 	/* Find the PCIe link width and set max read request to 4KB. */
426 	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
427 		uint16_t lnk, pectl;
428 		lnk = pci_read_config(dev, reg + 0x12, 2);
429 		sc->link_width = (lnk >> 4) & 0x3f;
430 
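		/*
		 * PCIe Device Control register: bits 14:12 encode the max
		 * read request size as 128 << n bytes, so n = 5 selects
		 * the 4096-byte maximum.
		 */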
431 		pectl = pci_read_config(dev, reg + 0x8, 2);
432 		pectl = (pectl & ~0x7000) | (5 << 12);
433 		pci_write_config(dev, reg + 0x8, pectl, 2);
434 	}
435 
436 	if (sc->link_width != 0 && sc->link_width <= 4 &&
437 	    (ai->nports0 + ai->nports1) <= 2) {
438 		device_printf(sc->dev,
439 		    "PCIe x%d Link, expect reduced performance\n",
440 		    sc->link_width);
441 	}
442 #endif
443 	touch_bars(dev);
444 	pci_enable_busmaster(dev);
445 	/*
446 	 * Allocate the registers and make them available to the driver.
447 	 * The registers that we care about for NIC mode are in BAR 0
448 	 */
449 	sc->regs_rid = PCIR_BAR(0);
450 	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
451 	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
452 		device_printf(dev, "Cannot allocate BAR\n");
453 		return (ENXIO);
454 	}
455 
456 	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
457 	    device_get_unit(dev));
458 	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
459 
460 	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
461 	    device_get_unit(dev));
462 	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
463 	    device_get_unit(dev));
464 	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
465 	    device_get_unit(dev));
466 
467 	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_DEF);
468 	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
469 	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
470 
471 	sc->bt = rman_get_bustag(sc->regs_res);
472 	sc->bh = rman_get_bushandle(sc->regs_res);
473 	sc->mmio_len = rman_get_size(sc->regs_res);
474 
475 	if (t3_prep_adapter(sc, ai, 1) < 0) {
476 		printf("prep adapter failed\n");
477 		error = ENODEV;
478 		goto out;
479 	}
480 	/* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
481 	 * enough messages for the queue sets.  If that fails, try falling
482 	 * back to MSI.  If that fails, then try falling back to the legacy
483 	 * interrupt pin model.
484 	 */
485 #ifdef MSI_SUPPORTED
486 
487 	sc->msix_regs_rid = 0x20;
488 	if ((msi_allowed >= 2) &&
489 	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
490 	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
491 
492 		msi_needed = sc->msi_count = SGE_MSIX_COUNT;
493 
494 		if (((error = pci_alloc_msix(dev, &sc->msi_count)) != 0) ||
495 		    (sc->msi_count != msi_needed)) {
496 			device_printf(dev, "msix allocation failed - msi_count = %d"
497 			    " msi_needed=%d will try msi err=%d\n", sc->msi_count,
498 			    msi_needed, error);
499 			sc->msi_count = 0;
500 			pci_release_msi(dev);
501 			bus_release_resource(dev, SYS_RES_MEMORY,
502 			    sc->msix_regs_rid, sc->msix_regs_res);
503 			sc->msix_regs_res = NULL;
504 		} else {
505 			sc->flags |= USING_MSIX;
506 			sc->cxgb_intr = t3_intr_msix;
507 		}
508 	}
509 
510 	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
511 		sc->msi_count = 1;
512 		if (pci_alloc_msi(dev, &sc->msi_count)) {
513 			device_printf(dev, "alloc msi failed - will try INTx\n");
514 			sc->msi_count = 0;
515 			pci_release_msi(dev);
516 		} else {
517 			sc->flags |= USING_MSI;
518 			sc->irq_rid = 1;
519 			sc->cxgb_intr = t3_intr_msi;
520 		}
521 	}
522 #endif
523 	if (sc->msi_count == 0) {
524 		device_printf(dev, "using line interrupts\n");
525 		sc->irq_rid = 0;
526 		sc->cxgb_intr = t3b_intr;
527 	}
528 
529 
530 	/* Create a private taskqueue thread for handling driver events */
531 #ifdef TASKQUEUE_CURRENT
532 	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
533 	    taskqueue_thread_enqueue, &sc->tq);
534 #else
535 	sc->tq = taskqueue_create_fast("cxgb_taskq", M_NOWAIT,
536 	    taskqueue_thread_enqueue, &sc->tq);
537 #endif
538 	if (sc->tq == NULL) {
539 		device_printf(dev, "failed to allocate controller task queue\n");
540 		goto out;
541 	}
542 
543 	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
544 	    device_get_nameunit(dev));
545 	TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);
546 	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
547 
548 
549 	/* Create a periodic callout for checking adapter status */
550 	callout_init(&sc->cxgb_tick_ch, TRUE);
551 
552 	if (t3_check_fw_version(sc) != 0) {
553 		/*
554 		 * Warn user that a firmware update will be attempted in init.
555 		 */
556 		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
557 		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
558 		sc->flags &= ~FW_UPTODATE;
559 	} else {
560 		sc->flags |= FW_UPTODATE;
561 	}
562 
563 	if (t3_check_tpsram_version(sc) != 0) {
564 		/*
565 		 * Warn user that a protocol SRAM update will be attempted in init.
566 		 */
567 		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
568 		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
569 		sc->flags &= ~TPS_UPTODATE;
570 	} else {
571 		sc->flags |= TPS_UPTODATE;
572 	}
573 
574 	if ((sc->flags & USING_MSIX) && !singleq)
575 		port_qsets = min((SGE_QSETS/(sc)->params.nports), mp_ncpus);
576 
577 	/*
578 	 * Create a child device for each MAC.  The ethernet attachment
579 	 * will be done in these children.
580 	 */
581 	for (i = 0; i < (sc)->params.nports; i++) {
582 		struct port_info *pi;
583 
584 		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
585 			device_printf(dev, "failed to add child port\n");
586 			error = EINVAL;
587 			goto out;
588 		}
589 		pi = &sc->port[i];
590 		pi->adapter = sc;
591 		pi->nqsets = port_qsets;
592 		pi->first_qset = i*port_qsets;
593 		pi->port_id = i;
594 		pi->tx_chan = i >= ai->nports0;
595 		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
596 		sc->rxpkt_map[pi->txpkt_intf] = i;
598 		sc->portdev[i] = child;
599 		device_set_softc(child, pi);
600 	}
601 	if ((error = bus_generic_attach(dev)) != 0)
602 		goto out;
603 
604 	/*
605 	 * XXX need to poll for link status
606 	 */
607 	sc->params.stats_update_period = 1;
608 
609 	/* initialize sge private state */
610 	t3_sge_init_adapter(sc);
611 
612 	t3_led_ready(sc);
613 
614 	cxgb_offload_init();
615 	if (is_offload(sc)) {
616 		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
617 		cxgb_adapter_ofld(sc);
618 	}
619 	error = t3_get_fw_version(sc, &vers);
620 	if (error)
621 		goto out;
622 
623 	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
624 	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
625 	    G_FW_VERSION_MICRO(vers));
626 
627 	t3_add_attach_sysctls(sc);
628 out:
629 	if (error)
630 		cxgb_free(sc);
631 
632 	return (error);
633 }
634 
635 static int
636 cxgb_controller_detach(device_t dev)
637 {
638 	struct adapter *sc;
639 
640 	sc = device_get_softc(dev);
641 
642 	cxgb_free(sc);
643 
644 	return (0);
645 }
646 
647 static void
648 cxgb_free(struct adapter *sc)
649 {
650 	int i;
651 
652 
653 	cxgb_pcpu_shutdown_threads(sc);
654 	ADAPTER_LOCK(sc);
655 	/* Note: cxgb_down_locked() drops the adapter lock. */
658 	cxgb_down_locked(sc);
659 
660 #ifdef MSI_SUPPORTED
661 	if (sc->flags & (USING_MSI | USING_MSIX)) {
662 		device_printf(sc->dev, "releasing msi message(s)\n");
663 		pci_release_msi(sc->dev);
664 	} else {
665 		device_printf(sc->dev, "no msi message to release\n");
666 	}
667 #endif
668 	if (sc->msix_regs_res != NULL) {
669 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
670 		    sc->msix_regs_res);
671 	}
672 
673 	if (sc->tq != NULL) {
674 		taskqueue_drain(sc->tq, &sc->ext_intr_task);
675 		taskqueue_drain(sc->tq, &sc->tick_task);
676 	}
677 	t3_sge_deinit_sw(sc);
678 	/*
679 	 * Wait for last callout
680 	 */
681 
682 	DELAY(hz*100);
683 
684 	for (i = 0; i < (sc)->params.nports; ++i) {
685 		if (sc->portdev[i] != NULL)
686 			device_delete_child(sc->dev, sc->portdev[i]);
687 	}
688 
689 	bus_generic_detach(sc->dev);
690 	if (sc->tq != NULL)
691 		taskqueue_free(sc->tq);
692 	if (is_offload(sc)) {
693 		cxgb_adapter_unofld(sc);
694 		if (isset(&sc->open_device_map,	OFFLOAD_DEVMAP_BIT))
695 			offload_close(&sc->tdev);
696 		else
697 			printf("cxgb_free: DEVMAP_BIT not set\n");
698 	} else
699 		printf("cxgb_free: offload not enabled\n");
700 	free(sc->filters, M_DEVBUF);
701 	t3_sge_free(sc);
702 
703 	cxgb_offload_exit();
704 
705 	if (sc->regs_res != NULL)
706 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
707 		    sc->regs_res);
708 
709 	MTX_DESTROY(&sc->mdio_lock);
710 	MTX_DESTROY(&sc->sge.reg_lock);
711 	MTX_DESTROY(&sc->elmer_lock);
712 	ADAPTER_LOCK_DEINIT(sc);
713 }
714 
715 /**
716  *	setup_sge_qsets - configure SGE Tx/Rx/response queues
717  *	@sc: the controller softc
718  *
719  *	Determines how many sets of SGE queues to use and initializes them.
720  *	We support multiple queue sets per port if we have MSI-X, otherwise
721  *	just one queue set per port.
722  */
723 static int
724 setup_sge_qsets(adapter_t *sc)
725 {
726 	int i, j, err, irq_idx = 0, qset_idx = 0;
727 	u_int ntxq = SGE_TXQ_PER_SET;
728 
729 	if ((err = t3_sge_alloc(sc)) != 0) {
730 		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
731 		return (err);
732 	}
733 
734 	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
735 		irq_idx = -1;
736 
737 	for (i = 0; i < (sc)->params.nports; i++) {
738 		struct port_info *pi = &sc->port[i];
739 
740 		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
741 			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
742 			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
743 			    &sc->params.sge.qset[qset_idx], ntxq, pi);
744 			if (err) {
745 				t3_free_sge_resources(sc);
746 				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n",
747 				    err);
748 				return (err);
749 			}
750 		}
751 	}
752 
753 	return (0);
754 }
755 
756 static void
757 cxgb_teardown_msix(adapter_t *sc)
758 {
759 	int i, nqsets;
760 
761 	for (nqsets = i = 0; i < (sc)->params.nports; i++)
762 		nqsets += sc->port[i].nqsets;
763 
764 	for (i = 0; i < nqsets; i++) {
765 		if (sc->msix_intr_tag[i] != NULL) {
766 			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
767 			    sc->msix_intr_tag[i]);
768 			sc->msix_intr_tag[i] = NULL;
769 		}
770 		if (sc->msix_irq_res[i] != NULL) {
771 			bus_release_resource(sc->dev, SYS_RES_IRQ,
772 			    sc->msix_irq_rid[i], sc->msix_irq_res[i]);
773 			sc->msix_irq_res[i] = NULL;
774 		}
775 	}
776 }
777 
778 static int
779 cxgb_setup_msix(adapter_t *sc, int msix_count)
780 {
781 	int i, j, k, nqsets, rid;
782 
783 	/* The first message indicates link changes and error conditions */
784 	sc->irq_rid = 1;
785 	if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
786 	   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
787 		device_printf(sc->dev, "Cannot allocate msix interrupt\n");
788 		return (EINVAL);
789 	}
790 
791 	if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
792 #ifdef INTR_FILTERS
793 		NULL,
794 #endif
795 		cxgb_async_intr, sc, &sc->intr_tag)) {
796 		device_printf(sc->dev, "Cannot set up interrupt\n");
797 		return (EINVAL);
798 	}
799 	for (i = k = 0; i < (sc)->params.nports; i++) {
800 		nqsets = sc->port[i].nqsets;
801 		for (j = 0; j < nqsets; j++, k++) {
802 			struct sge_qset *qs = &sc->sge.qs[k];
803 
804 			rid = k + 2;
805 			if (cxgb_debug)
806 				printf("rid=%d ", rid);
807 			if ((sc->msix_irq_res[k] = bus_alloc_resource_any(
808 			    sc->dev, SYS_RES_IRQ, &rid,
809 			    RF_SHAREABLE | RF_ACTIVE)) == NULL) {
810 				device_printf(sc->dev, "Cannot allocate "
811 				    "interrupt for message %d\n", rid);
812 				return (EINVAL);
813 			}
814 			sc->msix_irq_rid[k] = rid;
815 			printf("setting up interrupt for port=%d\n",
816 			    qs->port->port_id);
817 			if (bus_setup_intr(sc->dev, sc->msix_irq_res[k],
818 				INTR_MPSAFE|INTR_TYPE_NET,
819 #ifdef INTR_FILTERS
820 				NULL,
821 #endif
822 				t3_intr_msix, qs, &sc->msix_intr_tag[k])) {
823 				device_printf(sc->dev, "Cannot set up "
824 				    "interrupt for message %d\n", rid);
825 				return (EINVAL);
826 			}
827 #ifdef IFNET_MULTIQUEUE
828 			if (singleq == 0) {
829 				int vector = rman_get_start(sc->msix_irq_res[k]);
830 				if (bootverbose)
831 					device_printf(sc->dev, "binding vector=%d to cpu=%d\n", vector, k % mp_ncpus);
832 				intr_bind(vector, k % mp_ncpus);
833 			}
834 #endif
835 		}
836 	}
837 
838 	return (0);
839 }
840 
841 static int
842 cxgb_port_probe(device_t dev)
843 {
844 	struct port_info *p;
845 	char buf[80];
846 
847 	p = device_get_softc(dev);
848 
849 	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, p->port_type->desc);
850 	device_set_desc_copy(dev, buf);
851 	return (0);
852 }
853 
854 
855 static int
856 cxgb_makedev(struct port_info *pi)
857 {
858 
859 	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
860 	    UID_ROOT, GID_WHEEL, 0600, if_name(pi->ifp));
861 
862 	if (pi->port_cdev == NULL)
863 		return (ENOMEM);
864 
865 	pi->port_cdev->si_drv1 = (void *)pi;
866 
867 	return (0);
868 }
869 
870 
871 #ifdef TSO_SUPPORTED
872 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU)
873 /* Don't enable TSO6 yet */
874 #define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4 | IFCAP_JUMBO_MTU)
875 #else
876 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
877 /* Don't enable TSO6 yet */
878 #define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM |  IFCAP_JUMBO_MTU)
879 #define IFCAP_TSO4 0x0
880 #define IFCAP_TSO6 0x0
881 #define CSUM_TSO   0x0
882 #endif
883 
884 
885 static int
886 cxgb_port_attach(device_t dev)
887 {
888 	struct port_info *p;
889 	struct ifnet *ifp;
890 	int err, media_flags;
891 
892 	p = device_get_softc(dev);
893 
894 	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
895 	    device_get_unit(device_get_parent(dev)), p->port_id);
896 	PORT_LOCK_INIT(p, p->lockbuf);
897 
898 	/* Allocate an ifnet object and set it up */
899 	ifp = p->ifp = if_alloc(IFT_ETHER);
900 	if (ifp == NULL) {
901 		device_printf(dev, "Cannot allocate ifnet\n");
902 		return (ENOMEM);
903 	}
904 
905 	/*
906 	 * Note that there is currently no watchdog timer.
907 	 */
908 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
909 	ifp->if_init = cxgb_init;
910 	ifp->if_softc = p;
911 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
912 	ifp->if_ioctl = cxgb_ioctl;
913 	ifp->if_start = cxgb_start;
914 
915 #ifdef IFNET_MULTIQUEUE
916 	ifp->if_flags |= IFF_MULTIQ;
917 	ifp->if_mq_start = cxgb_pcpu_start;
918 #endif
919 
920 	ifp->if_timer = 0;	/* Disable ifnet watchdog */
921 	ifp->if_watchdog = NULL;
922 
923 	ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN;
924 	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
925 	IFQ_SET_READY(&ifp->if_snd);
926 
927 	ifp->if_hwassist = ifp->if_capabilities = ifp->if_capenable = 0;
928 	ifp->if_capabilities |= CXGB_CAP;
929 	ifp->if_capenable |= CXGB_CAP_ENABLE;
930 	ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO);
931 	/*
932 	 * disable TSO on 4-port - it isn't supported by the firmware yet
933 	 */
934 	if (p->adapter->params.nports > 2) {
935 		ifp->if_capabilities &= ~(IFCAP_TSO4 | IFCAP_TSO6);
936 		ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_TSO6);
937 		ifp->if_hwassist &= ~CSUM_TSO;
938 	}
939 
940 	ether_ifattach(ifp, p->hw_addr);
941 	/*
942 	 * Only default to jumbo frames on 10GigE
943 	 */
944 	if (p->adapter->params.nports <= 2)
945 		ifp->if_mtu = 9000;
946 	if ((err = cxgb_makedev(p)) != 0) {
947 		printf("makedev failed %d\n", err);
948 		return (err);
949 	}
950 	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
951 	    cxgb_media_status);
952 
953 	if (!strcmp(p->port_type->desc, "10GBASE-CX4")) {
954 		media_flags = IFM_ETHER | IFM_10G_CX4 | IFM_FDX;
955 	} else if (!strcmp(p->port_type->desc, "10GBASE-SR")) {
956 		media_flags = IFM_ETHER | IFM_10G_SR | IFM_FDX;
957 	} else if (!strcmp(p->port_type->desc, "10GBASE-XR")) {
958 		media_flags = IFM_ETHER | IFM_10G_LR | IFM_FDX;
959 	} else if (!strcmp(p->port_type->desc, "10/100/1000BASE-T")) {
960 		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T, 0, NULL);
961 		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T | IFM_FDX,
962 			    0, NULL);
963 		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX,
964 			    0, NULL);
965 		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
966 			    0, NULL);
967 		ifmedia_add(&p->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
968 			    0, NULL);
969 		media_flags = 0;
970 	} else {
971 		printf("unsupported media type %s\n", p->port_type->desc);
972 		return (ENXIO);
973 	}
974 	if (media_flags) {
975 		ifmedia_add(&p->media, media_flags, 0, NULL);
976 		ifmedia_set(&p->media, media_flags);
977 	} else {
978 		ifmedia_add(&p->media, IFM_ETHER | IFM_AUTO, 0, NULL);
979 		ifmedia_set(&p->media, IFM_ETHER | IFM_AUTO);
980 	}
981 
982 
983 	snprintf(p->taskqbuf, TASKQ_NAME_LEN, "cxgb_port_taskq%d", p->port_id);
984 #ifdef TASKQUEUE_CURRENT
985 	/* Create a taskqueue for handling TX without starvation */
986 	p->tq = taskqueue_create(p->taskqbuf, M_NOWAIT,
987 	    taskqueue_thread_enqueue, &p->tq);
988 #else
989 	/* Create a taskqueue for handling TX without starvation */
990 	p->tq = taskqueue_create_fast(p->taskqbuf, M_NOWAIT,
991 	    taskqueue_thread_enqueue, &p->tq);
992 #endif
993 	t3_sge_init_port(p);
994 
995 	return (0);
996 }
997 
998 static int
999 cxgb_port_detach(device_t dev)
1000 {
1001 	struct port_info *p;
1002 
1003 	p = device_get_softc(dev);
1004 
1005 	PORT_LOCK(p);
1006 	if (p->ifp->if_drv_flags & IFF_DRV_RUNNING)
1007 		cxgb_stop_locked(p);
1008 	PORT_UNLOCK(p);
1009 
1010 	if (p->tq != NULL) {
1011 		taskqueue_drain(p->tq, &p->start_task);
1012 		taskqueue_free(p->tq);
1013 		p->tq = NULL;
1014 	}
1015 
1016 	ether_ifdetach(p->ifp);
1017 	printf("waiting for callout to stop ...");
1018 	DELAY(1000000);
1019 	printf("done\n");
1020 	/*
1021 	 * the lock may be acquired in ifdetach
1022 	 */
1023 	PORT_LOCK_DEINIT(p);
1024 	if_free(p->ifp);
1025 
1026 	if (p->port_cdev != NULL)
1027 		destroy_dev(p->port_cdev);
1028 
1029 	return (0);
1030 }
1031 
1032 void
1033 t3_fatal_err(struct adapter *sc)
1034 {
1035 	u_int fw_status[4];
1036 
1037 	if (sc->flags & FULL_INIT_DONE) {
1038 		t3_sge_stop(sc);
1039 		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1040 		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1041 		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1042 		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1043 		t3_intr_disable(sc);
1044 	}
1045 	device_printf(sc->dev, "encountered fatal error, operation suspended\n");
1046 	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1047 		device_printf(sc->dev, "FW status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1048 		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1049 }
1050 
1051 int
1052 t3_os_find_pci_capability(adapter_t *sc, int cap)
1053 {
1054 	device_t dev;
1055 	struct pci_devinfo *dinfo;
1056 	pcicfgregs *cfg;
1057 	uint32_t status;
1058 	uint8_t ptr;
1059 
1060 	dev = sc->dev;
1061 	dinfo = device_get_ivars(dev);
1062 	cfg = &dinfo->cfg;
1063 
1064 	status = pci_read_config(dev, PCIR_STATUS, 2);
1065 	if (!(status & PCIM_STATUS_CAPPRESENT))
1066 		return (0);
1067 
1068 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1069 	case 0:
1070 	case 1:
1071 		ptr = PCIR_CAP_PTR;
1072 		break;
1073 	case 2:
1074 		ptr = PCIR_CAP_PTR_2;
1075 		break;
1076 	default:
1077 		return (0);
1078 		break;
1079 	}
1080 	ptr = pci_read_config(dev, ptr, 1);
1081 
1082 	while (ptr != 0) {
1083 		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1084 			return (ptr);
1085 		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1086 	}
1087 
1088 	return (0);
1089 }
1090 
1091 int
1092 t3_os_pci_save_state(struct adapter *sc)
1093 {
1094 	device_t dev;
1095 	struct pci_devinfo *dinfo;
1096 
1097 	dev = sc->dev;
1098 	dinfo = device_get_ivars(dev);
1099 
1100 	pci_cfg_save(dev, dinfo, 0);
1101 	return (0);
1102 }
1103 
1104 int
1105 t3_os_pci_restore_state(struct adapter *sc)
1106 {
1107 	device_t dev;
1108 	struct pci_devinfo *dinfo;
1109 
1110 	dev = sc->dev;
1111 	dinfo = device_get_ivars(dev);
1112 
1113 	pci_cfg_restore(dev, dinfo);
1114 	return (0);
1115 }
1116 
1117 /**
1118  *	t3_os_link_changed - handle link status changes
1119  *	@adapter: the adapter associated with the link change
1120  *	@port_id: the port index whose link status has changed
1121  *	@link_stat: the new status of the link
1122  *	@speed: the new speed setting
1123  *	@duplex: the new duplex setting
1124  *	@fc: the new flow-control setting
1125  *
1126  *	This is the OS-dependent handler for link status changes.  The OS
1127  *	neutral handler takes care of most of the processing for these events,
1128  *	then calls this handler for any OS-specific processing.
1129  */
1130 void
1131 t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1132      int duplex, int fc)
1133 {
1134 	struct port_info *pi = &adapter->port[port_id];
1135 	struct cmac *mac = &adapter->port[port_id].mac;
1136 
1137 	if ((pi->ifp->if_flags & IFF_UP) == 0)
1138 		return;
1139 
1140 	if (link_status) {
1141 		t3_mac_enable(mac, MAC_DIRECTION_RX);
1142 		if_link_state_change(pi->ifp, LINK_STATE_UP);
1143 	} else {
1144 		if_link_state_change(pi->ifp, LINK_STATE_DOWN);
1145 		pi->phy.ops->power_down(&pi->phy, 1);
1146 		t3_mac_disable(mac, MAC_DIRECTION_RX);
1147 		t3_link_start(&pi->phy, mac, &pi->link_config);
1148 	}
1149 }
1150 
1151 /*
1152  * Interrupt-context handler for external (PHY) interrupts.
1153  */
1154 void
1155 t3_os_ext_intr_handler(adapter_t *sc)
1156 {
1157 	if (cxgb_debug)
1158 		printf("t3_os_ext_intr_handler\n");
1159 	/*
1160 	 * Schedule a task to handle external interrupts as they may be slow
1161 	 * and we use a mutex to protect MDIO registers.  We disable PHY
1162 	 * interrupts in the meantime and let the task reenable them when
1163 	 * it's done.
1164 	 */
1165 	ADAPTER_LOCK(sc);
1166 	if (sc->slow_intr_mask) {
1167 		sc->slow_intr_mask &= ~F_T3DBG;
1168 		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
1169 		taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
1170 	}
1171 	ADAPTER_UNLOCK(sc);
1172 }
1173 
1174 void
1175 t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1176 {
1177 
1178 	/*
1179 	 * The ifnet might not be allocated when this gets called, since
1180 	 * t3_prep_adapter calls this early during attach, so just save
1181 	 * the address in the port structure for now.
1182 	 */
1183 	if (cxgb_debug)
1184 		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1185 	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1186 }
1187 
1188 /**
1189  *	cxgb_link_start - enable a port
1190  *	@p: the port to enable
1191  *
1192  *	Performs the MAC and PHY actions needed to enable a port.
1193  */
1194 static void
1195 cxgb_link_start(struct port_info *p)
1196 {
1197 	struct ifnet *ifp;
1198 	struct t3_rx_mode rm;
1199 	struct cmac *mac = &p->mac;
1200 
1201 	ifp = p->ifp;
1202 
1203 	t3_init_rx_mode(&rm, p);
1204 	if (!mac->multiport)
1205 		t3_mac_reset(mac);
1206 	t3_mac_set_mtu(mac, ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
1207 	t3_mac_set_address(mac, 0, p->hw_addr);
1208 	t3_mac_set_rx_mode(mac, &rm);
1209 	t3_link_start(&p->phy, mac, &p->link_config);
1210 	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1211 }
1212 
1213 /**
1214  *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1215  *	@adap: the adapter
1216  *
1217  *	Sets up RSS to distribute packets to multiple receive queues.  We
1218  *	configure the RSS CPU lookup table to distribute to the number of HW
1219  *	receive queues, and the response queue lookup table to narrow that
1220  *	down to the response queues actually configured for each port.
1221  *	We always configure the RSS mapping for two ports since the mapping
1222  *	table has plenty of entries.
1223  */
1224 static void
1225 setup_rss(adapter_t *adap)
1226 {
1227 	int i;
1228 	u_int nq[2];
1229 	uint8_t cpus[SGE_QSETS + 1];
1230 	uint16_t rspq_map[RSS_TABLE_SIZE];
1231 
1232 	for (i = 0; i < SGE_QSETS; ++i)
1233 		cpus[i] = i;
1234 	cpus[SGE_QSETS] = 0xff;
1235 
1236 	nq[0] = nq[1] = 0;
1237 	for_each_port(adap, i) {
1238 		const struct port_info *pi = adap2pinfo(adap, i);
1239 
1240 		nq[pi->tx_chan] += pi->nqsets;
1241 	}
1242 	nq[0] = max(nq[0], 1U);
1243 	nq[1] = max(nq[1], 1U);
1244 	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1245 		rspq_map[i] = i % nq[0];
1246 		rspq_map[i + RSS_TABLE_SIZE / 2] = (i % nq[1]) + nq[0];
1247 	}
1248 	/* Calculate the reverse RSS map table */
1249 	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1250 		if (adap->rrss_map[rspq_map[i]] == 0xff)
1251 			adap->rrss_map[rspq_map[i]] = i;
1252 
1253 	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1254 		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1255 		      V_RRCPLCPUSIZE(6), cpus, rspq_map);
1256 
1257 }
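
/*
 * Worked example (hypothetical configuration): two ports with two queue
 * sets each give nq[0] = nq[1] = 2, so the first half of rspq_map cycles
 * 0,1,0,1,... for channel 0 and the second half cycles 2,3,2,3,... for
 * channel 1.
 */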
1258 
1259 /*
1260  * Sends an mbuf to an offload queue driver
1261  * after dealing with any active network taps.
1262  */
1263 static inline int
1264 offload_tx(struct t3cdev *tdev, struct mbuf *m)
1265 {
1266 	int ret;
1267 
1268 	ret = t3_offload_tx(tdev, m);
1269 	return (ret);
1270 }
1271 
1272 static int
1273 write_smt_entry(struct adapter *adapter, int idx)
1274 {
1275 	struct port_info *pi = &adapter->port[idx];
1276 	struct cpl_smt_write_req *req;
1277 	struct mbuf *m;
1278 
1279 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1280 		return (ENOMEM);
1281 
1282 	req = mtod(m, struct cpl_smt_write_req *);
1283 	m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
1284 
1285 	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1286 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1287 	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1288 	req->iff = idx;
1289 	memset(req->src_mac1, 0, sizeof(req->src_mac1));
1290 	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1291 
1292 	m_set_priority(m, 1);
1293 
1294 	offload_tx(&adapter->tdev, m);
1295 
1296 	return (0);
1297 }
1298 
1299 static int
1300 init_smt(struct adapter *adapter)
1301 {
1302 	int i;
1303 
1304 	for_each_port(adapter, i)
1305 		write_smt_entry(adapter, i);
1306 	return (0);
1307 }
1308 
1309 static void
1310 init_port_mtus(adapter_t *adapter)
1311 {
1312 	unsigned int mtus = adapter->port[0].ifp->if_mtu;
1313 
1314 	if (adapter->port[1].ifp)
1315 		mtus |= adapter->port[1].ifp->if_mtu << 16;
1316 	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1317 }
1318 
1319 static void
1320 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1321 			      int hi, int port)
1322 {
1323 	struct mbuf *m;
1324 	struct mngt_pktsched_wr *req;
1325 
1326 	m = m_gethdr(M_DONTWAIT, MT_DATA);
1327 	if (m) {
1328 		req = mtod(m, struct mngt_pktsched_wr *);
1329 		req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1330 		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1331 		req->sched = sched;
1332 		req->idx = qidx;
1333 		req->min = lo;
1334 		req->max = hi;
1335 		req->binding = port;
1336 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1337 		t3_mgmt_tx(adap, m);
1338 	}
1339 }
1340 
1341 static void
1342 bind_qsets(adapter_t *sc)
1343 {
1344 	int i, j;
1345 
1346 	cxgb_pcpu_startup_threads(sc);
1347 	for (i = 0; i < (sc)->params.nports; ++i) {
1348 		const struct port_info *pi = adap2pinfo(sc, i);
1349 
1350 		for (j = 0; j < pi->nqsets; ++j) {
1351 			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1352 					  -1, pi->tx_chan);
1353 
1354 		}
1355 	}
1356 }
1357 
1358 static void
1359 update_tpeeprom(struct adapter *adap)
1360 {
1361 #ifdef FIRMWARE_LATEST
1362 	const struct firmware *tpeeprom;
1363 #else
1364 	struct firmware *tpeeprom;
1365 #endif
1366 
1367 	char buf[64];
1368 	uint32_t version;
1369 	unsigned int major, minor;
1370 	int ret, len;
1371 	char rev;
1372 
1373 	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1374 
1375 	major = G_TP_VERSION_MAJOR(version);
1376 	minor = G_TP_VERSION_MINOR(version);
1377 	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1378 		return;
1379 
1380 	rev = t3rev2char(adap);
1381 
1382 	snprintf(buf, sizeof(buf), TPEEPROM_NAME, rev,
1383 		 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1384 
1385 	tpeeprom = firmware_get(buf);
1386 	if (tpeeprom == NULL) {
1387 		device_printf(adap->dev, "could not load TP EEPROM: unable to load %s\n",
1388 			buf);
1389 		return;
1390 	}
1391 
1392 	len = tpeeprom->datasize - 4;
1393 
1394 	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1395 	if (ret)
1396 		goto release_tpeeprom;
1397 
1398 	if (len != TP_SRAM_LEN) {
1399 		device_printf(adap->dev, "%s length is wrong len=%d expected=%d\n", buf, len, TP_SRAM_LEN);
1400 		goto release_tpeeprom;
1401 	}
1402 
1403 	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1404 	    TP_SRAM_OFFSET);
1405 
1406 	if (!ret) {
1407 		device_printf(adap->dev,
1408 			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1409 			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1410 	} else
1411 		device_printf(adap->dev, "Protocol SRAM image update in EEPROM failed\n");
1412 
1413 release_tpeeprom:
1414 	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1415 
1416 	return;
1417 }
1418 
1419 static int
1420 update_tpsram(struct adapter *adap)
1421 {
1422 #ifdef FIRMWARE_LATEST
1423 	const struct firmware *tpsram;
1424 #else
1425 	struct firmware *tpsram;
1426 #endif
1427 	char buf[64];
1428 	int ret;
1429 	char rev;
1430 
1431 	rev = t3rev2char(adap);
1432 	if (!rev)
1433 		return (0);
1434 
1435 	update_tpeeprom(adap);
1436 
1437 	snprintf(buf, sizeof(buf), TPSRAM_NAME, rev,
1438 		 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1439 
1440 	tpsram = firmware_get(buf);
1441 	if (tpsram == NULL) {
1442 		device_printf(adap->dev, "could not load TP SRAM: unable to load %s\n",
1443 			buf);
1444 		return (EINVAL);
1445 	} else
1446 		device_printf(adap->dev, "updating TP SRAM with %s\n", buf);
1447 
1448 	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1449 	if (ret)
1450 		goto release_tpsram;
1451 
1452 	ret = t3_set_proto_sram(adap, tpsram->data);
1453 	if (ret)
1454 		device_printf(adap->dev, "loading protocol SRAM failed\n");
1455 
1456 release_tpsram:
1457 	firmware_put(tpsram, FIRMWARE_UNLOAD);
1458 
1459 	return (ret);
1460 }
1461 
1462 /**
1463  *	cxgb_up - enable the adapter
1464  *	@adap: adapter being enabled
1465  *
1466  *	Called when the first port is enabled, this function performs the
1467  *	actions necessary to make an adapter operational, such as completing
1468  *	the initialization of HW modules, and enabling interrupts.
1469  *
1470  */
1471 static int
1472 cxgb_up(struct adapter *sc)
1473 {
1474 	int err = 0;
1475 
1476 	if ((sc->flags & FULL_INIT_DONE) == 0) {
1477 
1478 		if ((sc->flags & FW_UPTODATE) == 0)
1479 			if ((err = upgrade_fw(sc)))
1480 				goto out;
1481 		if ((sc->flags & TPS_UPTODATE) == 0)
1482 			if ((err = update_tpsram(sc)))
1483 				goto out;
1484 		err = t3_init_hw(sc, 0);
1485 		if (err)
1486 			goto out;
1487 
1488 		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1489 
1490 		err = setup_sge_qsets(sc);
1491 		if (err)
1492 			goto out;
1493 
1494 		setup_rss(sc);
1495 		t3_add_configured_sysctls(sc);
1496 		sc->flags |= FULL_INIT_DONE;
1497 	}
1498 
1499 	t3_intr_clear(sc);
1500 
1501 	/* If it's MSI or INTx, allocate a single interrupt for everything */
1502 	if ((sc->flags & USING_MSIX) == 0) {
1503 		if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
1504 		   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
1505 			device_printf(sc->dev, "Cannot allocate interrupt rid=%d\n",
1506 			    sc->irq_rid);
1507 			err = EINVAL;
1508 			goto out;
1509 		}
1510 		device_printf(sc->dev, "allocated irq_res=%p\n", sc->irq_res);
1511 
1512 		if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
1513 #ifdef INTR_FILTERS
1514 			NULL,
1515 #endif
1516 			sc->cxgb_intr, sc, &sc->intr_tag)) {
1517 			device_printf(sc->dev, "Cannot set up interrupt\n");
1518 			err = EINVAL;
1519 			goto irq_err;
1520 		}
1521 	} else {
1522 		cxgb_setup_msix(sc, sc->msi_count);
1523 	}
1524 
1525 	t3_sge_start(sc);
1526 	t3_intr_enable(sc);
1527 
1528 	if (!(sc->flags & QUEUES_BOUND)) {
1529 		printf("bind qsets\n");
1530 		bind_qsets(sc);
1531 		sc->flags |= QUEUES_BOUND;
1532 	}
1533 out:
1534 	return (err);
1535 irq_err:
1536 	CH_ERR(sc, "request_irq failed, err %d\n", err);
1537 	goto out;
1538 }
1539 
1540 
1541 /*
1542  * Release resources when all the ports and offloading have been stopped.
1543  */
1544 static void
1545 cxgb_down_locked(struct adapter *sc)
1546 {
1547 	int i;
1548 
1549 	t3_sge_stop(sc);
1550 	t3_intr_disable(sc);
1551 
1552 	if (sc->intr_tag != NULL) {
1553 		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
1554 		sc->intr_tag = NULL;
1555 	}
1556 	if (sc->irq_res != NULL) {
1557 		device_printf(sc->dev, "de-allocating interrupt irq_rid=%d irq_res=%p\n",
1558 		    sc->irq_rid, sc->irq_res);
1559 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
1560 		    sc->irq_res);
1561 		sc->irq_res = NULL;
1562 	}
1563 
1564 	if (sc->flags & USING_MSIX)
1565 		cxgb_teardown_msix(sc);
1566 	ADAPTER_UNLOCK(sc);
1567 
1568 	callout_stop(&sc->cxgb_tick_ch);
1569 	callout_stop(&sc->sge_timer_ch);
1570 	callout_drain(&sc->cxgb_tick_ch);
1571 	callout_drain(&sc->sge_timer_ch);
1572 
1573 	if (sc->tq != NULL) {
1574 		taskqueue_drain(sc->tq, &sc->slow_intr_task);
1575 		for (i = 0; i < sc->params.nports; i++)
1576 			taskqueue_drain(sc->tq, &sc->port[i].timer_reclaim_task);
1577 	}
1578 }
1579 
1580 static int
1581 offload_open(struct port_info *pi)
1582 {
1583 	struct adapter *adapter = pi->adapter;
1584 	struct t3cdev *tdev = &adapter->tdev;
1585 #ifdef notyet
1586 	    T3CDEV(pi->ifp);
1587 #endif
1588 	int adap_up = adapter->open_device_map & PORT_MASK;
1589 	int err = 0;
1590 
1591 	printf("device_map=0x%x\n", adapter->open_device_map);
1592 	if (atomic_cmpset_int(&adapter->open_device_map,
1593 		(adapter->open_device_map & ~(1<<OFFLOAD_DEVMAP_BIT)),
1594 		(adapter->open_device_map | (1<<OFFLOAD_DEVMAP_BIT))) == 0)
1595 		return (0);
1596 
1597 
1598 	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1599 		printf("offload_open: DEVMAP_BIT did not get set 0x%x\n", adapter->open_device_map);
1600 	ADAPTER_LOCK(pi->adapter);
1601 	if (!adap_up)
1602 		err = cxgb_up(adapter);
1603 	ADAPTER_UNLOCK(pi->adapter);
1604 	if (err)
1605 		return (err);
1606 
1607 	t3_tp_set_offload_mode(adapter, 1);
1608 	tdev->lldev = pi->ifp;
1609 	err = cxgb_offload_activate(adapter);
1610 	if (err)
1611 		goto out;
1612 
1613 	init_port_mtus(adapter);
1614 	t3_load_mtus(adapter, adapter->params.mtus, adapter->params.a_wnd,
1615 		     adapter->params.b_wnd,
1616 		     adapter->params.rev == 0 ?
1617 		       adapter->port[0].ifp->if_mtu : 0xffff);
1618 	init_smt(adapter);
1619 
1620 	/* Call back all registered clients */
1621 	cxgb_add_clients(tdev);
1622 
1623 out:
1624 	/* Undo the offload setup if activation failed. */
1625 	if (err) {
1626 		t3_tp_set_offload_mode(adapter, 0);
1627 		clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1628 		cxgb_set_dummy_ops(tdev);
1629 	}
1630 	return (err);
1631 }
1632 
1633 static int
1634 offload_close(struct t3cdev *tdev)
1635 {
1636 	struct adapter *adapter = tdev2adap(tdev);
1637 
1638 	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT)) {
1639 		printf("offload_close: DEVMAP_BIT not set\n");
1640 
1641 		return (0);
1642 	}
1643 
1644 	/* Call back all registered clients */
1645 	cxgb_remove_clients(tdev);
1646 	tdev->lldev = NULL;
1647 	cxgb_set_dummy_ops(tdev);
1648 	t3_tp_set_offload_mode(adapter, 0);
1649 	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1650 
1651 	ADAPTER_LOCK(adapter);
1652 	if (!adapter->open_device_map)
1653 		cxgb_down_locked(adapter);
1654 	else
1655 		ADAPTER_UNLOCK(adapter);
1656 	cxgb_offload_deactivate(adapter);
1657 	return (0);
1658 }
1659 
1660 
1661 static void
1662 cxgb_init(void *arg)
1663 {
1664 	struct port_info *p = arg;
1665 
1666 	PORT_LOCK(p);
1667 	cxgb_init_locked(p);
1668 	PORT_UNLOCK(p);
1669 }
1670 
1671 static void
1672 cxgb_init_locked(struct port_info *p)
1673 {
1674 	struct ifnet *ifp;
1675 	adapter_t *sc = p->adapter;
1676 	int err;
1677 
1678 	PORT_LOCK_ASSERT_OWNED(p);
1679 	ifp = p->ifp;
1680 
1681 	ADAPTER_LOCK(p->adapter);
1682 	if ((sc->open_device_map == 0) && (err = cxgb_up(sc))) {
1683 		ADAPTER_UNLOCK(p->adapter);
1684 		cxgb_stop_locked(p);
1685 		return;
1686 	}
1687 	if (p->adapter->open_device_map == 0) {
1688 		t3_intr_clear(sc);
1689 	}
1690 	setbit(&p->adapter->open_device_map, p->port_id);
1691 	ADAPTER_UNLOCK(p->adapter);
1692 
1693 	if (is_offload(sc) && !ofld_disable) {
1694 		err = offload_open(p);
1695 		if (err)
1696 			log(LOG_WARNING,
1697 			    "Could not initialize offload capabilities\n");
1698 		else
1699 			printf("offload opened\n");
1700 	}
1701 	cxgb_link_start(p);
1702 	t3_link_changed(sc, p->port_id);
1703 	ifp->if_baudrate = p->link_config.speed * 1000000;
1704 
1705 	device_printf(sc->dev, "enabling interrupts on port=%d\n", p->port_id);
1706 	t3_port_intr_enable(sc, p->port_id);
1707 
1708 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
1709 	t3_sge_reset_adapter(sc);
1710 
1711 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1712 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1713 }
1714 
1715 static void
1716 cxgb_set_rxmode(struct port_info *p)
1717 {
1718 	struct t3_rx_mode rm;
1719 	struct cmac *mac = &p->mac;
1720 
1721 	PORT_LOCK_ASSERT_OWNED(p);
1722 
1723 	t3_init_rx_mode(&rm, p);
1724 	t3_mac_set_rx_mode(mac, &rm);
1725 }
1726 
1727 static void
1728 cxgb_stop_locked(struct port_info *p)
1729 {
1730 	struct ifnet *ifp;
1731 
1732 	PORT_LOCK_ASSERT_OWNED(p);
1733 	ADAPTER_LOCK_ASSERT_NOTOWNED(p->adapter);
1734 
1735 	ifp = p->ifp;
1736 	t3_port_intr_disable(p->adapter, p->port_id);
1737 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1738 	p->phy.ops->power_down(&p->phy, 1);
1739 	t3_mac_disable(&p->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);
1740 
1741 	ADAPTER_LOCK(p->adapter);
1742 	clrbit(&p->adapter->open_device_map, p->port_id);
1743 
1744 	if (p->adapter->open_device_map == 0) {
1745 		cxgb_down_locked(p->adapter);
1746 	} else
1747 		ADAPTER_UNLOCK(p->adapter);
1748 
1749 }
1750 
1751 static int
1752 cxgb_set_mtu(struct port_info *p, int mtu)
1753 {
1754 	struct ifnet *ifp = p->ifp;
1755 	int error = 0;
1756 
1757 	if ((mtu < ETHERMIN) || (mtu > ETHER_MAX_LEN_JUMBO))
1758 		error = EINVAL;
1759 	else if (ifp->if_mtu != mtu) {
1760 		PORT_LOCK(p);
1761 		ifp->if_mtu = mtu;
1762 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1763 			callout_stop(&p->adapter->cxgb_tick_ch);
1764 			cxgb_stop_locked(p);
1765 			cxgb_init_locked(p);
1766 		}
1767 		PORT_UNLOCK(p);
1768 	}
1769 	return (error);
1770 }
1771 
1772 static int
1773 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1774 {
1775 	struct port_info *p = ifp->if_softc;
1776 	struct ifaddr *ifa = (struct ifaddr *)data;
1777 	struct ifreq *ifr = (struct ifreq *)data;
1778 	int flags, error = 0;
1779 	uint32_t mask;
1780 
1781 	/*
1782 	 * XXX need to check that we aren't in the middle of an unload
1783 	 */
1784 	switch (command) {
1785 	case SIOCSIFMTU:
1786 		error = cxgb_set_mtu(p, ifr->ifr_mtu);
1787 		break;
1788 	case SIOCSIFADDR:
1789 	case SIOCGIFADDR:
1790 		PORT_LOCK(p);
1791 		if (ifa->ifa_addr->sa_family == AF_INET) {
1792 			ifp->if_flags |= IFF_UP;
1793 			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1794 				cxgb_init_locked(p);
1795 			arp_ifinit(ifp, ifa);
1796 		} else
1797 			error = ether_ioctl(ifp, command, data);
1798 		PORT_UNLOCK(p);
1799 		break;
1800 	case SIOCSIFFLAGS:
1801 		callout_drain(&p->adapter->cxgb_tick_ch);
1802 		PORT_LOCK(p);
1803 		if (ifp->if_flags & IFF_UP) {
1804 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1805 				flags = p->if_flags;
1806 				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
1807 				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI))
1808 					cxgb_set_rxmode(p);
1809 			} else
1810 				cxgb_init_locked(p);
1811 			p->if_flags = ifp->if_flags;
1812 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1813 			cxgb_stop_locked(p);
1814 
1815 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1816 			adapter_t *sc = p->adapter;
1817 			callout_reset(&sc->cxgb_tick_ch, hz,
1818 			    cxgb_tick, sc);
1819 		}
1820 		PORT_UNLOCK(p);
1821 		break;
1822 	case SIOCSIFMEDIA:
1823 	case SIOCGIFMEDIA:
1824 		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
1825 		break;
1826 	case SIOCSIFCAP:
1827 		PORT_LOCK(p);
1828 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1829 		if (mask & IFCAP_TXCSUM) {
1830 			if (IFCAP_TXCSUM & ifp->if_capenable) {
1831 				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
1832 				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
1833 				    | CSUM_TSO);
1834 			} else {
1835 				ifp->if_capenable |= IFCAP_TXCSUM;
1836 				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1837 			}
1838 		}
		if (mask & IFCAP_RXCSUM) {
1839 			if (IFCAP_RXCSUM & ifp->if_capenable) {
1840 				ifp->if_capenable &= ~IFCAP_RXCSUM;
1841 			} else {
1842 				ifp->if_capenable |= IFCAP_RXCSUM;
1843 			}
1844 		}
1845 		if (mask & IFCAP_TSO4) {
1846 			if (IFCAP_TSO4 & ifp->if_capenable) {
1847 				ifp->if_capenable &= ~IFCAP_TSO4;
1848 				ifp->if_hwassist &= ~CSUM_TSO;
1849 			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
1850 				ifp->if_capenable |= IFCAP_TSO4;
1851 				ifp->if_hwassist |= CSUM_TSO;
1852 			} else {
1853 				if (cxgb_debug)
1854 					printf("cxgb requires tx checksum offload"
1855 					    " be enabled to use TSO\n");
1856 				error = EINVAL;
1857 			}
1858 		}
1859 		PORT_UNLOCK(p);
1860 		break;
1861 	default:
1862 		error = ether_ioctl(ifp, command, data);
1863 		break;
1864 	}
1865 	return (error);
1866 }
1867 
1868 int
1869 cxgb_tx_common(struct ifnet *ifp, struct sge_qset *qs, uint32_t txmax)
1870 {
1871 	struct sge_txq *txq;
1872 	int err, in_use_init, count;
1873 	struct mbuf **m_vec;
1874 
1875 	txq = &qs->txq[TXQ_ETH];
1876 	m_vec = txq->txq_m_vec;
1877 	in_use_init = txq->in_use;
1878 	err = 0;
1879 	while ((txq->in_use - in_use_init < txmax) &&
1880 	    (txq->size > txq->in_use + TX_MAX_DESC)) {
1881 		check_pkt_coalesce(qs);
1882 		count = cxgb_dequeue_packet(ifp, txq, m_vec);
1883 		if (count == 0)
1884 			break;
1885 		ETHER_BPF_MTAP(ifp, m_vec[0]);
1886 
1887 		if ((err = t3_encap(qs, m_vec, count)) != 0)
1888 			break;
1889 		txq->txq_enqueued += count;
1890 	}
1891 #if 0 /* !MULTIQ */
1892 	if (__predict_false(err)) {
1893 		if (err == ENOMEM) {
1894 			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1895 			IFQ_LOCK(&ifp->if_snd);
1896 			IFQ_DRV_PREPEND(&ifp->if_snd, m_vec[0]);
1897 			IFQ_UNLOCK(&ifp->if_snd);
1898 		}
1899 	}
1900 	if (err == 0 && m_vec[0] == NULL) {
1901 		err = ENOBUFS;
1902 	} else if ((err == 0) && (txq->size <= txq->in_use + TX_MAX_DESC) &&
1904 	    (ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
1905 		setbit(&qs->txq_stopped, TXQ_ETH);
1906 		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1907 		err = ENOSPC;
1908 	}
1909 #else
1910 	if ((err == 0) &&  (txq->size <= txq->in_use + TX_MAX_DESC)) {
1911 		err = ENOSPC;
1912 		setbit(&qs->txq_stopped, TXQ_ETH);
1913 	}
1914 	if (err == ENOMEM) {
1915 		int i;
1916 		/*
1917 		 * Sub-optimal :-/
1918 		 */
1919 		for (i = 0; i < count; i++)
1920 			m_freem(m_vec[i]);
1921 	}
1922 #endif
1923 	return (err);
1924 }
1925 
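/*
 * Manual media changes are not supported by this driver.
 */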
1926 static int
1927 cxgb_media_change(struct ifnet *ifp)
1928 {
1929 	if_printf(ifp, "media change not supported\n");
1930 	return (ENXIO);
1931 }
1932 
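/*
 * Report link status, speed, and duplex to ifconfig from the cached
 * link_config state.
 */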
1933 static void
1934 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1935 {
1936 	struct port_info *p = ifp->if_softc;
1937 
1938 	ifmr->ifm_status = IFM_AVALID;
1939 	ifmr->ifm_active = IFM_ETHER;
1940 
1941 	if (!p->link_config.link_ok)
1942 		return;
1943 
1944 	ifmr->ifm_status |= IFM_ACTIVE;
1945 
1946 	switch (p->link_config.speed) {
1947 	case 10:
1948 		ifmr->ifm_active |= IFM_10_T;
1949 		break;
1950 	case 100:
1951 		ifmr->ifm_active |= IFM_100_TX;
		break;
1953 	case 1000:
1954 		ifmr->ifm_active |= IFM_1000_T;
1955 		break;
1956 	}
1957 
1958 	if (p->link_config.duplex)
1959 		ifmr->ifm_active |= IFM_FDX;
1960 	else
1961 		ifmr->ifm_active |= IFM_HDX;
1962 }
1963 
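/*
 * Slow-path interrupt handler.  The work to be done may sleep, so it
 * is deferred to the slow_intr_task taskqueue.
 */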
1964 static void
1965 cxgb_async_intr(void *data)
1966 {
1967 	adapter_t *sc = data;
1968 
1969 	if (cxgb_debug)
1970 		device_printf(sc->dev, "cxgb_async_intr\n");
1971 	/*
1972 	 * May need to sleep - defer to taskqueue
1973 	 */
1974 	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
1975 }
1976 
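/*
 * Taskqueue handler for external (PHY) interrupts: service the PHY,
 * then re-enable the T3DBG bit in the slow interrupt mask.
 */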
1977 static void
1978 cxgb_ext_intr_handler(void *arg, int count)
1979 {
1980 	adapter_t *sc = (adapter_t *)arg;
1981 
1982 	if (cxgb_debug)
1983 		printf("cxgb_ext_intr_handler\n");
1984 
1985 	t3_phy_intr_handler(sc);
1986 
1987 	/* Now reenable external interrupts */
1988 	ADAPTER_LOCK(sc);
1989 	if (sc->slow_intr_mask) {
1990 		sc->slow_intr_mask |= F_T3DBG;
1991 		t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
1992 		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
1993 	}
1994 	ADAPTER_UNLOCK(sc);
1995 }
1996 
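/*
 * Poll link state on ports whose PHYs cannot signal link changes by
 * interrupt, and refresh every port's advertised baudrate.
 */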
1997 static void
1998 check_link_status(adapter_t *sc)
1999 {
2000 	int i;
2001 
	for (i = 0; i < sc->params.nports; ++i) {
2003 		struct port_info *p = &sc->port[i];
2004 
2005 		if (!(p->port_type->caps & SUPPORTED_IRQ))
2006 			t3_link_changed(sc, i);
2007 		p->ifp->if_baudrate = p->link_config.speed * 1000000;
2008 	}
2009 }
2010 
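/*
 * Watchdog for the T3B2 MAC: status 1 from the common code means the
 * MAC was toggled, status 2 that it had to be reset, in which case the
 * MTU, MAC address, RX mode, and link are reprogrammed here.
 */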
2011 static void
2012 check_t3b2_mac(struct adapter *adapter)
2013 {
2014 	int i;
2015 
2016 	for_each_port(adapter, i) {
2017 		struct port_info *p = &adapter->port[i];
2018 		struct ifnet *ifp = p->ifp;
2019 		int status;
2020 
2021 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2022 			continue;
2023 
2024 		status = 0;
2025 		PORT_LOCK(p);
2026 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING))
2027 			status = t3b2_mac_watchdog_task(&p->mac);
2028 		if (status == 1)
2029 			p->mac.stats.num_toggled++;
2030 		else if (status == 2) {
2031 			struct cmac *mac = &p->mac;
2032 
2033 			t3_mac_set_mtu(mac, ifp->if_mtu + ETHER_HDR_LEN
2034 			    + ETHER_VLAN_ENCAP_LEN);
2035 			t3_mac_set_address(mac, 0, p->hw_addr);
2036 			cxgb_set_rxmode(p);
2037 			t3_link_start(&p->phy, mac, &p->link_config);
2038 			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2039 			t3_port_intr_enable(adapter, p->port_id);
2040 			p->mac.stats.num_resets++;
2041 		}
2042 		PORT_UNLOCK(p);
2043 	}
2044 }
2045 
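/*
 * Per-second callout.  If no port is running there is nothing to do;
 * otherwise the work is deferred to tick_task, and the callout is
 * rescheduled for as long as any device remains open.
 */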
2046 static void
2047 cxgb_tick(void *arg)
2048 {
2049 	adapter_t *sc = (adapter_t *)arg;
2050 	int i, running = 0;
2051 
2052 	for_each_port(sc, i) {
2053 
2054 		struct port_info *p = &sc->port[i];
2055 		struct ifnet *ifp = p->ifp;
2056 		PORT_LOCK(p);
2057 
2058 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING))
2059 			running = 1;
2060 		PORT_UNLOCK(p);
2061 	}
2062 
2063 	if (running == 0)
2064 		return;
2065 
2066 	taskqueue_enqueue(sc->tq, &sc->tick_task);
2067 
2068 	if (sc->open_device_map != 0)
2069 		callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2070 }
2071 
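/*
 * Taskqueue half of cxgb_tick: polls link state when link polling is
 * configured and runs the T3B2 MAC watchdog on the affected revisions.
 */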
2072 static void
2073 cxgb_tick_handler(void *arg, int count)
2074 {
2075 	adapter_t *sc = (adapter_t *)arg;
2076 	const struct adapter_params *p = &sc->params;
2077 
2078 	ADAPTER_LOCK(sc);
2079 	if (p->linkpoll_period)
2080 		check_link_status(sc);
2081 
2082 	/*
	 * The adapter lock can currently only be acquired after the
	 * port lock.
2085 	 */
2086 	ADAPTER_UNLOCK(sc);
2087 
2088 	if (p->rev == T3_REV_B2 && p->nports < 4)
2089 		check_t3b2_mac(sc);
2090 }
2091 
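/*
 * Rewrite selected BARs with their current values.  The body is
 * disabled; see the comment inside.
 */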
2092 static void
2093 touch_bars(device_t dev)
2094 {
2095 	/*
	 * Don't enable yet: the disabled block below was carried over
	 * from the Linux driver and still uses its PCI API (pdev).
2097 	 */
2098 #if !defined(__LP64__) && 0
2099 	u32 v;
2100 
2101 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2102 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2103 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2104 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2105 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2106 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2107 #endif
2108 }
2109 
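/*
 * Write len bytes at byte offset "offset" into the serial EEPROM.  The
 * EEPROM is accessed in 32-bit words, so a partial head or tail word is
 * first read back and merged with the caller's data.  Write protection
 * is lifted for the duration of the update.
 */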
2110 static int
2111 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2112 {
2113 	uint8_t *buf;
2114 	int err = 0;
2115 	u32 aligned_offset, aligned_len, *p;
2116 	struct adapter *adapter = pi->adapter;
2117 
2119 	aligned_offset = offset & ~3;
2120 	aligned_len = (len + (offset & 3) + 3) & ~3;
2121 
2122 	if (aligned_offset != offset || aligned_len != len) {
		/* M_WAITOK allocations sleep until they succeed. */
		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2126 		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2127 		if (!err && aligned_len > 4)
2128 			err = t3_seeprom_read(adapter,
2129 					      aligned_offset + aligned_len - 4,
2130 					      (u32 *)&buf[aligned_len - 4]);
2131 		if (err)
2132 			goto out;
2133 		memcpy(buf + (offset & 3), data, len);
2134 	} else
2135 		buf = (uint8_t *)(uintptr_t)data;
2136 
2137 	err = t3_seeprom_wp(adapter, 0);
2138 	if (err)
2139 		goto out;
2140 
2141 	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2142 		err = t3_seeprom_write(adapter, aligned_offset, *p);
2143 		aligned_offset += 4;
2144 	}
2145 
2146 	if (!err)
2147 		err = t3_seeprom_wp(adapter, 1);
2148 out:
2149 	if (buf != data)
2150 		free(buf, M_DEVBUF);
	return (err);
2152 }
2154 
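/*
 * Range check for the ioctl parameters below: a negative value means
 * the parameter was not supplied and is accepted as in range.
 */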
2155 static int
2156 in_range(int val, int lo, int hi)
2157 {
	return (val < 0 || (val <= hi && val >= lo));
2159 }
2160 
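/*
 * The control device carries no per-open state, so open and close are
 * no-ops.
 */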
2161 static int
cxgb_extension_open(struct cdev *dev, int flags, int fmt, d_thread_t *td)
2163 {
	return (0);
2165 }
2166 
2167 static int
2168 cxgb_extension_close(struct cdev *dev, int flags, int fmt, d_thread_t *td)
2169 {
	return (0);
2171 }
2172 
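/*
 * ioctl handler for the Chelsio control device.  Every command requires
 * driver privilege; most simply expose register, SGE, and offload
 * accessors from the common code to userland.
 */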
2173 static int
2174 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2175     int fflag, struct thread *td)
2176 {
2177 	int mmd, error = 0;
2178 	struct port_info *pi = dev->si_drv1;
2179 	adapter_t *sc = pi->adapter;
2180 
2181 #ifdef PRIV_SUPPORTED
2182 	if (priv_check(td, PRIV_DRIVER)) {
2183 		if (cxgb_debug)
2184 			printf("user does not have access to privileged ioctls\n");
2185 		return (EPERM);
2186 	}
2187 #else
2188 	if (suser(td)) {
2189 		if (cxgb_debug)
2190 			printf("user does not have access to privileged ioctls\n");
2191 		return (EPERM);
2192 	}
2193 #endif
2194 
2195 	switch (cmd) {
2196 	case SIOCGMIIREG: {
2197 		uint32_t val;
2198 		struct cphy *phy = &pi->phy;
2199 		struct mii_data *mid = (struct mii_data *)data;
2200 
2201 		if (!phy->mdio_read)
2202 			return (EOPNOTSUPP);
2203 		if (is_10G(sc)) {
2204 			mmd = mid->phy_id >> 8;
2205 			if (!mmd)
2206 				mmd = MDIO_DEV_PCS;
2207 			else if (mmd > MDIO_DEV_XGXS)
2208 				return (EINVAL);
2209 
2210 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2211 					     mid->reg_num, &val);
2212 		} else
			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2214 					     mid->reg_num & 0x1f, &val);
2215 		if (error == 0)
2216 			mid->val_out = val;
2217 		break;
2218 	}
2219 	case SIOCSMIIREG: {
2220 		struct cphy *phy = &pi->phy;
2221 		struct mii_data *mid = (struct mii_data *)data;
2222 
2223 		if (!phy->mdio_write)
2224 			return (EOPNOTSUPP);
2225 		if (is_10G(sc)) {
2226 			mmd = mid->phy_id >> 8;
2227 			if (!mmd)
2228 				mmd = MDIO_DEV_PCS;
2229 			else if (mmd > MDIO_DEV_XGXS)
2230 				return (EINVAL);
2231 
2232 			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2233 					      mmd, mid->reg_num, mid->val_in);
2234 		} else
2235 			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2236 					      mid->reg_num & 0x1f,
2237 					      mid->val_in);
2238 		break;
2239 	}
2240 	case CHELSIO_SETREG: {
2241 		struct ch_reg *edata = (struct ch_reg *)data;
2242 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2243 			return (EFAULT);
2244 		t3_write_reg(sc, edata->addr, edata->val);
2245 		break;
2246 	}
2247 	case CHELSIO_GETREG: {
2248 		struct ch_reg *edata = (struct ch_reg *)data;
2249 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2250 			return (EFAULT);
2251 		edata->val = t3_read_reg(sc, edata->addr);
2252 		break;
2253 	}
2254 	case CHELSIO_GET_SGE_CONTEXT: {
2255 		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2256 		mtx_lock(&sc->sge.reg_lock);
2257 		switch (ecntxt->cntxt_type) {
2258 		case CNTXT_TYPE_EGRESS:
2259 			error = t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2260 			    ecntxt->data);
2261 			break;
2262 		case CNTXT_TYPE_FL:
2263 			error = t3_sge_read_fl(sc, ecntxt->cntxt_id,
2264 			    ecntxt->data);
2265 			break;
2266 		case CNTXT_TYPE_RSP:
2267 			error = t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2268 			    ecntxt->data);
2269 			break;
2270 		case CNTXT_TYPE_CQ:
2271 			error = t3_sge_read_cq(sc, ecntxt->cntxt_id,
2272 			    ecntxt->data);
2273 			break;
2274 		default:
2275 			error = EINVAL;
2276 			break;
2277 		}
2278 		mtx_unlock(&sc->sge.reg_lock);
2279 		break;
2280 	}
2281 	case CHELSIO_GET_SGE_DESC: {
2282 		struct ch_desc *edesc = (struct ch_desc *)data;
2283 		int ret;
2284 		if (edesc->queue_num >= SGE_QSETS * 6)
2285 			return (EINVAL);
2286 		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2287 		    edesc->queue_num % 6, edesc->idx, edesc->data);
2288 		if (ret < 0)
2289 			return (EINVAL);
2290 		edesc->size = ret;
2291 		break;
2292 	}
2293 	case CHELSIO_SET_QSET_PARAMS: {
2294 		struct qset_params *q;
2295 		struct ch_qset_params *t = (struct ch_qset_params *)data;
2296 
2297 		if (t->qset_idx >= SGE_QSETS)
2298 			return (EINVAL);
2299 		if (!in_range(t->intr_lat, 0, M_NEWTIMER) ||
2300 		    !in_range(t->cong_thres, 0, 255) ||
2301 		    !in_range(t->txq_size[0], MIN_TXQ_ENTRIES,
2302 			      MAX_TXQ_ENTRIES) ||
2303 		    !in_range(t->txq_size[1], MIN_TXQ_ENTRIES,
2304 			      MAX_TXQ_ENTRIES) ||
2305 		    !in_range(t->txq_size[2], MIN_CTRL_TXQ_ENTRIES,
2306 			      MAX_CTRL_TXQ_ENTRIES) ||
2307 		    !in_range(t->fl_size[0], MIN_FL_ENTRIES, MAX_RX_BUFFERS) ||
2308 		    !in_range(t->fl_size[1], MIN_FL_ENTRIES,
2309 			      MAX_RX_JUMBO_BUFFERS) ||
2310 		    !in_range(t->rspq_size, MIN_RSPQ_ENTRIES, MAX_RSPQ_ENTRIES))
2311 			return (EINVAL);
2312 		if ((sc->flags & FULL_INIT_DONE) &&
2313 		    (t->rspq_size >= 0 || t->fl_size[0] >= 0 ||
2314 		     t->fl_size[1] >= 0 || t->txq_size[0] >= 0 ||
2315 		     t->txq_size[1] >= 0 || t->txq_size[2] >= 0 ||
2316 		     t->polling >= 0 || t->cong_thres >= 0))
2317 			return (EBUSY);
2318 
2319 		q = &sc->params.sge.qset[t->qset_idx];
2320 
2321 		if (t->rspq_size >= 0)
2322 			q->rspq_size = t->rspq_size;
2323 		if (t->fl_size[0] >= 0)
2324 			q->fl_size = t->fl_size[0];
2325 		if (t->fl_size[1] >= 0)
2326 			q->jumbo_size = t->fl_size[1];
2327 		if (t->txq_size[0] >= 0)
2328 			q->txq_size[0] = t->txq_size[0];
2329 		if (t->txq_size[1] >= 0)
2330 			q->txq_size[1] = t->txq_size[1];
2331 		if (t->txq_size[2] >= 0)
2332 			q->txq_size[2] = t->txq_size[2];
2333 		if (t->cong_thres >= 0)
2334 			q->cong_thres = t->cong_thres;
2335 		if (t->intr_lat >= 0) {
2336 			struct sge_qset *qs = &sc->sge.qs[t->qset_idx];
2337 
2338 			q->coalesce_nsecs = t->intr_lat*1000;
2339 			t3_update_qset_coalesce(qs, q);
2340 		}
2341 		break;
2342 	}
2343 	case CHELSIO_GET_QSET_PARAMS: {
2344 		struct qset_params *q;
2345 		struct ch_qset_params *t = (struct ch_qset_params *)data;
2346 
2347 		if (t->qset_idx >= SGE_QSETS)
2348 			return (EINVAL);
2349 
		q = &sc->params.sge.qset[t->qset_idx];
2351 		t->rspq_size   = q->rspq_size;
2352 		t->txq_size[0] = q->txq_size[0];
2353 		t->txq_size[1] = q->txq_size[1];
2354 		t->txq_size[2] = q->txq_size[2];
2355 		t->fl_size[0]  = q->fl_size;
2356 		t->fl_size[1]  = q->jumbo_size;
2357 		t->polling     = q->polling;
2358 		t->intr_lat    = q->coalesce_nsecs / 1000;
2359 		t->cong_thres  = q->cong_thres;
2360 		break;
2361 	}
2362 	case CHELSIO_SET_QSET_NUM: {
2363 		struct ch_reg *edata = (struct ch_reg *)data;
2364 		unsigned int port_idx = pi->port_id;
2365 
2366 		if (sc->flags & FULL_INIT_DONE)
2367 			return (EBUSY);
2368 		if (edata->val < 1 ||
2369 		    (edata->val > 1 && !(sc->flags & USING_MSIX)))
2370 			return (EINVAL);
2371 		if (edata->val + sc->port[!port_idx].nqsets > SGE_QSETS)
2372 			return (EINVAL);
2373 		sc->port[port_idx].nqsets = edata->val;
2374 		sc->port[0].first_qset = 0;
2375 		/*
		 * XXX hardcoded to two ports, as in the Linux driver
2377 		 */
2378 		sc->port[1].first_qset = sc->port[0].nqsets;
2379 		break;
2380 	}
2381 	case CHELSIO_GET_QSET_NUM: {
2382 		struct ch_reg *edata = (struct ch_reg *)data;
2383 		edata->val = pi->nqsets;
2384 		break;
2385 	}
2386 #ifdef notyet
2387 	case CHELSIO_LOAD_FW:
2388 	case CHELSIO_GET_PM:
2389 	case CHELSIO_SET_PM:
2390 		return (EOPNOTSUPP);
2392 #endif
2393 	case CHELSIO_SETMTUTAB: {
2394 		struct ch_mtus *m = (struct ch_mtus *)data;
2395 		int i;
2396 
2397 		if (!is_offload(sc))
2398 			return (EOPNOTSUPP);
2399 		if (offload_running(sc))
2400 			return (EBUSY);
2401 		if (m->nmtus != NMTUS)
2402 			return (EINVAL);
2403 		if (m->mtus[0] < 81)         /* accommodate SACK */
2404 			return (EINVAL);
2405 
2406 		/*
2407 		 * MTUs must be in ascending order
2408 		 */
2409 		for (i = 1; i < NMTUS; ++i)
2410 			if (m->mtus[i] < m->mtus[i - 1])
2411 				return (EINVAL);
2412 
2413 		memcpy(sc->params.mtus, m->mtus,
2414 		       sizeof(sc->params.mtus));
2415 		break;
2416 	}
2417 	case CHELSIO_GETMTUTAB: {
2418 		struct ch_mtus *m = (struct ch_mtus *)data;
2419 
2420 		if (!is_offload(sc))
2421 			return (EOPNOTSUPP);
2422 
2423 		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2424 		m->nmtus = NMTUS;
2425 		break;
2426 	}
2427 	case CHELSIO_DEVUP:
2428 		if (!is_offload(sc))
2429 			return (EOPNOTSUPP);
		return (offload_open(pi));
2432 	case CHELSIO_GET_MEM: {
2433 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2434 		struct mc7 *mem;
2435 		uint8_t *useraddr;
2436 		u64 buf[32];
2437 
2438 		if (!is_offload(sc))
2439 			return (EOPNOTSUPP);
2440 		if (!(sc->flags & FULL_INIT_DONE))
2441 			return (EIO);         /* need the memory controllers */
2442 		if ((t->addr & 0x7) || (t->len & 0x7))
2443 			return (EINVAL);
2444 		if (t->mem_id == MEM_CM)
2445 			mem = &sc->cm;
2446 		else if (t->mem_id == MEM_PMRX)
2447 			mem = &sc->pmrx;
2448 		else if (t->mem_id == MEM_PMTX)
2449 			mem = &sc->pmtx;
2450 		else
2451 			return (EINVAL);
2452 
2453 		/*
2454 		 * Version scheme:
2455 		 * bits 0..9: chip version
2456 		 * bits 10..15: chip revision
2457 		 */
2458 		t->version = 3 | (sc->params.rev << 10);
2459 
2460 		/*
2461 		 * Read 256 bytes at a time as len can be large and we don't
2462 		 * want to use huge intermediate buffers.
2463 		 */
2464 		useraddr = (uint8_t *)t->buf;
2465 		while (t->len) {
2466 			unsigned int chunk = min(t->len, sizeof(buf));
2467 
2468 			error = t3_mc7_bd_read(mem, t->addr / 8, chunk / 8, buf);
2469 			if (error)
2470 				return (-error);
2471 			if (copyout(buf, useraddr, chunk))
2472 				return (EFAULT);
2473 			useraddr += chunk;
2474 			t->addr += chunk;
2475 			t->len -= chunk;
2476 		}
2477 		break;
2478 	}
2479 	case CHELSIO_READ_TCAM_WORD: {
2480 		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2481 
2482 		if (!is_offload(sc))
2483 			return (EOPNOTSUPP);
2484 		if (!(sc->flags & FULL_INIT_DONE))
2485 			return (EIO);         /* need MC5 */
		return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
2488 	}
2489 	case CHELSIO_SET_TRACE_FILTER: {
2490 		struct ch_trace *t = (struct ch_trace *)data;
2491 		const struct trace_params *tp;
2492 
2493 		tp = (const struct trace_params *)&t->sip;
2494 		if (t->config_tx)
2495 			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2496 					       t->trace_tx);
2497 		if (t->config_rx)
2498 			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2499 					       t->trace_rx);
2500 		break;
2501 	}
2502 	case CHELSIO_SET_PKTSCHED: {
2503 		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2504 		if (sc->open_device_map == 0)
2505 			return (EAGAIN);
2506 		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2507 		    p->binding);
2508 		break;
2509 	}
2510 	case CHELSIO_IFCONF_GETREGS: {
2511 		struct ifconf_regs *regs = (struct ifconf_regs *)data;
2512 		int reglen = cxgb_get_regs_len();
2513 		uint8_t *buf = malloc(REGDUMP_SIZE, M_DEVBUF, M_NOWAIT);
		if (buf == NULL)
			return (ENOMEM);
		if (regs->len > reglen)
2517 			regs->len = reglen;
2518 		else if (regs->len < reglen) {
2519 			error = E2BIG;
2520 			goto done;
2521 		}
2522 		cxgb_get_regs(sc, regs, buf);
2523 		error = copyout(buf, regs->data, reglen);
2524 
done:
2526 		free(buf, M_DEVBUF);
2527 
2528 		break;
2529 	}
2530 	case CHELSIO_SET_HW_SCHED: {
2531 		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2532 		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2533 
2534 		if ((sc->flags & FULL_INIT_DONE) == 0)
2535 			return (EAGAIN);       /* need TP to be initialized */
2536 		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2537 		    !in_range(t->channel, 0, 1) ||
2538 		    !in_range(t->kbps, 0, 10000000) ||
2539 		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2540 		    !in_range(t->flow_ipg, 0,
2541 			      dack_ticks_to_usec(sc, 0x7ff)))
2542 			return (EINVAL);
2543 
2544 		if (t->kbps >= 0) {
2545 			error = t3_config_sched(sc, t->kbps, t->sched);
2546 			if (error < 0)
2547 				return (-error);
2548 		}
2549 		if (t->class_ipg >= 0)
2550 			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2551 		if (t->flow_ipg >= 0) {
2552 			t->flow_ipg *= 1000;     /* us -> ns */
2553 			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2554 		}
2555 		if (t->mode >= 0) {
2556 			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2557 
2558 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2559 					 bit, t->mode ? bit : 0);
2560 		}
2561 		if (t->channel >= 0)
2562 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2563 					 1 << t->sched, t->channel << t->sched);
2564 		break;
2565 	}
2566 	default:
2567 		return (EOPNOTSUPP);
2569 	}
2570 
2571 	return (error);
2572 }
2573 
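/*
 * Copy the registers in [start, end] into buf at byte offset start,
 * one 32-bit word at a time.
 */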
2574 static __inline void
2575 reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
2576     unsigned int end)
2577 {
	uint32_t *p = (uint32_t *)(buf + start);
2579 
2580 	for ( ; start <= end; start += sizeof(uint32_t))
2581 		*p++ = t3_read_reg(ap, start);
2582 }
2583 
2584 #define T3_REGMAP_SIZE (3 * 1024)
2585 static int
2586 cxgb_get_regs_len(void)
2587 {
	return (T3_REGMAP_SIZE);
2589 }
2590 #undef T3_REGMAP_SIZE
2591 
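/*
 * Build the register dump returned by CHELSIO_IFCONF_GETREGS: a version
 * word followed by selected register blocks, with the skipped ranges
 * left zeroed.
 */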
2592 static void
2593 cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf)
2594 {
2595 
2596 	/*
2597 	 * Version scheme:
2598 	 * bits 0..9: chip version
2599 	 * bits 10..15: chip revision
2600 	 * bit 31: set for PCIe cards
2601 	 */
2602 	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
2603 
2604 	/*
2605 	 * We skip the MAC statistics registers because they are clear-on-read.
2606 	 * Also reading multi-register stats would need to synchronize with the
2607 	 * periodic mac stats accumulation.  Hard to justify the complexity.
2608 	 */
2609 	memset(buf, 0, REGDUMP_SIZE);
2610 	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
2611 	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
2612 	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
2613 	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
2614 	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
2615 	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
2616 		       XGM_REG(A_XGM_SERDES_STAT3, 1));
2617 	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
2618 		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
2619 }
2620