xref: /freebsd/sys/dev/cxgb/cxgb_main.c (revision 4ed925457ab06e83238a5db33e89ccc94b99a713)
1 /**************************************************************************
2 
3 Copyright (c) 2007-2009, Chelsio Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27 
28 ***************************************************************************/
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/bus.h>
37 #include <sys/module.h>
38 #include <sys/pciio.h>
39 #include <sys/conf.h>
40 #include <machine/bus.h>
41 #include <machine/resource.h>
42 #include <sys/bus_dma.h>
43 #include <sys/ktr.h>
44 #include <sys/rman.h>
45 #include <sys/ioccom.h>
46 #include <sys/mbuf.h>
47 #include <sys/linker.h>
48 #include <sys/firmware.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/smp.h>
52 #include <sys/sysctl.h>
53 #include <sys/syslog.h>
54 #include <sys/queue.h>
55 #include <sys/taskqueue.h>
56 #include <sys/proc.h>
57 
58 #include <net/bpf.h>
59 #include <net/ethernet.h>
60 #include <net/if.h>
61 #include <net/if_arp.h>
62 #include <net/if_dl.h>
63 #include <net/if_media.h>
64 #include <net/if_types.h>
65 #include <net/if_vlan_var.h>
66 
67 #include <netinet/in_systm.h>
68 #include <netinet/in.h>
69 #include <netinet/if_ether.h>
70 #include <netinet/ip.h>
72 #include <netinet/tcp.h>
73 #include <netinet/udp.h>
74 
75 #include <dev/pci/pcireg.h>
76 #include <dev/pci/pcivar.h>
77 #include <dev/pci/pci_private.h>
78 
79 #include <cxgb_include.h>
80 
81 #ifdef PRIV_SUPPORTED
82 #include <sys/priv.h>
83 #endif
84 
85 static int cxgb_setup_interrupts(adapter_t *);
86 static void cxgb_teardown_interrupts(adapter_t *);
87 static void cxgb_init(void *);
88 static int cxgb_init_locked(struct port_info *);
89 static int cxgb_uninit_locked(struct port_info *);
90 static int cxgb_uninit_synchronized(struct port_info *);
91 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
92 static int cxgb_media_change(struct ifnet *);
93 static int cxgb_ifm_type(int);
94 static void cxgb_build_medialist(struct port_info *);
95 static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
96 static int setup_sge_qsets(adapter_t *);
97 static void cxgb_async_intr(void *);
98 static void cxgb_ext_intr_handler(void *, int);
99 static void cxgb_tick_handler(void *, int);
100 static void cxgb_tick(void *);
101 static void setup_rss(adapter_t *sc);
102 
103 /* Attachment glue for the PCI controller end of the device.  Each port of
104  * the device is attached separately, as defined later.
105  */
106 static int cxgb_controller_probe(device_t);
107 static int cxgb_controller_attach(device_t);
108 static int cxgb_controller_detach(device_t);
109 static void cxgb_free(struct adapter *);
110 static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
111     unsigned int end);
112 static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
113 static int cxgb_get_regs_len(void);
114 static int offload_open(struct port_info *pi);
115 static void touch_bars(device_t dev);
116 static int offload_close(struct t3cdev *tdev);
117 static void cxgb_update_mac_settings(struct port_info *p);
118 
119 static device_method_t cxgb_controller_methods[] = {
120 	DEVMETHOD(device_probe,		cxgb_controller_probe),
121 	DEVMETHOD(device_attach,	cxgb_controller_attach),
122 	DEVMETHOD(device_detach,	cxgb_controller_detach),
123 
124 	/* bus interface */
125 	DEVMETHOD(bus_print_child,	bus_generic_print_child),
126 	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
127 
128 	{ 0, 0 }
129 };
130 
131 static driver_t cxgb_controller_driver = {
132 	"cxgbc",
133 	cxgb_controller_methods,
134 	sizeof(struct adapter)
135 };
136 
137 static devclass_t	cxgb_controller_devclass;
138 DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
139 
140 /*
141  * Attachment glue for the ports.  Attachment is done directly to the
142  * controller device.
143  */
144 static int cxgb_port_probe(device_t);
145 static int cxgb_port_attach(device_t);
146 static int cxgb_port_detach(device_t);
147 
148 static device_method_t cxgb_port_methods[] = {
149 	DEVMETHOD(device_probe,		cxgb_port_probe),
150 	DEVMETHOD(device_attach,	cxgb_port_attach),
151 	DEVMETHOD(device_detach,	cxgb_port_detach),
152 	{ 0, 0 }
153 };
154 
155 static driver_t cxgb_port_driver = {
156 	"cxgb",
157 	cxgb_port_methods,
158 	0
159 };
160 
161 static d_ioctl_t cxgb_extension_ioctl;
162 static d_open_t cxgb_extension_open;
163 static d_close_t cxgb_extension_close;
164 
165 static struct cdevsw cxgb_cdevsw = {
166        .d_version =    D_VERSION,
167        .d_flags =      0,
168        .d_open =       cxgb_extension_open,
169        .d_close =      cxgb_extension_close,
170        .d_ioctl =      cxgb_extension_ioctl,
171        .d_name =       "cxgb",
172 };
173 
174 static devclass_t	cxgb_port_devclass;
175 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
176 
177 /*
178  * The driver uses the best interrupt scheme available on a platform in the
179  * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
180  * of these schemes the driver may consider as follows:
181  *
182  * msi = 2: choose from among all three options
183  * msi = 1: only consider MSI and pin interrupts
184  * msi = 0: force pin interrupts
185  */
186 static int msi_allowed = 2;
187 
188 TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
189 SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
190 SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
191     "MSI-X, MSI, INTx selector");
192 
193 /*
194  * The driver enables offload as a default.
195  * To disable it, use ofld_disable = 1.
196  */
197 static int ofld_disable = 0;
198 TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
199 SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
200     "disable ULP offload");
201 
202 /*
203  * The driver uses an auto-queue algorithm by default.
204  * To disable it and force a single queue-set per port, use multiq = 0
205  */
206 static int multiq = 1;
207 TUNABLE_INT("hw.cxgb.multiq", &multiq);
208 SYSCTL_UINT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
209     "use min(ncpus/ports, 8) queue-sets per port");
210 
211 /*
212  * By default the driver will not update the firmware unless
213  * it was compiled against a newer version
214  *
215  */
216 static int force_fw_update = 0;
217 TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
218 SYSCTL_UINT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
219     "update firmware even if up to date");
220 
221 int cxgb_use_16k_clusters = 1;
222 TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
223 SYSCTL_UINT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
224     &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");
225 
226 /*
227  * Tune the size of the output queue.
228  */
229 int cxgb_snd_queue_len = IFQ_MAXLEN;
230 TUNABLE_INT("hw.cxgb.snd_queue_len", &cxgb_snd_queue_len);
231 SYSCTL_UINT(_hw_cxgb, OID_AUTO, snd_queue_len, CTLFLAG_RDTUN,
232     &cxgb_snd_queue_len, 0, "send queue size");
233 
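/*
 * All of the hw.cxgb.* knobs above are boot-time tunables (CTLFLAG_RDTUN),
 * so they can only be changed from the loader environment, for example in
 * /boot/loader.conf:
 *
 *	hw.cxgb.msi_allowed="1"
 *	hw.cxgb.multiq="0"
 */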
234 
235 enum {
236 	MAX_TXQ_ENTRIES      = 16384,
237 	MAX_CTRL_TXQ_ENTRIES = 1024,
238 	MAX_RSPQ_ENTRIES     = 16384,
239 	MAX_RX_BUFFERS       = 16384,
240 	MAX_RX_JUMBO_BUFFERS = 16384,
241 	MIN_TXQ_ENTRIES      = 4,
242 	MIN_CTRL_TXQ_ENTRIES = 4,
243 	MIN_RSPQ_ENTRIES     = 32,
244 	MIN_FL_ENTRIES       = 32,
245 	MIN_FL_JUMBO_ENTRIES = 32
246 };
247 
248 struct filter_info {
249 	u32 sip;
250 	u32 sip_mask;
251 	u32 dip;
252 	u16 sport;
253 	u16 dport;
254 	u32 vlan:12;
255 	u32 vlan_prio:3;
256 	u32 mac_hit:1;
257 	u32 mac_idx:4;
258 	u32 mac_vld:1;
259 	u32 pkt_type:2;
260 	u32 report_filter_id:1;
261 	u32 pass:1;
262 	u32 rss:1;
263 	u32 qset:3;
264 	u32 locked:1;
265 	u32 valid:1;
266 };
267 
268 enum { FILTER_NO_VLAN_PRI = 7 };
269 
270 #define EEPROM_MAGIC 0x38E2F10C
271 
272 #define PORT_MASK ((1 << MAX_NPORTS) - 1)
273 
274 /* Table for probing the cards.  The desc field isn't actually used */
275 struct cxgb_ident {
276 	uint16_t	vendor;
277 	uint16_t	device;
278 	int		index;
279 	char		*desc;
280 } cxgb_identifiers[] = {
281 	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
282 	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
283 	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
284 	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
285 	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
286 	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
287 	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
288 	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
289 	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
290 	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
291 	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
292 	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
293 	{PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
294 	{PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
295 	{0, 0, 0, NULL}
296 };
297 
298 static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
299 
300 
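/*
 * Map the adapter's hardware revision to the character used in the
 * protocol SRAM/EEPROM image file names ('a', 'b' or 'c'; 'z' if the
 * revision is unrecognized).
 */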
301 static __inline char
302 t3rev2char(struct adapter *adapter)
303 {
304 	char rev = 'z';
305 
306 	switch(adapter->params.rev) {
307 	case T3_REV_A:
308 		rev = 'a';
309 		break;
310 	case T3_REV_B:
311 	case T3_REV_B2:
312 		rev = 'b';
313 		break;
314 	case T3_REV_C:
315 		rev = 'c';
316 		break;
317 	}
318 	return rev;
319 }
320 
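/*
 * Return the cxgb_identifiers[] entry whose PCI vendor/device IDs match
 * this device, or NULL if the card is not recognized.
 */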
321 static struct cxgb_ident *
322 cxgb_get_ident(device_t dev)
323 {
324 	struct cxgb_ident *id;
325 
326 	for (id = cxgb_identifiers; id->desc != NULL; id++) {
327 		if ((id->vendor == pci_get_vendor(dev)) &&
328 		    (id->device == pci_get_device(dev))) {
329 			return (id);
330 		}
331 	}
332 	return (NULL);
333 }
334 
335 static const struct adapter_info *
336 cxgb_get_adapter_info(device_t dev)
337 {
338 	struct cxgb_ident *id;
339 	const struct adapter_info *ai;
340 
341 	id = cxgb_get_ident(dev);
342 	if (id == NULL)
343 		return (NULL);
344 
345 	ai = t3_get_adapter_info(id->index);
346 
347 	return (ai);
348 }
349 
350 static int
351 cxgb_controller_probe(device_t dev)
352 {
353 	const struct adapter_info *ai;
354 	char *ports, buf[80];
355 	int nports;
356 
357 	ai = cxgb_get_adapter_info(dev);
358 	if (ai == NULL)
359 		return (ENXIO);
360 
361 	nports = ai->nports0 + ai->nports1;
362 	if (nports == 1)
363 		ports = "port";
364 	else
365 		ports = "ports";
366 
367 	snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
368 	device_set_desc_copy(dev, buf);
369 	return (BUS_PROBE_DEFAULT);
370 }
371 
372 #define FW_FNAME "cxgb_t3fw"
373 #define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
374 #define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
375 
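/*
 * Fetch the cxgb_t3fw image via firmware(9) and write it to the card with
 * t3_load_fw().  Returns 0 on success or an errno value on failure.
 */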
376 static int
377 upgrade_fw(adapter_t *sc)
378 {
379 #ifdef FIRMWARE_LATEST
380 	const struct firmware *fw;
381 #else
382 	struct firmware *fw;
383 #endif
384 	int status;
385 
386 	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
387 		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
388 		return (ENOENT);
389 	} else
390 		device_printf(sc->dev, "updating firmware on card\n");
391 	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
392 
393 	device_printf(sc->dev, "firmware update returned %s %d\n", (status == 0) ? "success" : "fail", status);
394 
395 	firmware_put(fw, FIRMWARE_UNLOAD);
396 
397 	return (status);
398 }
399 
400 /*
401  * The cxgb_controller_attach function is responsible for the initial
402  * bringup of the device.  Its responsibilities include:
403  *
404  *  1. Determine if the device supports MSI or MSI-X.
405  *  2. Allocate bus resources so that we can access the Base Address Register.
406  *  3. Create and initialize mutexes for the controller and its control
407  *     logic such as SGE and MDIO.
408  *  4. Call hardware-specific setup routine for the adapter as a whole.
409  *  5. Allocate the BAR for doing MSI-X.
410  *  6. Set up the line interrupt iff MSI-X is not supported.
411  *  7. Create the driver's taskq.
412  *  8. Start one task queue service thread.
413  *  9. Check if the firmware and SRAM are up-to-date.  They will be
414  *     auto-updated later (before FULL_INIT_DONE), if required.
415  * 10. Create a child device for each MAC (port).
416  * 11. Initialize T3 private state.
417  * 12. Trigger the LED.
418  * 13. Set up offload iff supported.
419  * 14. Reset/restart the tick callout.
420  * 15. Attach sysctls.
421  *
422  * NOTE: Any modification or deviation from this list MUST be reflected in
423  * the above comment.  Failure to do so will result in problems on various
424  * error conditions including link flapping.
425  */
426 static int
427 cxgb_controller_attach(device_t dev)
428 {
429 	device_t child;
430 	const struct adapter_info *ai;
431 	struct adapter *sc;
432 	int i, error = 0;
433 	uint32_t vers;
434 	int port_qsets = 1;
435 #ifdef MSI_SUPPORTED
436 	int msi_needed, reg;
437 #endif
438 	char buf[80];
439 
440 	sc = device_get_softc(dev);
441 	sc->dev = dev;
442 	sc->msi_count = 0;
443 	ai = cxgb_get_adapter_info(dev);
444 
445 	/*
446 	 * XXX not really related but a recent addition
447 	 */
448 #ifdef MSI_SUPPORTED
449 	/* find the PCIe link width and set max read request to 4KB*/
450 	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
451 		uint16_t lnk, pectl;
452 		lnk = pci_read_config(dev, reg + 0x12, 2);
453 		sc->link_width = (lnk >> 4) & 0x3f;
454 
455 		pectl = pci_read_config(dev, reg + 0x8, 2);
456 		pectl = (pectl & ~0x7000) | (5 << 12);
457 		pci_write_config(dev, reg + 0x8, pectl, 2);
458 	}
459 
460 	if (sc->link_width != 0 && sc->link_width <= 4 &&
461 	    (ai->nports0 + ai->nports1) <= 2) {
462 		device_printf(sc->dev,
463 		    "PCIe x%d Link, expect reduced performance\n",
464 		    sc->link_width);
465 	}
466 #endif
467 	touch_bars(dev);
468 	pci_enable_busmaster(dev);
469 	/*
470 	 * Allocate the registers and make them available to the driver.
471 	 * The registers that we care about for NIC mode are in BAR 0
472 	 */
473 	sc->regs_rid = PCIR_BAR(0);
474 	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
475 	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
476 		device_printf(dev, "Cannot allocate BAR region 0\n");
477 		return (ENXIO);
478 	}
479 	sc->udbs_rid = PCIR_BAR(2);
480 	sc->udbs_res = NULL;
481 	if (is_offload(sc) &&
482 	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
483 		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
484 		device_printf(dev, "Cannot allocate BAR region 2\n");
485 		error = ENXIO;
486 		goto out;
487 	}
488 
489 	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
490 	    device_get_unit(dev));
491 	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
492 
493 	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
494 	    device_get_unit(dev));
495 	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
496 	    device_get_unit(dev));
497 	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
498 	    device_get_unit(dev));
499 
500 	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
501 	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
502 	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
503 
504 	sc->bt = rman_get_bustag(sc->regs_res);
505 	sc->bh = rman_get_bushandle(sc->regs_res);
506 	sc->mmio_len = rman_get_size(sc->regs_res);
507 
508 	for (i = 0; i < MAX_NPORTS; i++)
509 		sc->port[i].adapter = sc;
510 
511 	if (t3_prep_adapter(sc, ai, 1) < 0) {
512 		printf("prep adapter failed\n");
513 		error = ENODEV;
514 		goto out;
515 	}
516         /* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
517 	 * enough messages for the queue sets.  If that fails, try falling
518 	 * back to MSI.  If that fails, then try falling back to the legacy
519 	 * interrupt pin model.
520 	 */
521 #ifdef MSI_SUPPORTED
522 
523 	sc->msix_regs_rid = 0x20;
524 	if ((msi_allowed >= 2) &&
525 	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
526 	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
527 
528 		if (multiq)
529 			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
530 		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
531 
532 		if (pci_msix_count(dev) == 0 ||
533 		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
534 		    sc->msi_count != msi_needed) {
535 			device_printf(dev, "alloc msix failed - "
536 				      "msi_count=%d, msi_needed=%d, err=%d; "
537 				      "will try MSI\n", sc->msi_count,
538 				      msi_needed, error);
539 			sc->msi_count = 0;
540 			port_qsets = 1;
541 			pci_release_msi(dev);
542 			bus_release_resource(dev, SYS_RES_MEMORY,
543 			    sc->msix_regs_rid, sc->msix_regs_res);
544 			sc->msix_regs_res = NULL;
545 		} else {
546 			sc->flags |= USING_MSIX;
547 			sc->cxgb_intr = cxgb_async_intr;
548 			device_printf(dev,
549 				      "using MSI-X interrupts (%u vectors)\n",
550 				      sc->msi_count);
551 		}
552 	}
553 
554 	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
555 		sc->msi_count = 1;
556 		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
557 			device_printf(dev, "alloc msi failed - "
558 				      "err=%d; will try INTx\n", error);
559 			sc->msi_count = 0;
560 			port_qsets = 1;
561 			pci_release_msi(dev);
562 		} else {
563 			sc->flags |= USING_MSI;
564 			sc->cxgb_intr = t3_intr_msi;
565 			device_printf(dev, "using MSI interrupts\n");
566 		}
567 	}
568 #endif
569 	if (sc->msi_count == 0) {
570 		device_printf(dev, "using line interrupts\n");
571 		sc->cxgb_intr = t3b_intr;
572 	}
573 
574 	/* Create a private taskqueue thread for handling driver events */
575 #ifdef TASKQUEUE_CURRENT
576 	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
577 	    taskqueue_thread_enqueue, &sc->tq);
578 #else
579 	sc->tq = taskqueue_create_fast("cxgb_taskq", M_NOWAIT,
580 	    taskqueue_thread_enqueue, &sc->tq);
581 #endif
582 	if (sc->tq == NULL) {
583 		device_printf(dev, "failed to allocate controller task queue\n");
584 		goto out;
585 	}
586 
587 	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
588 	    device_get_nameunit(dev));
589 	TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);
590 	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
591 
592 
593 	/* Create a periodic callout for checking adapter status */
594 	callout_init(&sc->cxgb_tick_ch, TRUE);
595 
596 	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
597 		/*
598 		 * Warn user that a firmware update will be attempted in init.
599 		 */
600 		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
601 		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
602 		sc->flags &= ~FW_UPTODATE;
603 	} else {
604 		sc->flags |= FW_UPTODATE;
605 	}
606 
607 	if (t3_check_tpsram_version(sc) < 0) {
608 		/*
609 		 * Warn user that a firmware update will be attempted in init.
610 		 */
611 		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
612 		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
613 		sc->flags &= ~TPS_UPTODATE;
614 	} else {
615 		sc->flags |= TPS_UPTODATE;
616 	}
617 
618 	/*
619 	 * Create a child device for each MAC.  The ethernet attachment
620 	 * will be done in these children.
621 	 */
622 	for (i = 0; i < (sc)->params.nports; i++) {
623 		struct port_info *pi;
624 
625 		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
626 			device_printf(dev, "failed to add child port\n");
627 			error = EINVAL;
628 			goto out;
629 		}
630 		pi = &sc->port[i];
631 		pi->adapter = sc;
632 		pi->nqsets = port_qsets;
633 		pi->first_qset = i*port_qsets;
634 		pi->port_id = i;
635 		pi->tx_chan = i >= ai->nports0;
636 		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
637 		sc->rxpkt_map[pi->txpkt_intf] = i;
638 		sc->port[i].tx_chan = i >= ai->nports0;
639 		sc->portdev[i] = child;
640 		device_set_softc(child, pi);
641 	}
642 	if ((error = bus_generic_attach(dev)) != 0)
643 		goto out;
644 
645 	/* initialize sge private state */
646 	t3_sge_init_adapter(sc);
647 
648 	t3_led_ready(sc);
649 
650 	cxgb_offload_init();
651 	if (is_offload(sc)) {
652 		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
653 		cxgb_adapter_ofld(sc);
654         }
655 	error = t3_get_fw_version(sc, &vers);
656 	if (error)
657 		goto out;
658 
659 	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
660 	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
661 	    G_FW_VERSION_MICRO(vers));
662 
663 	snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
664 		 ai->desc, is_offload(sc) ? "R" : "",
665 		 sc->params.vpd.ec, sc->params.vpd.sn);
666 	device_set_desc_copy(dev, buf);
667 
668 	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
669 		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
670 		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
671 
672 	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
673 	callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
674 	t3_add_attach_sysctls(sc);
675 out:
676 	if (error)
677 		cxgb_free(sc);
678 
679 	return (error);
680 }
681 
682 /*
683  * The cxgb_controller_detach routine is called when the device is
684  * unloaded from the system.
685  */
686 
687 static int
688 cxgb_controller_detach(device_t dev)
689 {
690 	struct adapter *sc;
691 
692 	sc = device_get_softc(dev);
693 
694 	cxgb_free(sc);
695 
696 	return (0);
697 }
698 
699 /*
700  * cxgb_free() is called by the cxgb_controller_detach() routine
701  * to tear down the structures that were built up in
702  * cxgb_controller_attach(), and should be the final piece of work
703  * done when fully unloading the driver.
704  *
705  *
706  *  1. Shutting down the threads started by the cxgb_controller_attach()
707  *     routine.
708  *  2. Stopping the lower level device and all callouts (cxgb_down_locked()).
709  *  3. Detaching all of the port devices created during the
710  *     cxgb_controller_attach() routine.
711  *  4. Removing the device children created via cxgb_controller_attach().
712  *  5. Releasing PCI resources associated with the device.
713  *  6. Turning off the offload support, iff it was turned on.
714  *  7. Destroying the mutexes created in cxgb_controller_attach().
715  *
716  */
717 static void
718 cxgb_free(struct adapter *sc)
719 {
720 	int i;
721 
722 	ADAPTER_LOCK(sc);
723 	sc->flags |= CXGB_SHUTDOWN;
724 	ADAPTER_UNLOCK(sc);
725 
726 	/*
727 	 * Make sure all child devices are gone.
728 	 */
729 	bus_generic_detach(sc->dev);
730 	for (i = 0; i < (sc)->params.nports; i++) {
731 		if (sc->portdev[i] &&
732 		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
733 			device_printf(sc->dev, "failed to delete child port\n");
734 	}
735 
736 	/*
737 	 * At this point, it is as if cxgb_port_detach has run on all ports, and
738 	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
739 	 * all open devices have been closed.
740 	 */
741 	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
742 					   __func__, sc->open_device_map));
743 	for (i = 0; i < sc->params.nports; i++) {
744 		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
745 						  __func__, i));
746 	}
747 
748 	/*
749 	 * Finish off the adapter's callouts.
750 	 */
751 	callout_drain(&sc->cxgb_tick_ch);
752 	callout_drain(&sc->sge_timer_ch);
753 
754 	/*
755 	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
756 	 * sysctls are cleaned up by the kernel linker.
757 	 */
758 	if (sc->flags & FULL_INIT_DONE) {
759  		t3_free_sge_resources(sc);
760  		sc->flags &= ~FULL_INIT_DONE;
761  	}
762 
763 	/*
764 	 * Release all interrupt resources.
765 	 */
766 	cxgb_teardown_interrupts(sc);
767 #ifdef MSI_SUPPORTED
768 	if (sc->flags & (USING_MSI | USING_MSIX)) {
769 		device_printf(sc->dev, "releasing msi message(s)\n");
770 		pci_release_msi(sc->dev);
771 	} else {
772 		device_printf(sc->dev, "no msi message to release\n");
773 	}
774 
775 	if (sc->msix_regs_res != NULL) {
776 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
777 		    sc->msix_regs_res);
778 	}
779 #endif
780 
781 	/*
782 	 * Free the adapter's taskqueue.
783 	 */
784 	if (sc->tq != NULL) {
785 		taskqueue_free(sc->tq);
786 		sc->tq = NULL;
787 	}
788 
789 	if (is_offload(sc)) {
790 		clrbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
791 		cxgb_adapter_unofld(sc);
792 	}
793 
794 #ifdef notyet
795 	if (sc->flags & CXGB_OFLD_INIT)
796 		cxgb_offload_deactivate(sc);
797 #endif
798 	free(sc->filters, M_DEVBUF);
799 	t3_sge_free(sc);
800 
801 	cxgb_offload_exit();
802 
803 	if (sc->udbs_res != NULL)
804 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
805 		    sc->udbs_res);
806 
807 	if (sc->regs_res != NULL)
808 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
809 		    sc->regs_res);
810 
811 	MTX_DESTROY(&sc->mdio_lock);
812 	MTX_DESTROY(&sc->sge.reg_lock);
813 	MTX_DESTROY(&sc->elmer_lock);
814 	ADAPTER_LOCK_DEINIT(sc);
815 }
816 
817 /**
818  *	setup_sge_qsets - configure SGE Tx/Rx/response queues
819  *	@sc: the controller softc
820  *
821  *	Determines how many sets of SGE queues to use and initializes them.
822  *	We support multiple queue sets per port if we have MSI-X, otherwise
823  *	just one queue set per port.
824  */
825 static int
826 setup_sge_qsets(adapter_t *sc)
827 {
828 	int i, j, err, irq_idx = 0, qset_idx = 0;
829 	u_int ntxq = SGE_TXQ_PER_SET;
830 
831 	if ((err = t3_sge_alloc(sc)) != 0) {
832 		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
833 		return (err);
834 	}
835 
836 	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
837 		irq_idx = -1;
838 
839 	for (i = 0; i < (sc)->params.nports; i++) {
840 		struct port_info *pi = &sc->port[i];
841 
842 		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
843 			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
844 			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
845 			    &sc->params.sge.qset[qset_idx], ntxq, pi);
846 			if (err) {
847 				t3_free_sge_resources(sc);
848 				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n",
849 				    err);
850 				return (err);
851 			}
852 		}
853 	}
854 
855 	return (0);
856 }
857 
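/*
 * Tear down and release every interrupt handler and IRQ resource set up by
 * cxgb_setup_interrupts(): the per-queue-set MSI-X vectors first, then the
 * main INTx/MSI/MSI-X interrupt.
 */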
858 static void
859 cxgb_teardown_interrupts(adapter_t *sc)
860 {
861 	int i;
862 
863 	for (i = 0; i < SGE_QSETS; i++) {
864 		if (sc->msix_intr_tag[i] == NULL) {
865 
866 			/* Should have been setup fully or not at all */
867 			KASSERT(sc->msix_irq_res[i] == NULL &&
868 				sc->msix_irq_rid[i] == 0,
869 				("%s: half-done interrupt (%d).", __func__, i));
870 
871 			continue;
872 		}
873 
874 		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
875 				  sc->msix_intr_tag[i]);
876 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
877 				     sc->msix_irq_res[i]);
878 
879 		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
880 		sc->msix_irq_rid[i] = 0;
881 	}
882 
883 	if (sc->intr_tag) {
884 		KASSERT(sc->irq_res != NULL,
885 			("%s: half-done interrupt.", __func__));
886 
887 		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
888 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
889 				     sc->irq_res);
890 
891 		sc->irq_res = sc->intr_tag = NULL;
892 		sc->irq_rid = 0;
893 	}
894 }
895 
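/*
 * Allocate and wire up the adapter's interrupts: the main vector (INTx, MSI,
 * or MSI-X vector 0) first, then one additional MSI-X vector per queue set.
 * Anything set up before a failure is torn down again.
 */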
896 static int
897 cxgb_setup_interrupts(adapter_t *sc)
898 {
899 	struct resource *res;
900 	void *tag;
901 	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
902 
903 	sc->irq_rid = intr_flag ? 1 : 0;
904 	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
905 					     RF_SHAREABLE | RF_ACTIVE);
906 	if (sc->irq_res == NULL) {
907 		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
908 			      intr_flag, sc->irq_rid);
909 		err = EINVAL;
910 		sc->irq_rid = 0;
911 	} else {
912 		err = bus_setup_intr(sc->dev, sc->irq_res,
913 				     INTR_MPSAFE | INTR_TYPE_NET,
914 #ifdef INTR_FILTERS
915 				     NULL,
916 #endif
917 				     sc->cxgb_intr, sc, &sc->intr_tag);
918 
919 		if (err) {
920 			device_printf(sc->dev,
921 				      "Cannot set up interrupt (%x, %u, %d)\n",
922 				      intr_flag, sc->irq_rid, err);
923 			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
924 					     sc->irq_res);
925 			sc->irq_res = sc->intr_tag = NULL;
926 			sc->irq_rid = 0;
927 		}
928 	}
929 
930 	/* That's all for INTx or MSI */
931 	if (!(intr_flag & USING_MSIX) || err)
932 		return (err);
933 
934 	for (i = 0; i < sc->msi_count - 1; i++) {
935 		rid = i + 2;
936 		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
937 					     RF_SHAREABLE | RF_ACTIVE);
938 		if (res == NULL) {
939 			device_printf(sc->dev, "Cannot allocate interrupt "
940 				      "for message %d\n", rid);
941 			err = EINVAL;
942 			break;
943 		}
944 
945 		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
946 #ifdef INTR_FILTERS
947 				     NULL,
948 #endif
949 				     t3_intr_msix, &sc->sge.qs[i], &tag);
950 		if (err) {
951 			device_printf(sc->dev, "Cannot set up interrupt "
952 				      "for message %d (%d)\n", rid, err);
953 			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
954 			break;
955 		}
956 
957 		sc->msix_irq_rid[i] = rid;
958 		sc->msix_irq_res[i] = res;
959 		sc->msix_intr_tag[i] = tag;
960 	}
961 
962 	if (err)
963 		cxgb_teardown_interrupts(sc);
964 
965 	return (err);
966 }
967 
968 
969 static int
970 cxgb_port_probe(device_t dev)
971 {
972 	struct port_info *p;
973 	char buf[80];
974 	const char *desc;
975 
976 	p = device_get_softc(dev);
977 	desc = p->phy.desc;
978 	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
979 	device_set_desc_copy(dev, buf);
980 	return (0);
981 }
982 
983 
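/*
 * Create the per-port character device (named after the ifnet) through
 * which cxgb_extension_ioctl() is reached.
 */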
984 static int
985 cxgb_makedev(struct port_info *pi)
986 {
987 
988 	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
989 	    UID_ROOT, GID_WHEEL, 0600, if_name(pi->ifp));
990 
991 	if (pi->port_cdev == NULL)
992 		return (ENOMEM);
993 
994 	pi->port_cdev->si_drv1 = (void *)pi;
995 
996 	return (0);
997 }
998 
999 #ifndef LRO_SUPPORTED
1000 #ifdef IFCAP_LRO
1001 #undef IFCAP_LRO
1002 #endif
1003 #define IFCAP_LRO 0x0
1004 #endif
1005 
1006 #ifdef TSO_SUPPORTED
1007 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO)
1008 /* Don't enable TSO6 yet */
1009 #define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4 | IFCAP_JUMBO_MTU | IFCAP_LRO)
1010 #else
1011 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
1012 /* Don't enable TSO6 yet */
1013 #define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM |  IFCAP_JUMBO_MTU)
1014 #define IFCAP_TSO4 0x0
1015 #define IFCAP_TSO6 0x0
1016 #define CSUM_TSO   0x0
1017 #endif
1018 
1019 
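/*
 * Attach a single port: allocate and initialize its ifnet, advertise the
 * checksum/TSO/VLAN capabilities, attach it to the ethernet layer, create
 * the port's character device, and build its media list.
 */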
1020 static int
1021 cxgb_port_attach(device_t dev)
1022 {
1023 	struct port_info *p;
1024 	struct ifnet *ifp;
1025 	int err;
1026 	struct adapter *sc;
1027 
1028 
1029 	p = device_get_softc(dev);
1030 	sc = p->adapter;
1031 	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
1032 	    device_get_unit(device_get_parent(dev)), p->port_id);
1033 	PORT_LOCK_INIT(p, p->lockbuf);
1034 
1035 	/* Allocate an ifnet object and set it up */
1036 	ifp = p->ifp = if_alloc(IFT_ETHER);
1037 	if (ifp == NULL) {
1038 		device_printf(dev, "Cannot allocate ifnet\n");
1039 		return (ENOMEM);
1040 	}
1041 
1042 	/*
1043 	 * Note that there is currently no watchdog timer.
1044 	 */
1045 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1046 	ifp->if_init = cxgb_init;
1047 	ifp->if_softc = p;
1048 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1049 	ifp->if_ioctl = cxgb_ioctl;
1050 	ifp->if_start = cxgb_start;
1051 
1052 	ifp->if_snd.ifq_drv_maxlen = cxgb_snd_queue_len;
1053 	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
1054 	IFQ_SET_READY(&ifp->if_snd);
1055 
1056 	ifp->if_hwassist = ifp->if_capabilities = ifp->if_capenable = 0;
1057 	ifp->if_capabilities |= CXGB_CAP;
1058 	ifp->if_capenable |= CXGB_CAP_ENABLE;
1059 	ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO);
1060 	/*
1061 	 * disable TSO on 4-port - it isn't supported by the firmware yet
1062 	 */
1063 	if (p->adapter->params.nports > 2) {
1064 		ifp->if_capabilities &= ~(IFCAP_TSO4 | IFCAP_TSO6);
1065 		ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_TSO6);
1066 		ifp->if_hwassist &= ~CSUM_TSO;
1067 	}
1068 
1069 	ether_ifattach(ifp, p->hw_addr);
1070 	ifp->if_transmit = cxgb_transmit;
1071 	ifp->if_qflush = cxgb_qflush;
1072 
1073 	/*
1074 	 * Only default to jumbo frames on 10GigE
1075 	 */
1076 	if (p->adapter->params.nports <= 2)
1077 		ifp->if_mtu = ETHERMTU_JUMBO;
1078 	if ((err = cxgb_makedev(p)) != 0) {
1079 		printf("makedev failed %d\n", err);
1080 		return (err);
1081 	}
1082 
1083 	/* Create a list of media supported by this port */
1084 	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1085 	    cxgb_media_status);
1086 	cxgb_build_medialist(p);
1087 
1088 	t3_sge_init_port(p);
1089 
1090 	return (err);
1091 }
1092 
1093 /*
1094  * cxgb_port_detach() is called via the device_detach method when
1095  * cxgb_free() calls bus_generic_detach().  It is responsible for
1096  * removing the device from the view of the kernel, i.e. from all
1097  * interfaces lists etc.  This routine is only called when the driver is
1098  * being unloaded, not when the link goes down.
1099  */
1100 static int
1101 cxgb_port_detach(device_t dev)
1102 {
1103 	struct port_info *p;
1104 	struct adapter *sc;
1105 	int i;
1106 
1107 	p = device_get_softc(dev);
1108 	sc = p->adapter;
1109 
1110 	/* Tell cxgb_ioctl and if_init that the port is going away */
1111 	ADAPTER_LOCK(sc);
1112 	SET_DOOMED(p);
1113 	wakeup(&sc->flags);
1114 	while (IS_BUSY(sc))
1115 		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1116 	SET_BUSY(sc);
1117 	ADAPTER_UNLOCK(sc);
1118 
1119 	if (p->port_cdev != NULL)
1120 		destroy_dev(p->port_cdev);
1121 
1122 	cxgb_uninit_synchronized(p);
1123 	ether_ifdetach(p->ifp);
1124 
1125 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1126 		struct sge_qset *qs = &sc->sge.qs[i];
1127 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1128 
1129 		callout_drain(&txq->txq_watchdog);
1130 		callout_drain(&txq->txq_timer);
1131 	}
1132 
1133 	PORT_LOCK_DEINIT(p);
1134 	if_free(p->ifp);
1135 	p->ifp = NULL;
1136 
1137 	ADAPTER_LOCK(sc);
1138 	CLR_BUSY(sc);
1139 	wakeup_one(&sc->flags);
1140 	ADAPTER_UNLOCK(sc);
1141 	return (0);
1142 }
1143 
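/*
 * Quiesce the hardware after a fatal error (stop the SGE, disable MAC
 * TX/RX and interrupts) and log the firmware status words for post-mortem
 * analysis.
 */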
1144 void
1145 t3_fatal_err(struct adapter *sc)
1146 {
1147 	u_int fw_status[4];
1148 
1149 	if (sc->flags & FULL_INIT_DONE) {
1150 		t3_sge_stop(sc);
1151 		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1152 		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1153 		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1154 		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1155 		t3_intr_disable(sc);
1156 	}
1157 	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1158 	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1159 		device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1160 		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1161 }
1162 
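/*
 * Walk the device's PCI capability list and return the config-space offset
 * of the capability with the given ID, or 0 if it is not present.
 */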
1163 int
1164 t3_os_find_pci_capability(adapter_t *sc, int cap)
1165 {
1166 	device_t dev;
1167 	struct pci_devinfo *dinfo;
1168 	pcicfgregs *cfg;
1169 	uint32_t status;
1170 	uint8_t ptr;
1171 
1172 	dev = sc->dev;
1173 	dinfo = device_get_ivars(dev);
1174 	cfg = &dinfo->cfg;
1175 
1176 	status = pci_read_config(dev, PCIR_STATUS, 2);
1177 	if (!(status & PCIM_STATUS_CAPPRESENT))
1178 		return (0);
1179 
1180 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1181 	case 0:
1182 	case 1:
1183 		ptr = PCIR_CAP_PTR;
1184 		break;
1185 	case 2:
1186 		ptr = PCIR_CAP_PTR_2;
1187 		break;
1188 	default:
1189 		return (0);
1190 		break;
1191 	}
1192 	ptr = pci_read_config(dev, ptr, 1);
1193 
1194 	while (ptr != 0) {
1195 		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1196 			return (ptr);
1197 		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1198 	}
1199 
1200 	return (0);
1201 }
1202 
1203 int
1204 t3_os_pci_save_state(struct adapter *sc)
1205 {
1206 	device_t dev;
1207 	struct pci_devinfo *dinfo;
1208 
1209 	dev = sc->dev;
1210 	dinfo = device_get_ivars(dev);
1211 
1212 	pci_cfg_save(dev, dinfo, 0);
1213 	return (0);
1214 }
1215 
1216 int
1217 t3_os_pci_restore_state(struct adapter *sc)
1218 {
1219 	device_t dev;
1220 	struct pci_devinfo *dinfo;
1221 
1222 	dev = sc->dev;
1223 	dinfo = device_get_ivars(dev);
1224 
1225 	pci_cfg_restore(dev, dinfo);
1226 	return (0);
1227 }
1228 
1229 /**
1230  *	t3_os_link_changed - handle link status changes
1231  *	@sc: the adapter associated with the link change
1232  *	@port_id: the port index whose link status has changed
1233  *	@link_status: the new status of the link
1234  *	@speed: the new speed setting
1235  *	@duplex: the new duplex setting
1236  *	@fc: the new flow-control setting
1237  *
1238  *	This is the OS-dependent handler for link status changes.  The OS
1239  *	neutral handler takes care of most of the processing for these events,
1240  *	then calls this handler for any OS-specific processing.
1241  */
1242 void
1243 t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1244      int duplex, int fc, int mac_was_reset)
1245 {
1246 	struct port_info *pi = &adapter->port[port_id];
1247 	struct ifnet *ifp = pi->ifp;
1248 
1249 	/* no race with detach, so ifp should always be good */
1250 	KASSERT(ifp, ("%s: if detached.", __func__));
1251 
1252 	/* Reapply mac settings if they were lost due to a reset */
1253 	if (mac_was_reset) {
1254 		PORT_LOCK(pi);
1255 		cxgb_update_mac_settings(pi);
1256 		PORT_UNLOCK(pi);
1257 	}
1258 
1259 	if (link_status) {
1260 		ifp->if_baudrate = IF_Mbps(speed);
1261 		if_link_state_change(ifp, LINK_STATE_UP);
1262 	} else
1263 		if_link_state_change(ifp, LINK_STATE_DOWN);
1264 }
1265 
1266 /**
1267  *	t3_os_phymod_changed - handle PHY module changes
1268  *	@phy: the PHY reporting the module change
1269  *	@mod_type: new module type
1270  *
1271  *	This is the OS-dependent handler for PHY module changes.  It is
1272  *	invoked when a PHY module is removed or inserted for any OS-specific
1273  *	processing.
1274  */
1275 void t3_os_phymod_changed(struct adapter *adap, int port_id)
1276 {
1277 	static const char *mod_str[] = {
1278 		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX", "unknown"
1279 	};
1280 	struct port_info *pi = &adap->port[port_id];
1281 	int mod = pi->phy.modtype;
1282 
1283 	if (mod != pi->media.ifm_cur->ifm_data)
1284 		cxgb_build_medialist(pi);
1285 
1286 	if (mod == phy_modtype_none)
1287 		if_printf(pi->ifp, "PHY module unplugged\n");
1288 	else {
1289 		KASSERT(mod < ARRAY_SIZE(mod_str),
1290 			("invalid PHY module type %d", mod));
1291 		if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1292 	}
1293 }
1294 
1295 /*
1296  * Interrupt-context handler for external (PHY) interrupts.
1297  */
1298 void
1299 t3_os_ext_intr_handler(adapter_t *sc)
1300 {
1301 	if (cxgb_debug)
1302 		printf("t3_os_ext_intr_handler\n");
1303 	/*
1304 	 * Schedule a task to handle external interrupts as they may be slow
1305 	 * and we use a mutex to protect MDIO registers.  We disable PHY
1306 	 * interrupts in the meantime and let the task reenable them when
1307 	 * it's done.
1308 	 */
1309 	if (sc->slow_intr_mask) {
1310 		ADAPTER_LOCK(sc);
1311 		sc->slow_intr_mask &= ~F_T3DBG;
1312 		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
1313 		taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
1314 		ADAPTER_UNLOCK(sc);
1315 	}
1316 }
1317 
1318 void
1319 t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1320 {
1321 
1322 	/*
1323 	 * The ifnet might not be allocated before this gets called,
1324 	 * as this is called early on in attach by t3_prep_adapter, so
1325 	 * save the address off in the port structure.
1326 	 */
1327 	if (cxgb_debug)
1328 		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1329 	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1330 }
1331 
1332 /*
1333  * Programs the XGMAC based on the settings in the ifnet.  These settings
1334  * include MTU, MAC address, mcast addresses, etc.
1335  */
1336 static void
1337 cxgb_update_mac_settings(struct port_info *p)
1338 {
1339 	struct ifnet *ifp = p->ifp;
1340 	struct t3_rx_mode rm;
1341 	struct cmac *mac = &p->mac;
1342 	int mtu, hwtagging;
1343 
1344 	PORT_LOCK_ASSERT_OWNED(p);
1345 
1346 	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1347 
1348 	mtu = ifp->if_mtu;
1349 	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1350 		mtu += ETHER_VLAN_ENCAP_LEN;
1351 
1352 	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1353 
1354 	t3_mac_set_mtu(mac, mtu);
1355 	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1356 	t3_mac_set_address(mac, 0, p->hw_addr);
1357 	t3_init_rx_mode(&rm, p);
1358 	t3_mac_set_rx_mode(mac, &rm);
1359 }
1360 
1361 
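/*
 * Wait (up to five 10ms sleeps) for queue set 0's response queue to see 'n'
 * more offload packets, i.e. for the firmware to answer that many
 * management requests.  Returns 0 or ETIMEDOUT.
 */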
1362 static int
1363 await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1364 			      unsigned long n)
1365 {
1366 	int attempts = 5;
1367 
1368 	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1369 		if (!--attempts)
1370 			return (ETIMEDOUT);
1371 		t3_os_sleep(10);
1372 	}
1373 	return 0;
1374 }
1375 
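/*
 * Issue dummy SMT, L2T and routing-table writes (plus one TCB field write)
 * so that the TP's memories end up with valid parity, then wait for the
 * firmware to acknowledge all 16 + 2048 + 2048 + 1 requests.
 */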
1376 static int
1377 init_tp_parity(struct adapter *adap)
1378 {
1379 	int i;
1380 	struct mbuf *m;
1381 	struct cpl_set_tcb_field *greq;
1382 	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1383 
1384 	t3_tp_set_offload_mode(adap, 1);
1385 
1386 	for (i = 0; i < 16; i++) {
1387 		struct cpl_smt_write_req *req;
1388 
1389 		m = m_gethdr(M_WAITOK, MT_DATA);
1390 		req = mtod(m, struct cpl_smt_write_req *);
1391 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1392 		memset(req, 0, sizeof(*req));
1393 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1394 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1395 		req->iff = i;
1396 		t3_mgmt_tx(adap, m);
1397 	}
1398 
1399 	for (i = 0; i < 2048; i++) {
1400 		struct cpl_l2t_write_req *req;
1401 
1402 		m = m_gethdr(M_WAITOK, MT_DATA);
1403 		req = mtod(m, struct cpl_l2t_write_req *);
1404 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1405 		memset(req, 0, sizeof(*req));
1406 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1407 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1408 		req->params = htonl(V_L2T_W_IDX(i));
1409 		t3_mgmt_tx(adap, m);
1410 	}
1411 
1412 	for (i = 0; i < 2048; i++) {
1413 		struct cpl_rte_write_req *req;
1414 
1415 		m = m_gethdr(M_WAITOK, MT_DATA);
1416 		req = mtod(m, struct cpl_rte_write_req *);
1417 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1418 		memset(req, 0, sizeof(*req));
1419 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1420 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1421 		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1422 		t3_mgmt_tx(adap, m);
1423 	}
1424 
1425 	m = m_gethdr(M_WAITOK, MT_DATA);
1426 	greq = mtod(m, struct cpl_set_tcb_field *);
1427 	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1428 	memset(greq, 0, sizeof(*greq));
1429 	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1430 	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1431 	greq->mask = htobe64(1);
1432 	t3_mgmt_tx(adap, m);
1433 
1434 	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1435 	t3_tp_set_offload_mode(adap, 0);
1436 	return (i);
1437 }
1438 
1439 /**
1440  *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1441  *	@adap: the adapter
1442  *
1443  *	Sets up RSS to distribute packets to multiple receive queues.  We
1444  *	configure the RSS CPU lookup table to distribute to the number of HW
1445  *	receive queues, and the response queue lookup table to narrow that
1446  *	down to the response queues actually configured for each port.
1447  *	We always configure the RSS mapping for two ports since the mapping
1448  *	table has plenty of entries.
1449  */
1450 static void
1451 setup_rss(adapter_t *adap)
1452 {
1453 	int i;
1454 	u_int nq[2];
1455 	uint8_t cpus[SGE_QSETS + 1];
1456 	uint16_t rspq_map[RSS_TABLE_SIZE];
1457 
1458 	for (i = 0; i < SGE_QSETS; ++i)
1459 		cpus[i] = i;
1460 	cpus[SGE_QSETS] = 0xff;
1461 
1462 	nq[0] = nq[1] = 0;
1463 	for_each_port(adap, i) {
1464 		const struct port_info *pi = adap2pinfo(adap, i);
1465 
1466 		nq[pi->tx_chan] += pi->nqsets;
1467 	}
1468 	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1469 		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1470 		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1471 	}
1472 
1473 	/* Calculate the reverse RSS map table */
1474 	for (i = 0; i < SGE_QSETS; ++i)
1475 		adap->rrss_map[i] = 0xff;
1476 	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1477 		if (adap->rrss_map[rspq_map[i]] == 0xff)
1478 			adap->rrss_map[rspq_map[i]] = i;
1479 
1480 	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1481 		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1482 	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1483 	              cpus, rspq_map);
1484 
1485 }
1486 
1487 /*
1488  * Sends an mbuf to an offload queue driver
1489  * after dealing with any active network taps.
1490  */
1491 static inline int
1492 offload_tx(struct t3cdev *tdev, struct mbuf *m)
1493 {
1494 	int ret;
1495 
1496 	ret = t3_offload_tx(tdev, m);
1497 	return (ret);
1498 }
1499 
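/*
 * Program source MAC table entry 'idx' with the corresponding port's MAC
 * address by sending a CPL_SMT_WRITE_REQ through the offload queue.
 */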
1500 static int
1501 write_smt_entry(struct adapter *adapter, int idx)
1502 {
1503 	struct port_info *pi = &adapter->port[idx];
1504 	struct cpl_smt_write_req *req;
1505 	struct mbuf *m;
1506 
1507 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1508 		return (ENOMEM);
1509 
1510 	req = mtod(m, struct cpl_smt_write_req *);
1511 	m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
1512 
1513 	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1514 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1515 	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1516 	req->iff = idx;
1517 	memset(req->src_mac1, 0, sizeof(req->src_mac1));
1518 	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1519 
1520 	m_set_priority(m, 1);
1521 
1522 	offload_tx(&adapter->tdev, m);
1523 
1524 	return (0);
1525 }
1526 
1527 static int
1528 init_smt(struct adapter *adapter)
1529 {
1530 	int i;
1531 
1532 	for_each_port(adapter, i)
1533 		write_smt_entry(adapter, i);
1534 	return 0;
1535 }
1536 
1537 static void
1538 init_port_mtus(adapter_t *adapter)
1539 {
1540 	unsigned int mtus = ETHERMTU | (ETHERMTU << 16);
1541 
1542 	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1543 }
1544 
1545 static void
1546 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1547 			      int hi, int port)
1548 {
1549 	struct mbuf *m;
1550 	struct mngt_pktsched_wr *req;
1551 
1552 	m = m_gethdr(M_DONTWAIT, MT_DATA);
1553 	if (m) {
1554 		req = mtod(m, struct mngt_pktsched_wr *);
1555 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1556 		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1557 		req->sched = sched;
1558 		req->idx = qidx;
1559 		req->min = lo;
1560 		req->max = hi;
1561 		req->binding = port;
1562 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1563 		t3_mgmt_tx(adap, m);
1564 	}
1565 }
1566 
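/*
 * Bind every queue set to its port's TX channel by issuing one
 * packet-scheduler management command per queue set.
 */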
1567 static void
1568 bind_qsets(adapter_t *sc)
1569 {
1570 	int i, j;
1571 
1572 	for (i = 0; i < (sc)->params.nports; ++i) {
1573 		const struct port_info *pi = adap2pinfo(sc, i);
1574 
1575 		for (j = 0; j < pi->nqsets; ++j) {
1576 			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1577 					  -1, pi->tx_chan);
1578 
1579 		}
1580 	}
1581 }
1582 
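/*
 * If the protocol SRAM image stored in the EEPROM does not match the
 * version this driver was built for, load the matching cxgb_t3<rev>_tp_eeprom
 * image and write it into the EEPROM at TP_SRAM_OFFSET.
 */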
1583 static void
1584 update_tpeeprom(struct adapter *adap)
1585 {
1586 #ifdef FIRMWARE_LATEST
1587 	const struct firmware *tpeeprom;
1588 #else
1589 	struct firmware *tpeeprom;
1590 #endif
1591 
1592 	uint32_t version;
1593 	unsigned int major, minor;
1594 	int ret, len;
1595 	char rev, name[32];
1596 
1597 	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1598 
1599 	major = G_TP_VERSION_MAJOR(version);
1600 	minor = G_TP_VERSION_MINOR(version);
1601 	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1602 		return;
1603 
1604 	rev = t3rev2char(adap);
1605 	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1606 
1607 	tpeeprom = firmware_get(name);
1608 	if (tpeeprom == NULL) {
1609 		device_printf(adap->dev,
1610 			      "could not load TP EEPROM: unable to load %s\n",
1611 			      name);
1612 		return;
1613 	}
1614 
1615 	len = tpeeprom->datasize - 4;
1616 
1617 	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1618 	if (ret)
1619 		goto release_tpeeprom;
1620 
1621 	if (len != TP_SRAM_LEN) {
1622 		device_printf(adap->dev,
1623 			      "%s length is wrong len=%d expected=%d\n", name,
1624 			      len, TP_SRAM_LEN);
1625 		return;
1626 	}
1627 
1628 	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1629 	    TP_SRAM_OFFSET);
1630 
1631 	if (!ret) {
1632 		device_printf(adap->dev,
1633 			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1634 			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1635 	} else
1636 		device_printf(adap->dev,
1637 			      "Protocol SRAM image update in EEPROM failed\n");
1638 
1639 release_tpeeprom:
1640 	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1641 
1642 	return;
1643 }
1644 
1645 static int
1646 update_tpsram(struct adapter *adap)
1647 {
1648 #ifdef FIRMWARE_LATEST
1649 	const struct firmware *tpsram;
1650 #else
1651 	struct firmware *tpsram;
1652 #endif
1653 	int ret;
1654 	char rev, name[32];
1655 
1656 	rev = t3rev2char(adap);
1657 	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1658 
1659 	update_tpeeprom(adap);
1660 
1661 	tpsram = firmware_get(name);
1662 	if (tpsram == NULL){
1663 		device_printf(adap->dev, "could not load TP SRAM\n");
1664 		return (EINVAL);
1665 	} else
1666 		device_printf(adap->dev, "updating TP SRAM\n");
1667 
1668 	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1669 	if (ret)
1670 		goto release_tpsram;
1671 
1672 	ret = t3_set_proto_sram(adap, tpsram->data);
1673 	if (ret)
1674 		device_printf(adap->dev, "loading protocol SRAM failed\n");
1675 
1676 release_tpsram:
1677 	firmware_put(tpsram, FIRMWARE_UNLOAD);
1678 
1679 	return ret;
1680 }
1681 
1682 /**
1683  *	cxgb_up - enable the adapter
1684  *	@adap: adapter being enabled
1685  *
1686  *	Called when the first port is enabled, this function performs the
1687  *	actions necessary to make an adapter operational, such as completing
1688  *	the initialization of HW modules, and enabling interrupts.
1689  */
1690 static int
1691 cxgb_up(struct adapter *sc)
1692 {
1693 	int err = 0;
1694 
1695 	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1696 					   __func__, sc->open_device_map));
1697 
1698 	if ((sc->flags & FULL_INIT_DONE) == 0) {
1699 
1700 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1701 
1702 		if ((sc->flags & FW_UPTODATE) == 0)
1703 			if ((err = upgrade_fw(sc)))
1704 				goto out;
1705 
1706 		if ((sc->flags & TPS_UPTODATE) == 0)
1707 			if ((err = update_tpsram(sc)))
1708 				goto out;
1709 
1710 		err = t3_init_hw(sc, 0);
1711 		if (err)
1712 			goto out;
1713 
1714 		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1715 		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1716 
1717 		err = setup_sge_qsets(sc);
1718 		if (err)
1719 			goto out;
1720 
1721 		setup_rss(sc);
1722 
1723 		t3_intr_clear(sc);
1724 		err = cxgb_setup_interrupts(sc);
1725 		if (err)
1726 			goto out;
1727 
1728 		t3_add_configured_sysctls(sc);
1729 		sc->flags |= FULL_INIT_DONE;
1730 	}
1731 
1732 	t3_intr_clear(sc);
1733 	t3_sge_start(sc);
1734 	t3_intr_enable(sc);
1735 
1736 	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1737 	    is_offload(sc) && init_tp_parity(sc) == 0)
1738 		sc->flags |= TP_PARITY_INIT;
1739 
1740 	if (sc->flags & TP_PARITY_INIT) {
1741 		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1742 		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1743 	}
1744 
1745 	if (!(sc->flags & QUEUES_BOUND)) {
1746 		bind_qsets(sc);
1747 		sc->flags |= QUEUES_BOUND;
1748 	}
1749 
1750 	t3_sge_reset_adapter(sc);
1751 out:
1752 	return (err);
1753 }
1754 
1755 /*
1756  * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1757  * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1758  * during controller_detach, not here.
1759  */
1760 static void
1761 cxgb_down(struct adapter *sc)
1762 {
1763 	t3_sge_stop(sc);
1764 	t3_intr_disable(sc);
1765 }
1766 
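/*
 * Switch the adapter into offload (TOE) mode: mark the offload device open,
 * point the t3cdev at this port's ifnet, load the MTU tables, program the
 * SMT, and notify all registered offload clients.
 */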
1767 static int
1768 offload_open(struct port_info *pi)
1769 {
1770 	struct adapter *sc = pi->adapter;
1771 	struct t3cdev *tdev = &sc->tdev;
1772 
1773 	setbit(&sc->open_device_map, OFFLOAD_DEVMAP_BIT);
1774 
1775 	t3_tp_set_offload_mode(sc, 1);
1776 	tdev->lldev = pi->ifp;
1777 	init_port_mtus(sc);
1778 	t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd,
1779 		     sc->params.rev == 0 ?  sc->port[0].ifp->if_mtu : 0xffff);
1780 	init_smt(sc);
1781 	cxgb_add_clients(tdev);
1782 
1783 	return (0);
1784 }
1785 
1786 static int
1787 offload_close(struct t3cdev *tdev)
1788 {
1789 	struct adapter *adapter = tdev2adap(tdev);
1790 
1791 	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1792 		return (0);
1793 
1794 	/* Call back all registered clients */
1795 	cxgb_remove_clients(tdev);
1796 
1797 	tdev->lldev = NULL;
1798 	cxgb_set_dummy_ops(tdev);
1799 	t3_tp_set_offload_mode(adapter, 0);
1800 
1801 	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1802 
1803 	return (0);
1804 }
1805 
1806 /*
1807  * if_init for cxgb ports.
1808  */
1809 static void
1810 cxgb_init(void *arg)
1811 {
1812 	struct port_info *p = arg;
1813 	struct adapter *sc = p->adapter;
1814 
1815 	ADAPTER_LOCK(sc);
1816 	cxgb_init_locked(p); /* releases adapter lock */
1817 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1818 }
1819 
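/*
 * Bring the port up.  Called with the adapter lock held; if one-time
 * adapter initialization (which may sleep) is still needed, the lock is
 * dropped around it with the adapter marked busy.  Returns with the
 * adapter lock released.
 */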
1820 static int
1821 cxgb_init_locked(struct port_info *p)
1822 {
1823 	struct adapter *sc = p->adapter;
1824 	struct ifnet *ifp = p->ifp;
1825 	struct cmac *mac = &p->mac;
1826 	int i, rc = 0, may_sleep = 0;
1827 
1828 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1829 
1830 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1831 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1832 			rc = EINTR;
1833 			goto done;
1834 		}
1835 	}
1836 	if (IS_DOOMED(p)) {
1837 		rc = ENXIO;
1838 		goto done;
1839 	}
1840 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1841 
1842 	/*
1843 	 * The code that runs during one-time adapter initialization can sleep
1844 	 * so it's important not to hold any locks across it.
1845 	 */
1846 	may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1847 
1848 	if (may_sleep) {
1849 		SET_BUSY(sc);
1850 		ADAPTER_UNLOCK(sc);
1851 	}
1852 
1853 	if (sc->open_device_map == 0) {
1854 		if ((rc = cxgb_up(sc)) != 0)
1855 			goto done;
1856 
1857 		if (is_offload(sc) && !ofld_disable && offload_open(p))
1858 			log(LOG_WARNING,
1859 			    "Could not initialize offload capabilities\n");
1860 	}
1861 
1862 	PORT_LOCK(p);
1863 	if (isset(&sc->open_device_map, p->port_id) &&
1864 	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1865 		PORT_UNLOCK(p);
1866 		goto done;
1867 	}
1868 	t3_port_intr_enable(sc, p->port_id);
1869 	if (!mac->multiport)
1870 		t3_mac_init(mac);
1871 	cxgb_update_mac_settings(p);
1872 	t3_link_start(&p->phy, mac, &p->link_config);
1873 	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1874 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1875 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1876 	PORT_UNLOCK(p);
1877 
1878 	t3_link_changed(sc, p->port_id);
1879 
1880 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1881 		struct sge_qset *qs = &sc->sge.qs[i];
1882 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1883 
1884 		callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1885 				 txq->txq_watchdog.c_cpu);
1886 	}
1887 
1888 	/* all ok */
1889 	setbit(&sc->open_device_map, p->port_id);
1890 
1891 done:
1892 	if (may_sleep) {
1893 		ADAPTER_LOCK(sc);
1894 		KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1895 		CLR_BUSY(sc);
1896 		wakeup_one(&sc->flags);
1897 	}
1898 	ADAPTER_UNLOCK(sc);
1899 	return (rc);
1900 }
1901 
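/*
 * Counterpart of cxgb_init_locked.  Called with the adapter lock held;
 * marks the adapter busy, drops the lock around cxgb_uninit_synchronized,
 * and releases the lock before returning.
 */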
1902 static int
1903 cxgb_uninit_locked(struct port_info *p)
1904 {
1905 	struct adapter *sc = p->adapter;
1906 	int rc;
1907 
1908 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1909 
1910 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1911 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1912 			rc = EINTR;
1913 			goto done;
1914 		}
1915 	}
1916 	if (IS_DOOMED(p)) {
1917 		rc = ENXIO;
1918 		goto done;
1919 	}
1920 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1921 	SET_BUSY(sc);
1922 	ADAPTER_UNLOCK(sc);
1923 
1924 	rc = cxgb_uninit_synchronized(p);
1925 
1926 	ADAPTER_LOCK(sc);
1927 	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1928 	CLR_BUSY(sc);
1929 	wakeup_one(&sc->flags);
1930 done:
1931 	ADAPTER_UNLOCK(sc);
1932 	return (rc);
1933 }
1934 
1935 /*
1936  * Called on "ifconfig down", and from port_detach
1937  */
1938 static int
1939 cxgb_uninit_synchronized(struct port_info *pi)
1940 {
1941 	struct adapter *sc = pi->adapter;
1942 	struct ifnet *ifp = pi->ifp;
1943 
1944 	/*
1945 	 * taskqueue_drain may cause a deadlock if the adapter lock is held.
1946 	 */
1947 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1948 
1949 	/*
1950 	 * Clear this port's bit from the open device map, and then drain all
1951 	 * the tasks that can access/manipulate this port's port_info or ifp.
1952 	 * We disable this port's interrupts here, so the slow/ext
1953 	 * interrupt tasks won't be enqueued.  The tick task will continue to
1954 	 * be enqueued every second but the runs after this drain will not see
1955 	 * this port in the open device map.
1956 	 *
1957 	 * A well-behaved task must take open_device_map into account and ignore
1958 	 * ports that are not open.
1959 	 */
1960 	clrbit(&sc->open_device_map, pi->port_id);
1961 	t3_port_intr_disable(sc, pi->port_id);
1962 	taskqueue_drain(sc->tq, &sc->slow_intr_task);
1963 	taskqueue_drain(sc->tq, &sc->ext_intr_task);
1964 	taskqueue_drain(sc->tq, &sc->tick_task);
1965 
1966 	PORT_LOCK(pi);
1967 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1968 
1969 	/* disable pause frames */
1970 	t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1971 
1972 	/* Reset RX FIFO HWM */
1973 	t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1974 			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1975 
1976 	DELAY(100 * 1000);
1977 
1978 	/* Wait for TXFIFO empty */
1979 	t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1980 			F_TXFIFO_EMPTY, 1, 20, 5);
1981 
1982 	DELAY(100 * 1000);
1983 	t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1984 
1986 	pi->phy.ops->power_down(&pi->phy, 1);
1987 
1988 	PORT_UNLOCK(pi);
1989 
1990 	pi->link_config.link_ok = 0;
1991 	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1992 
1993 	if ((sc->open_device_map & PORT_MASK) == 0)
1994 		offload_close(&sc->tdev);
1995 
1996 	if (sc->open_device_map == 0)
1997 		cxgb_down(pi->adapter);
1998 
1999 	return (0);
2000 }
2001 
2002 #ifdef LRO_SUPPORTED
2003 /*
2004  * Mark LRO enabled or disabled in all qsets for this port
2005  */
2006 static int
2007 cxgb_set_lro(struct port_info *p, int enabled)
2008 {
2009 	int i;
2010 	struct adapter *adp = p->adapter;
2011 	struct sge_qset *q;
2012 
2013 	PORT_LOCK_ASSERT_OWNED(p);
2014 	for (i = 0; i < p->nqsets; i++) {
2015 		q = &adp->sge.qs[p->first_qset + i];
2016 		q->lro.enabled = (enabled != 0);
2017 	}
2018 	return (0);
2019 }
2020 #endif
2021 
2022 static int
2023 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
2024 {
2025 	struct port_info *p = ifp->if_softc;
2026 	struct adapter *sc = p->adapter;
2027 	struct ifreq *ifr = (struct ifreq *)data;
2028 	int flags, error = 0, mtu;
2029 	uint32_t mask;
2030 
2031 	switch (command) {
2032 	case SIOCSIFMTU:
2033 		ADAPTER_LOCK(sc);
2034 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2035 		if (error) {
2036 fail:
2037 			ADAPTER_UNLOCK(sc);
2038 			return (error);
2039 		}
2040 
2041 		mtu = ifr->ifr_mtu;
2042 		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
2043 			error = EINVAL;
2044 		} else {
2045 			ifp->if_mtu = mtu;
2046 			PORT_LOCK(p);
2047 			cxgb_update_mac_settings(p);
2048 			PORT_UNLOCK(p);
2049 		}
2050 		ADAPTER_UNLOCK(sc);
2051 		break;
2052 	case SIOCSIFFLAGS:
2053 		ADAPTER_LOCK(sc);
2054 		if (IS_DOOMED(p)) {
2055 			error = ENXIO;
2056 			goto fail;
2057 		}
2058 		if (ifp->if_flags & IFF_UP) {
2059 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2060 				flags = p->if_flags;
2061 				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
2062 				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
2063 					if (IS_BUSY(sc)) {
2064 						error = EBUSY;
2065 						goto fail;
2066 					}
2067 					PORT_LOCK(p);
2068 					cxgb_update_mac_settings(p);
2069 					PORT_UNLOCK(p);
2070 				}
2071 				ADAPTER_UNLOCK(sc);
2072 			} else
2073 				error = cxgb_init_locked(p);
2074 			p->if_flags = ifp->if_flags;
2075 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2076 			error = cxgb_uninit_locked(p);
2077 		else
2078 			ADAPTER_UNLOCK(sc);
2079 
2080 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
2081 		break;
2082 	case SIOCADDMULTI:
2083 	case SIOCDELMULTI:
2084 		ADAPTER_LOCK(sc);
2085 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2086 		if (error)
2087 			goto fail;
2088 
2089 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2090 			PORT_LOCK(p);
2091 			cxgb_update_mac_settings(p);
2092 			PORT_UNLOCK(p);
2093 		}
2094 		ADAPTER_UNLOCK(sc);
2095 
2096 		break;
2097 	case SIOCSIFCAP:
2098 		ADAPTER_LOCK(sc);
2099 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2100 		if (error)
2101 			goto fail;
2102 
2103 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2104 		if (mask & IFCAP_TXCSUM) {
2105 			if (IFCAP_TXCSUM & ifp->if_capenable) {
2106 				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
2107 				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
2108 				    | CSUM_IP | CSUM_TSO);
2109 			} else {
2110 				ifp->if_capenable |= IFCAP_TXCSUM;
2111 				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP
2112 				    | CSUM_IP);
2113 			}
2114 		}
2115 		if (mask & IFCAP_RXCSUM) {
2116 			ifp->if_capenable ^= IFCAP_RXCSUM;
2117 		}
2118 		if (mask & IFCAP_TSO4) {
2119 			if (IFCAP_TSO4 & ifp->if_capenable) {
2120 				ifp->if_capenable &= ~IFCAP_TSO4;
2121 				ifp->if_hwassist &= ~CSUM_TSO;
2122 			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
2123 				ifp->if_capenable |= IFCAP_TSO4;
2124 				ifp->if_hwassist |= CSUM_TSO;
2125 			} else
2126 				error = EINVAL;
2127 		}
2128 #ifdef LRO_SUPPORTED
2129 		if (mask & IFCAP_LRO) {
2130 			ifp->if_capenable ^= IFCAP_LRO;
2131 
2132 			/* Safe to do this even if cxgb_up has not been called yet */
2133 			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2134 		}
2135 #endif
2136 		if (mask & IFCAP_VLAN_HWTAGGING) {
2137 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2138 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2139 				PORT_LOCK(p);
2140 				cxgb_update_mac_settings(p);
2141 				PORT_UNLOCK(p);
2142 			}
2143 		}
2144 		if (mask & IFCAP_VLAN_MTU) {
2145 			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2146 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2147 				PORT_LOCK(p);
2148 				cxgb_update_mac_settings(p);
2149 				PORT_UNLOCK(p);
2150 			}
2151 		}
2152 		if (mask & IFCAP_VLAN_HWCSUM)
2153 			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2154 
2155 #ifdef VLAN_CAPABILITIES
2156 		VLAN_CAPABILITIES(ifp);
2157 #endif
2158 		ADAPTER_UNLOCK(sc);
2159 		break;
2160 	case SIOCSIFMEDIA:
2161 	case SIOCGIFMEDIA:
2162 		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2163 		break;
2164 	default:
2165 		error = ether_ioctl(ifp, command, data);
2166 	}
2167 
2168 	return (error);
2169 }
2170 
2171 static int
2172 cxgb_media_change(struct ifnet *ifp)
2173 {
2174 	return (EOPNOTSUPP);
2175 }
2176 
2177 /*
2178  * Translates phy->modtype to the correct Ethernet media subtype.
2179  */
2180 static int
2181 cxgb_ifm_type(int mod)
2182 {
2183 	switch (mod) {
2184 	case phy_modtype_sr:
2185 		return (IFM_10G_SR);
2186 	case phy_modtype_lr:
2187 		return (IFM_10G_LR);
2188 	case phy_modtype_lrm:
2189 		return (IFM_10G_LRM);
2190 	case phy_modtype_twinax:
2191 		return (IFM_10G_TWINAX);
2192 	case phy_modtype_twinax_long:
2193 		return (IFM_10G_TWINAX_LONG);
2194 	case phy_modtype_none:
2195 		return (IFM_NONE);
2196 	case phy_modtype_unknown:
2197 		return (IFM_UNKNOWN);
2198 	}
2199 
2200 	KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2201 	return (IFM_UNKNOWN);
2202 }
2203 
2204 /*
2205  * Rebuilds the ifmedia list for this port, and sets the current media.
2206  */
2207 static void
2208 cxgb_build_medialist(struct port_info *p)
2209 {
2210 	struct cphy *phy = &p->phy;
2211 	struct ifmedia *media = &p->media;
2212 	int mod = phy->modtype;
2213 	int m = IFM_ETHER | IFM_FDX;
2214 
2215 	PORT_LOCK(p);
2216 
2217 	ifmedia_removeall(media);
2218 	if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2219 		/* Copper (RJ45) */
2220 
2221 		if (phy->caps & SUPPORTED_10000baseT_Full)
2222 			ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2223 
2224 		if (phy->caps & SUPPORTED_1000baseT_Full)
2225 			ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2226 
2227 		if (phy->caps & SUPPORTED_100baseT_Full)
2228 			ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2229 
2230 		if (phy->caps & SUPPORTED_10baseT_Full)
2231 			ifmedia_add(media, m | IFM_10_T, mod, NULL);
2232 
2233 		ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2234 		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2235 
2236 	} else if (phy->caps & SUPPORTED_TP) {
2237 		/* Copper (CX4) */
2238 
2239 		KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2240 			("%s: unexpected cap 0x%x", __func__, phy->caps));
2241 
2242 		ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2243 		ifmedia_set(media, m | IFM_10G_CX4);
2244 
2245 	} else if (phy->caps & SUPPORTED_FIBRE &&
2246 		   phy->caps & SUPPORTED_10000baseT_Full) {
2247 		/* 10G optical (but includes SFP+ twinax) */
2248 
2249 		m |= cxgb_ifm_type(mod);
2250 		if (IFM_SUBTYPE(m) == IFM_NONE)
2251 			m &= ~IFM_FDX;
2252 
2253 		ifmedia_add(media, m, mod, NULL);
2254 		ifmedia_set(media, m);
2255 
2256 	} else if (phy->caps & SUPPORTED_FIBRE &&
2257 		   phy->caps & SUPPORTED_1000baseT_Full) {
2258 		/* 1G optical */
2259 
2260 		/* XXX: Lie and claim to be SX, could actually be any 1G-X */
2261 		ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2262 		ifmedia_set(media, m | IFM_1000_SX);
2263 
2264 	} else {
2265 		KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2266 			    phy->caps));
2267 	}
2268 
2269 	PORT_UNLOCK(p);
2270 }
2271 
2272 static void
2273 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2274 {
2275 	struct port_info *p = ifp->if_softc;
2276 	struct ifmedia_entry *cur = p->media.ifm_cur;
2277 	int speed = p->link_config.speed;
2278 
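	/* Rebuild the media list if the transceiver module has changed. */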
2279 	if (cur->ifm_data != p->phy.modtype) {
2280 		cxgb_build_medialist(p);
2281 		cur = p->media.ifm_cur;
2282 	}
2283 
2284 	ifmr->ifm_status = IFM_AVALID;
2285 	if (!p->link_config.link_ok)
2286 		return;
2287 
2288 	ifmr->ifm_status |= IFM_ACTIVE;
2289 
2290 	/*
2291 	 * active and current will differ iff current media is autoselect.  That
2292 	 * can happen only for copper RJ45.
2293 	 */
2294 	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2295 		return;
2296 	KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2297 		("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2298 
2299 	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2300 	if (speed == SPEED_10000)
2301 		ifmr->ifm_active |= IFM_10G_T;
2302 	else if (speed == SPEED_1000)
2303 		ifmr->ifm_active |= IFM_1000_T;
2304 	else if (speed == SPEED_100)
2305 		ifmr->ifm_active |= IFM_100_TX;
2306 	else if (speed == SPEED_10)
2307 		ifmr->ifm_active |= IFM_10_T;
2308 	else
2309 		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2310 			    speed));
2311 }
2312 
2313 static void
2314 cxgb_async_intr(void *data)
2315 {
2316 	adapter_t *sc = data;
2317 
2318 	if (cxgb_debug)
2319 		device_printf(sc->dev, "cxgb_async_intr\n");
2320 	/*
2321 	 * May need to sleep - defer to taskqueue
2322 	 */
2323 	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2324 }
2325 
2326 static void
2327 cxgb_ext_intr_handler(void *arg, int count)
2328 {
2329 	adapter_t *sc = (adapter_t *)arg;
2330 
2331 	if (cxgb_debug)
2332 		printf("cxgb_ext_intr_handler\n");
2333 
2334 	t3_phy_intr_handler(sc);
2335 
2336 	/* Now reenable external interrupts */
2337 	ADAPTER_LOCK(sc);
2338 	if (sc->slow_intr_mask) {
2339 		sc->slow_intr_mask |= F_T3DBG;
2340 		t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
2341 		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
2342 	}
2343 	ADAPTER_UNLOCK(sc);
2344 }
2345 
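/*
 * Decide whether this port's link state needs to be polled: poll at least
 * once after POLL_LINK_1ST_TIME is set, whenever a link fault is pending,
 * and always if the PHY cannot generate link interrupts.
 */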
2346 static inline int
2347 link_poll_needed(struct port_info *p)
2348 {
2349 	struct cphy *phy = &p->phy;
2350 
2351 	if (phy->caps & POLL_LINK_1ST_TIME) {
2352 		p->phy.caps &= ~POLL_LINK_1ST_TIME;
2353 		return (1);
2354 	}
2355 
2356 	return (p->link_fault || !(phy->caps & SUPPORTED_LINK_IRQ));
2357 }
2358 
2359 static void
2360 check_link_status(adapter_t *sc)
2361 {
2362 	int i;
2363 
2364 	for (i = 0; i < (sc)->params.nports; ++i) {
2365 		struct port_info *p = &sc->port[i];
2366 
2367 		if (!isset(&sc->open_device_map, p->port_id))
2368 			continue;
2369 
2370 		if (link_poll_needed(p))
2371 			t3_link_changed(sc, i);
2372 	}
2373 }
2374 
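/*
 * Periodic MAC watchdog for T3B2 adapters.  For every open port with a
 * healthy link, run the hardware watchdog task and either count a MAC
 * toggle or fully restart the MAC, link, and port interrupts.
 */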
2375 static void
2376 check_t3b2_mac(struct adapter *sc)
2377 {
2378 	int i;
2379 
2380 	if (sc->flags & CXGB_SHUTDOWN)
2381 		return;
2382 
2383 	for_each_port(sc, i) {
2384 		struct port_info *p = &sc->port[i];
2385 		int status;
2386 #ifdef INVARIANTS
2387 		struct ifnet *ifp = p->ifp;
2388 #endif
2389 
2390 		if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2391 		    !p->link_config.link_ok)
2392 			continue;
2393 
2394 		KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2395 			("%s: state mismatch (drv_flags %x, device_map %x)",
2396 			 __func__, ifp->if_drv_flags, sc->open_device_map));
2397 
2398 		PORT_LOCK(p);
2399 		status = t3b2_mac_watchdog_task(&p->mac);
2400 		if (status == 1)
2401 			p->mac.stats.num_toggled++;
2402 		else if (status == 2) {
2403 			struct cmac *mac = &p->mac;
2404 
2405 			cxgb_update_mac_settings(p);
2406 			t3_link_start(&p->phy, mac, &p->link_config);
2407 			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2408 			t3_port_intr_enable(sc, p->port_id);
2409 			p->mac.stats.num_resets++;
2410 		}
2411 		PORT_UNLOCK(p);
2412 	}
2413 }
2414 
2415 static void
2416 cxgb_tick(void *arg)
2417 {
2418 	adapter_t *sc = (adapter_t *)arg;
2419 
2420 	if (sc->flags & CXGB_SHUTDOWN)
2421 		return;
2422 
2423 	taskqueue_enqueue(sc->tq, &sc->tick_task);
2424 	callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
2425 }
2426 
2427 static void
2428 cxgb_tick_handler(void *arg, int count)
2429 {
2430 	adapter_t *sc = (adapter_t *)arg;
2431 	const struct adapter_params *p = &sc->params;
2432 	int i;
2433 	uint32_t cause, reset;
2434 
2435 	if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2436 		return;
2437 
2438 	check_link_status(sc);
2439 
2440 	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2441 		check_t3b2_mac(sc);
2442 
2443 	cause = t3_read_reg(sc, A_SG_INT_CAUSE);
2444 	reset = 0;
2445 	if (cause & F_FLEMPTY) {
2446 		struct sge_qset *qs = &sc->sge.qs[0];
2447 
2448 		i = 0;
2449 		reset |= F_FLEMPTY;
2450 
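		/*
		 * One status bit per free list and two free lists per qset:
		 * the bits alternate FL0/FL1, so step to the next qset after
		 * every second bit.
		 */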
2451 		cause = (t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) >>
2452 			 S_FL0EMPTY) & 0xffff;
2453 		while (cause) {
2454 			qs->fl[i].empty += (cause & 1);
2455 			if (i)
2456 				qs++;
2457 			i ^= 1;
2458 			cause >>= 1;
2459 		}
2460 	}
2461 	t3_write_reg(sc, A_SG_INT_CAUSE, reset);
2462 
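	/* Update the ifnet counters of each open port from its MAC stats. */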
2463 	for (i = 0; i < sc->params.nports; i++) {
2464 		struct port_info *pi = &sc->port[i];
2465 		struct ifnet *ifp = pi->ifp;
2466 		struct cmac *mac = &pi->mac;
2467 		struct mac_stats *mstats = &mac->stats;
2468 
2469 		if (!isset(&sc->open_device_map, pi->port_id))
2470 			continue;
2471 
2472 		PORT_LOCK(pi);
2473 		t3_mac_update_stats(mac);
2474 		PORT_UNLOCK(pi);
2475 
2476 		ifp->if_opackets =
2477 		    mstats->tx_frames_64 +
2478 		    mstats->tx_frames_65_127 +
2479 		    mstats->tx_frames_128_255 +
2480 		    mstats->tx_frames_256_511 +
2481 		    mstats->tx_frames_512_1023 +
2482 		    mstats->tx_frames_1024_1518 +
2483 		    mstats->tx_frames_1519_max;
2484 
2485 		ifp->if_ipackets =
2486 		    mstats->rx_frames_64 +
2487 		    mstats->rx_frames_65_127 +
2488 		    mstats->rx_frames_128_255 +
2489 		    mstats->rx_frames_256_511 +
2490 		    mstats->rx_frames_512_1023 +
2491 		    mstats->rx_frames_1024_1518 +
2492 		    mstats->rx_frames_1519_max;
2493 
2494 		ifp->if_obytes = mstats->tx_octets;
2495 		ifp->if_ibytes = mstats->rx_octets;
2496 		ifp->if_omcasts = mstats->tx_mcast_frames;
2497 		ifp->if_imcasts = mstats->rx_mcast_frames;
2498 
2499 		ifp->if_collisions =
2500 		    mstats->tx_total_collisions;
2501 
2502 		ifp->if_iqdrops = mstats->rx_cong_drops;
2503 
2504 		ifp->if_oerrors =
2505 		    mstats->tx_excess_collisions +
2506 		    mstats->tx_underrun +
2507 		    mstats->tx_len_errs +
2508 		    mstats->tx_mac_internal_errs +
2509 		    mstats->tx_excess_deferral +
2510 		    mstats->tx_fcs_errs;
2511 		ifp->if_ierrors =
2512 		    mstats->rx_jabber +
2513 		    mstats->rx_data_errs +
2514 		    mstats->rx_sequence_errs +
2515 		    mstats->rx_runt +
2516 		    mstats->rx_too_long +
2517 		    mstats->rx_mac_internal_errs +
2518 		    mstats->rx_short +
2519 		    mstats->rx_fcs_errs;
2520 
2521 		if (mac->multiport)
2522 			continue;
2523 
2524 		/* Count rx fifo overflows, once per second */
2525 		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2526 		reset = 0;
2527 		if (cause & F_RXFIFO_OVERFLOW) {
2528 			mac->stats.rx_fifo_ovfl++;
2529 			reset |= F_RXFIFO_OVERFLOW;
2530 		}
2531 		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2532 	}
2533 }
2534 
2535 static void
2536 touch_bars(device_t dev)
2537 {
2538 	/*
2539 	 * Don't enable yet
2540 	 */
2541 #if !defined(__LP64__) && 0
2542 	u32 v;
2543 
2544 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2545 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2546 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2547 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2548 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2549 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2550 #endif
2551 }
2552 
2553 static int
2554 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2555 {
2556 	uint8_t *buf;
2557 	int err = 0;
2558 	u32 aligned_offset, aligned_len, *p;
2559 	struct adapter *adapter = pi->adapter;
2560 
2562 	aligned_offset = offset & ~3;
2563 	aligned_len = (len + (offset & 3) + 3) & ~3;
2564 
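	/*
	 * The EEPROM is accessed in 4-byte words.  If the request is not
	 * word aligned, read the partial words at either end first so that
	 * the bytes outside the requested range are written back unchanged.
	 */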
2565 	if (aligned_offset != offset || aligned_len != len) {
2566 		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2567 		if (!buf)
2568 			return (ENOMEM);
2569 		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2570 		if (!err && aligned_len > 4)
2571 			err = t3_seeprom_read(adapter,
2572 					      aligned_offset + aligned_len - 4,
2573 					      (u32 *)&buf[aligned_len - 4]);
2574 		if (err)
2575 			goto out;
2576 		memcpy(buf + (offset & 3), data, len);
2577 	} else
2578 		buf = (uint8_t *)(uintptr_t)data;
2579 
2580 	err = t3_seeprom_wp(adapter, 0);
2581 	if (err)
2582 		goto out;
2583 
2584 	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2585 		err = t3_seeprom_write(adapter, aligned_offset, *p);
2586 		aligned_offset += 4;
2587 	}
2588 
2589 	if (!err)
2590 		err = t3_seeprom_wp(adapter, 1);
2591 out:
2592 	if (buf != data)
2593 		free(buf, M_DEVBUF);
2594 	return (err);
2595 }
2596 
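/*
 * Range check helper for the ioctl handlers; a negative value means
 * "leave unchanged" and always passes.
 */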
2598 static int
2599 in_range(int val, int lo, int hi)
2600 {
2601 	return (val < 0 || (val <= hi && val >= lo));
2602 }
2603 
2604 static int
2605 cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
2606 {
2607 	return (0);
2608 }
2609 
2610 static int
2611 cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2612 {
2613 	return (0);
2614 }
2615 
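/*
 * ioctl handler for the driver's control device node; implements the
 * CHELSIO_* management and diagnostic commands.
 */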
2616 static int
2617 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2618     int fflag, struct thread *td)
2619 {
2620 	int mmd, error = 0;
2621 	struct port_info *pi = dev->si_drv1;
2622 	adapter_t *sc = pi->adapter;
2623 
2624 #ifdef PRIV_SUPPORTED
2625 	if (priv_check(td, PRIV_DRIVER)) {
2626 		if (cxgb_debug)
2627 			printf("user does not have access to privileged ioctls\n");
2628 		return (EPERM);
2629 	}
2630 #else
2631 	if (suser(td)) {
2632 		if (cxgb_debug)
2633 			printf("user does not have access to privileged ioctls\n");
2634 		return (EPERM);
2635 	}
2636 #endif
2637 
2638 	switch (cmd) {
2639 	case CHELSIO_GET_MIIREG: {
2640 		uint32_t val;
2641 		struct cphy *phy = &pi->phy;
2642 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2643 
2644 		if (!phy->mdio_read)
2645 			return (EOPNOTSUPP);
2646 		if (is_10G(sc)) {
2647 			mmd = mid->phy_id >> 8;
2648 			if (!mmd)
2649 				mmd = MDIO_DEV_PCS;
2650 			else if (mmd > MDIO_DEV_VEND2)
2651 				return (EINVAL);
2652 
2653 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2654 					     mid->reg_num, &val);
2655 		} else
2656 		        error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2657 					     mid->reg_num & 0x1f, &val);
2658 		if (error == 0)
2659 			mid->val_out = val;
2660 		break;
2661 	}
2662 	case CHELSIO_SET_MIIREG: {
2663 		struct cphy *phy = &pi->phy;
2664 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2665 
2666 		if (!phy->mdio_write)
2667 			return (EOPNOTSUPP);
2668 		if (is_10G(sc)) {
2669 			mmd = mid->phy_id >> 8;
2670 			if (!mmd)
2671 				mmd = MDIO_DEV_PCS;
2672 			else if (mmd > MDIO_DEV_VEND2)
2673 				return (EINVAL);
2674 
2675 			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2676 					      mmd, mid->reg_num, mid->val_in);
2677 		} else
2678 			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2679 					      mid->reg_num & 0x1f,
2680 					      mid->val_in);
2681 		break;
2682 	}
2683 	case CHELSIO_SETREG: {
2684 		struct ch_reg *edata = (struct ch_reg *)data;
2685 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2686 			return (EFAULT);
2687 		t3_write_reg(sc, edata->addr, edata->val);
2688 		break;
2689 	}
2690 	case CHELSIO_GETREG: {
2691 		struct ch_reg *edata = (struct ch_reg *)data;
2692 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2693 			return (EFAULT);
2694 		edata->val = t3_read_reg(sc, edata->addr);
2695 		break;
2696 	}
2697 	case CHELSIO_GET_SGE_CONTEXT: {
2698 		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2699 		mtx_lock_spin(&sc->sge.reg_lock);
2700 		switch (ecntxt->cntxt_type) {
2701 		case CNTXT_TYPE_EGRESS:
2702 			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2703 			    ecntxt->data);
2704 			break;
2705 		case CNTXT_TYPE_FL:
2706 			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2707 			    ecntxt->data);
2708 			break;
2709 		case CNTXT_TYPE_RSP:
2710 			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2711 			    ecntxt->data);
2712 			break;
2713 		case CNTXT_TYPE_CQ:
2714 			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2715 			    ecntxt->data);
2716 			break;
2717 		default:
2718 			error = EINVAL;
2719 			break;
2720 		}
2721 		mtx_unlock_spin(&sc->sge.reg_lock);
2722 		break;
2723 	}
2724 	case CHELSIO_GET_SGE_DESC: {
2725 		struct ch_desc *edesc = (struct ch_desc *)data;
2726 		int ret;
2727 		if (edesc->queue_num >= SGE_QSETS * 6)
2728 			return (EINVAL);
2729 		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2730 		    edesc->queue_num % 6, edesc->idx, edesc->data);
2731 		if (ret < 0)
2732 			return (EINVAL);
2733 		edesc->size = ret;
2734 		break;
2735 	}
2736 	case CHELSIO_GET_QSET_PARAMS: {
2737 		struct qset_params *q;
2738 		struct ch_qset_params *t = (struct ch_qset_params *)data;
2739 		int q1 = pi->first_qset;
2740 		int nqsets = pi->nqsets;
2741 		int i;
2742 
2743 		if (t->qset_idx >= nqsets)
2744 			return (EINVAL);
2745 
2746 		i = q1 + t->qset_idx;
2747 		q = &sc->params.sge.qset[i];
2748 		t->rspq_size   = q->rspq_size;
2749 		t->txq_size[0] = q->txq_size[0];
2750 		t->txq_size[1] = q->txq_size[1];
2751 		t->txq_size[2] = q->txq_size[2];
2752 		t->fl_size[0]  = q->fl_size;
2753 		t->fl_size[1]  = q->jumbo_size;
2754 		t->polling     = q->polling;
2755 		t->lro         = q->lro;
2756 		t->intr_lat    = q->coalesce_usecs;
2757 		t->cong_thres  = q->cong_thres;
2758 		t->qnum        = i;
2759 
2760 		if (sc->flags & USING_MSIX)
2761 			t->vector = rman_get_start(sc->msix_irq_res[i]);
2762 		else
2763 			t->vector = rman_get_start(sc->irq_res);
2764 
2765 		break;
2766 	}
2767 	case CHELSIO_GET_QSET_NUM: {
2768 		struct ch_reg *edata = (struct ch_reg *)data;
2769 		edata->val = pi->nqsets;
2770 		break;
2771 	}
2772 	case CHELSIO_LOAD_FW: {
2773 		uint8_t *fw_data;
2774 		uint32_t vers;
2775 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2776 
2777 		/*
2778 		 * You're allowed to load firmware only before FULL_INIT_DONE
2779 		 *
2780 		 * FW_UPTODATE is also set so the rest of the initialization
2781 		 * will not overwrite what was loaded here.  This gives you the
2782 		 * flexibility to load any firmware (and maybe shoot yourself in
2783 		 * the foot).
2784 		 */
2785 
2786 		ADAPTER_LOCK(sc);
2787 		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2788 			ADAPTER_UNLOCK(sc);
2789 			return (EBUSY);
2790 		}
2791 
2792 		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2793 		if (!fw_data)
2794 			error = ENOMEM;
2795 		else
2796 			error = copyin(t->buf, fw_data, t->len);
2797 
2798 		if (!error)
2799 			error = -t3_load_fw(sc, fw_data, t->len);
2800 
2801 		if (t3_get_fw_version(sc, &vers) == 0) {
2802 			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2803 			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2804 			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2805 		}
2806 
2807 		if (!error)
2808 			sc->flags |= FW_UPTODATE;
2809 
2810 		free(fw_data, M_DEVBUF);
2811 		ADAPTER_UNLOCK(sc);
2812 		break;
2813 	}
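	/* Write a boot image into the adapter's flash. */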
2814 	case CHELSIO_LOAD_BOOT: {
2815 		uint8_t *boot_data;
2816 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2817 
2818 		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2819 		if (!boot_data)
2820 			return (ENOMEM);
2821 
2822 		error = copyin(t->buf, boot_data, t->len);
2823 		if (!error)
2824 			error = -t3_load_boot(sc, boot_data, t->len);
2825 
2826 		free(boot_data, M_DEVBUF);
2827 		break;
2828 	}
2829 	case CHELSIO_GET_PM: {
2830 		struct ch_pm *m = (struct ch_pm *)data;
2831 		struct tp_params *p = &sc->params.tp;
2832 
2833 		if (!is_offload(sc))
2834 			return (EOPNOTSUPP);
2835 
2836 		m->tx_pg_sz = p->tx_pg_size;
2837 		m->tx_num_pg = p->tx_num_pgs;
2838 		m->rx_pg_sz  = p->rx_pg_size;
2839 		m->rx_num_pg = p->rx_num_pgs;
2840 		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2841 
2842 		break;
2843 	}
2844 	case CHELSIO_SET_PM: {
2845 		struct ch_pm *m = (struct ch_pm *)data;
2846 		struct tp_params *p = &sc->params.tp;
2847 
2848 		if (!is_offload(sc))
2849 			return (EOPNOTSUPP);
2850 		if (sc->flags & FULL_INIT_DONE)
2851 			return (EBUSY);
2852 
2853 		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2854 		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2855 			return (EINVAL);	/* not power of 2 */
2856 		if (!(m->rx_pg_sz & 0x14000))
2857 			return (EINVAL);	/* not 16KB or 64KB */
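		/* tx page size must be 16KB, 64KB, 256KB, 1MB, 4MB, or 16MB */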
2858 		if (!(m->tx_pg_sz & 0x1554000))
2859 			return (EINVAL);
2860 		if (m->tx_num_pg == -1)
2861 			m->tx_num_pg = p->tx_num_pgs;
2862 		if (m->rx_num_pg == -1)
2863 			m->rx_num_pg = p->rx_num_pgs;
2864 		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2865 			return (EINVAL);
2866 		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2867 		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2868 			return (EINVAL);
2869 
2870 		p->rx_pg_size = m->rx_pg_sz;
2871 		p->tx_pg_size = m->tx_pg_sz;
2872 		p->rx_num_pgs = m->rx_num_pg;
2873 		p->tx_num_pgs = m->tx_num_pg;
2874 		break;
2875 	}
2876 	case CHELSIO_SETMTUTAB: {
2877 		struct ch_mtus *m = (struct ch_mtus *)data;
2878 		int i;
2879 
2880 		if (!is_offload(sc))
2881 			return (EOPNOTSUPP);
2882 		if (offload_running(sc))
2883 			return (EBUSY);
2884 		if (m->nmtus != NMTUS)
2885 			return (EINVAL);
2886 		if (m->mtus[0] < 81)         /* accommodate SACK */
2887 			return (EINVAL);
2888 
2889 		/*
2890 		 * MTUs must be in ascending order
2891 		 */
2892 		for (i = 1; i < NMTUS; ++i)
2893 			if (m->mtus[i] < m->mtus[i - 1])
2894 				return (EINVAL);
2895 
2896 		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2897 		break;
2898 	}
2899 	case CHELSIO_GETMTUTAB: {
2900 		struct ch_mtus *m = (struct ch_mtus *)data;
2901 
2902 		if (!is_offload(sc))
2903 			return (EOPNOTSUPP);
2904 
2905 		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2906 		m->nmtus = NMTUS;
2907 		break;
2908 	}
2909 	case CHELSIO_GET_MEM: {
2910 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2911 		struct mc7 *mem;
2912 		uint8_t *useraddr;
2913 		u64 buf[32];
2914 
2915 		/*
2916 		 * Use these to avoid modifying len/addr in the return
2917 		 * struct
2918 		 */
2919 		uint32_t len = t->len, addr = t->addr;
2920 
2921 		if (!is_offload(sc))
2922 			return (EOPNOTSUPP);
2923 		if (!(sc->flags & FULL_INIT_DONE))
2924 			return (EIO);         /* need the memory controllers */
2925 		if ((addr & 0x7) || (len & 0x7))
2926 			return (EINVAL);
2927 		if (t->mem_id == MEM_CM)
2928 			mem = &sc->cm;
2929 		else if (t->mem_id == MEM_PMRX)
2930 			mem = &sc->pmrx;
2931 		else if (t->mem_id == MEM_PMTX)
2932 			mem = &sc->pmtx;
2933 		else
2934 			return (EINVAL);
2935 
2936 		/*
2937 		 * Version scheme:
2938 		 * bits 0..9: chip version
2939 		 * bits 10..15: chip revision
2940 		 */
2941 		t->version = 3 | (sc->params.rev << 10);
2942 
2943 		/*
2944 		 * Read 256 bytes at a time as len can be large and we don't
2945 		 * want to use huge intermediate buffers.
2946 		 */
2947 		useraddr = (uint8_t *)t->buf;
2948 		while (len) {
2949 			unsigned int chunk = min(len, sizeof(buf));
2950 
2951 			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2952 			if (error)
2953 				return (-error);
2954 			if (copyout(buf, useraddr, chunk))
2955 				return (EFAULT);
2956 			useraddr += chunk;
2957 			addr += chunk;
2958 			len -= chunk;
2959 		}
2960 		break;
2961 	}
2962 	case CHELSIO_READ_TCAM_WORD: {
2963 		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2964 
2965 		if (!is_offload(sc))
2966 			return (EOPNOTSUPP);
2967 		if (!(sc->flags & FULL_INIT_DONE))
2968 			return (EIO);         /* need MC5 */
2969 		return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
2970 		break;
2971 	}
2972 	case CHELSIO_SET_TRACE_FILTER: {
2973 		struct ch_trace *t = (struct ch_trace *)data;
2974 		const struct trace_params *tp;
2975 
2976 		tp = (const struct trace_params *)&t->sip;
2977 		if (t->config_tx)
2978 			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2979 					       t->trace_tx);
2980 		if (t->config_rx)
2981 			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2982 					       t->trace_rx);
2983 		break;
2984 	}
2985 	case CHELSIO_SET_PKTSCHED: {
2986 		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2987 		if (sc->open_device_map == 0)
2988 			return (EAGAIN);
2989 		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2990 		    p->binding);
2991 		break;
2992 	}
2993 	case CHELSIO_IFCONF_GETREGS: {
2994 		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2995 		int reglen = cxgb_get_regs_len();
2996 		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2997 		if (buf == NULL) {
2998 			return (ENOMEM);
2999 		}
3000 		if (regs->len > reglen)
3001 			regs->len = reglen;
3002 		else if (regs->len < reglen)
3003 			error = ENOBUFS;
3004 
3005 		if (!error) {
3006 			cxgb_get_regs(sc, regs, buf);
3007 			error = copyout(buf, regs->data, reglen);
3008 		}
3009 		free(buf, M_DEVBUF);
3010 
3011 		break;
3012 	}
3013 	case CHELSIO_SET_HW_SCHED: {
3014 		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
3015 		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
3016 
3017 		if ((sc->flags & FULL_INIT_DONE) == 0)
3018 			return (EAGAIN);       /* need TP to be initialized */
3019 		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
3020 		    !in_range(t->channel, 0, 1) ||
3021 		    !in_range(t->kbps, 0, 10000000) ||
3022 		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
3023 		    !in_range(t->flow_ipg, 0,
3024 			      dack_ticks_to_usec(sc, 0x7ff)))
3025 			return (EINVAL);
3026 
3027 		if (t->kbps >= 0) {
3028 			error = t3_config_sched(sc, t->kbps, t->sched);
3029 			if (error < 0)
3030 				return (-error);
3031 		}
3032 		if (t->class_ipg >= 0)
3033 			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
3034 		if (t->flow_ipg >= 0) {
3035 			t->flow_ipg *= 1000;     /* us -> ns */
3036 			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
3037 		}
3038 		if (t->mode >= 0) {
3039 			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
3040 
3041 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
3042 					 bit, t->mode ? bit : 0);
3043 		}
3044 		if (t->channel >= 0)
3045 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
3046 					 1 << t->sched, t->channel << t->sched);
3047 		break;
3048 	}
3049 	case CHELSIO_GET_EEPROM: {
3050 		int i;
3051 		struct ch_eeprom *e = (struct ch_eeprom *)data;
3052 		uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
3053 
3054 		if (buf == NULL) {
3055 			return (ENOMEM);
3056 		}
3057 		e->magic = EEPROM_MAGIC;
3058 		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
3059 			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
3060 
3061 		if (!error)
3062 			error = copyout(buf + e->offset, e->data, e->len);
3063 
3064 		free(buf, M_DEVBUF);
3065 		break;
3066 	}
3067 	case CHELSIO_CLEAR_STATS: {
3068 		if (!(sc->flags & FULL_INIT_DONE))
3069 			return (EAGAIN);
3070 
3071 		PORT_LOCK(pi);
3072 		t3_mac_update_stats(&pi->mac);
3073 		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
3074 		PORT_UNLOCK(pi);
3075 		break;
3076 	}
3077 	case CHELSIO_GET_UP_LA: {
3078 		struct ch_up_la *la = (struct ch_up_la *)data;
3079 		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3080 		if (buf == NULL) {
3081 			return (ENOMEM);
3082 		}
3083 		if (la->bufsize < LA_BUFSIZE)
3084 			error = ENOBUFS;
3085 
3086 		if (!error)
3087 			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3088 					      &la->bufsize, buf);
3089 		if (!error)
3090 			error = copyout(buf, la->data, la->bufsize);
3091 
3092 		free(buf, M_DEVBUF);
3093 		break;
3094 	}
3095 	case CHELSIO_GET_UP_IOQS: {
3096 		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3097 		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3098 		uint32_t *v;
3099 
3100 		if (buf == NULL) {
3101 			return (ENOMEM);
3102 		}
3103 		if (ioqs->bufsize < IOQS_BUFSIZE)
3104 			error = ENOBUFS;
3105 
3106 		if (!error)
3107 			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3108 
3109 		if (!error) {
3110 			v = (uint32_t *)buf;
3111 
3112 			ioqs->bufsize -= 4 * sizeof(uint32_t);
3113 			ioqs->ioq_rx_enable = *v++;
3114 			ioqs->ioq_tx_enable = *v++;
3115 			ioqs->ioq_rx_status = *v++;
3116 			ioqs->ioq_tx_status = *v++;
3117 
3118 			error = copyout(v, ioqs->data, ioqs->bufsize);
3119 		}
3120 
3121 		free(buf, M_DEVBUF);
3122 		break;
3123 	}
3124 	default:
3125 		return (EOPNOTSUPP);
3126 		break;
3127 	}
3128 
3129 	return (error);
3130 }
3131 
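/*
 * Copy a contiguous range of 32-bit registers into the register dump
 * buffer, keeping each register at its natural offset.
 */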
3132 static __inline void
3133 reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3134     unsigned int end)
3135 {
3136 	uint32_t *p = (uint32_t *)(buf + start);
3137 
3138 	for ( ; start <= end; start += sizeof(uint32_t))
3139 		*p++ = t3_read_reg(ap, start);
3140 }
3141 
3142 #define T3_REGMAP_SIZE (3 * 1024)
3143 static int
3144 cxgb_get_regs_len(void)
3145 {
3146 	return T3_REGMAP_SIZE;
3147 }
3148 
3149 static void
3150 cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3151 {
3152 
3153 	/*
3154 	 * Version scheme:
3155 	 * bits 0..9: chip version
3156 	 * bits 10..15: chip revision
3157 	 * bit 31: set for PCIe cards
3158 	 */
3159 	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3160 
3161 	/*
3162 	 * We skip the MAC statistics registers because they are clear-on-read.
3163 	 * Also reading multi-register stats would need to synchronize with the
3164 	 * periodic mac stats accumulation.  Hard to justify the complexity.
3165 	 */
3166 	memset(buf, 0, cxgb_get_regs_len());
3167 	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3168 	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3169 	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3170 	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3171 	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3172 	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3173 		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3174 	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3175 		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3176 }
3177 
3179 MODULE_DEPEND(if_cxgb, cxgb_t3fw, 1, 1, 1);
3180