xref: /freebsd/sys/dev/cxgb/cxgb_main.c (revision d8b878873e7aa8df1972cc6a642804b17eb61087)
1 /**************************************************************************
2 
3 Copyright (c) 2007-2009, Chelsio Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27 
28 ***************************************************************************/
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/bus.h>
37 #include <sys/module.h>
38 #include <sys/pciio.h>
39 #include <sys/conf.h>
40 #include <machine/bus.h>
41 #include <machine/resource.h>
42 #include <sys/bus_dma.h>
43 #include <sys/ktr.h>
44 #include <sys/rman.h>
45 #include <sys/ioccom.h>
46 #include <sys/mbuf.h>
47 #include <sys/linker.h>
48 #include <sys/firmware.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/smp.h>
52 #include <sys/sysctl.h>
53 #include <sys/syslog.h>
54 #include <sys/queue.h>
55 #include <sys/taskqueue.h>
56 #include <sys/proc.h>
57 
58 #include <net/bpf.h>
59 #include <net/ethernet.h>
60 #include <net/if.h>
61 #include <net/if_arp.h>
62 #include <net/if_dl.h>
63 #include <net/if_media.h>
64 #include <net/if_types.h>
65 #include <net/if_vlan_var.h>
66 
67 #include <netinet/in_systm.h>
68 #include <netinet/in.h>
69 #include <netinet/if_ether.h>
70 #include <netinet/ip.h>
72 #include <netinet/tcp.h>
73 #include <netinet/udp.h>
74 
75 #include <dev/pci/pcireg.h>
76 #include <dev/pci/pcivar.h>
77 #include <dev/pci/pci_private.h>
78 
79 #include <cxgb_include.h>
80 
81 #ifdef PRIV_SUPPORTED
82 #include <sys/priv.h>
83 #endif
84 
85 static int cxgb_setup_interrupts(adapter_t *);
86 static void cxgb_teardown_interrupts(adapter_t *);
87 static void cxgb_init(void *);
88 static int cxgb_init_locked(struct port_info *);
89 static int cxgb_uninit_locked(struct port_info *);
90 static int cxgb_uninit_synchronized(struct port_info *);
91 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
92 static int cxgb_media_change(struct ifnet *);
93 static int cxgb_ifm_type(int);
94 static void cxgb_build_medialist(struct port_info *);
95 static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
96 static int setup_sge_qsets(adapter_t *);
97 static void cxgb_async_intr(void *);
98 static void cxgb_ext_intr_handler(void *, int);
99 static void cxgb_tick_handler(void *, int);
100 static void cxgb_tick(void *);
101 static void setup_rss(adapter_t *sc);
102 
103 /* Attachment glue for the PCI controller end of the device.  Each port of
104  * the device is attached separately, as defined later.
105  */
106 static int cxgb_controller_probe(device_t);
107 static int cxgb_controller_attach(device_t);
108 static int cxgb_controller_detach(device_t);
109 static void cxgb_free(struct adapter *);
110 static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
111     unsigned int end);
112 static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
113 static int cxgb_get_regs_len(void);
114 static int offload_open(struct port_info *pi);
115 static void touch_bars(device_t dev);
116 static int offload_close(struct t3cdev *tdev);
117 static void cxgb_update_mac_settings(struct port_info *p);
118 
119 static device_method_t cxgb_controller_methods[] = {
120 	DEVMETHOD(device_probe,		cxgb_controller_probe),
121 	DEVMETHOD(device_attach,	cxgb_controller_attach),
122 	DEVMETHOD(device_detach,	cxgb_controller_detach),
123 
124 	/* bus interface */
125 	DEVMETHOD(bus_print_child,	bus_generic_print_child),
126 	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
127 
128 	{ 0, 0 }
129 };
130 
131 static driver_t cxgb_controller_driver = {
132 	"cxgbc",
133 	cxgb_controller_methods,
134 	sizeof(struct adapter)
135 };
136 
137 static devclass_t	cxgb_controller_devclass;
138 DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
139 
140 /*
141  * Attachment glue for the ports.  Attachment is done directly to the
142  * controller device.
143  */
144 static int cxgb_port_probe(device_t);
145 static int cxgb_port_attach(device_t);
146 static int cxgb_port_detach(device_t);
147 
148 static device_method_t cxgb_port_methods[] = {
149 	DEVMETHOD(device_probe,		cxgb_port_probe),
150 	DEVMETHOD(device_attach,	cxgb_port_attach),
151 	DEVMETHOD(device_detach,	cxgb_port_detach),
152 	{ 0, 0 }
153 };
154 
155 static driver_t cxgb_port_driver = {
156 	"cxgb",
157 	cxgb_port_methods,
158 	0
159 };
160 
161 static d_ioctl_t cxgb_extension_ioctl;
162 static d_open_t cxgb_extension_open;
163 static d_close_t cxgb_extension_close;
164 
165 static struct cdevsw cxgb_cdevsw = {
166        .d_version =    D_VERSION,
167        .d_flags =      0,
168        .d_open =       cxgb_extension_open,
169        .d_close =      cxgb_extension_close,
170        .d_ioctl =      cxgb_extension_ioctl,
171        .d_name =       "cxgb",
172 };
173 
174 static devclass_t	cxgb_port_devclass;
175 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
176 
177 /*
178  * The driver uses the best interrupt scheme available on a platform in the
179  * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
180  * of these schemes the driver may consider as follows:
181  *
182  * msi = 2: choose from among all three options
183  * msi = 1: only consider MSI and pin interrupts
184  * msi = 0: force pin interrupts
185  */
186 static int msi_allowed = 2;
187 
188 TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
189 SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
190 SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
191     "MSI-X, MSI, INTx selector");
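
/*
 * Note: the hw.cxgb.* knobs in this file are boot-time tunables (CTLFLAG_RDTUN).
 * For example, to restrict the driver to MSI or INTx interrupts, set the
 * following (illustrative value) in /boot/loader.conf:
 *
 *	hw.cxgb.msi_allowed="1"
 */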
192 
193 /*
194  * The driver enables offload by default.
195  * To disable it, use ofld_disable = 1.
196  */
197 static int ofld_disable = 0;
198 TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
199 SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
200     "disable ULP offload");
201 
202 /*
203  * The driver uses an auto-queue algorithm by default.
204  * To disable it and force a single queue-set per port, use multiq = 0
205  */
206 static int multiq = 1;
207 TUNABLE_INT("hw.cxgb.multiq", &multiq);
208 SYSCTL_UINT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
209     "use min(ncpus/ports, 8) queue-sets per port");
210 
211 /*
212  * By default the driver will not update the firmware unless
213  * it was compiled against a newer version.
215  */
216 static int force_fw_update = 0;
217 TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
218 SYSCTL_UINT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
219     "update firmware even if up to date");
220 
221 int cxgb_use_16k_clusters = -1;
222 TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
223 SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
224     &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");
225 
226 /*
227  * Tune the size of the output queue.
228  */
229 int cxgb_snd_queue_len = IFQ_MAXLEN;
230 TUNABLE_INT("hw.cxgb.snd_queue_len", &cxgb_snd_queue_len);
231 SYSCTL_UINT(_hw_cxgb, OID_AUTO, snd_queue_len, CTLFLAG_RDTUN,
232     &cxgb_snd_queue_len, 0, "send queue size");
233 
234 
235 enum {
236 	MAX_TXQ_ENTRIES      = 16384,
237 	MAX_CTRL_TXQ_ENTRIES = 1024,
238 	MAX_RSPQ_ENTRIES     = 16384,
239 	MAX_RX_BUFFERS       = 16384,
240 	MAX_RX_JUMBO_BUFFERS = 16384,
241 	MIN_TXQ_ENTRIES      = 4,
242 	MIN_CTRL_TXQ_ENTRIES = 4,
243 	MIN_RSPQ_ENTRIES     = 32,
244 	MIN_FL_ENTRIES       = 32,
245 	MIN_FL_JUMBO_ENTRIES = 32
246 };
247 
248 struct filter_info {
249 	u32 sip;
250 	u32 sip_mask;
251 	u32 dip;
252 	u16 sport;
253 	u16 dport;
254 	u32 vlan:12;
255 	u32 vlan_prio:3;
256 	u32 mac_hit:1;
257 	u32 mac_idx:4;
258 	u32 mac_vld:1;
259 	u32 pkt_type:2;
260 	u32 report_filter_id:1;
261 	u32 pass:1;
262 	u32 rss:1;
263 	u32 qset:3;
264 	u32 locked:1;
265 	u32 valid:1;
266 };
267 
268 enum { FILTER_NO_VLAN_PRI = 7 };
269 
270 #define EEPROM_MAGIC 0x38E2F10C
271 
272 #define PORT_MASK ((1 << MAX_NPORTS) - 1)
273 
274 /* Table for probing the cards.  The desc field isn't actually used */
275 struct cxgb_ident {
276 	uint16_t	vendor;
277 	uint16_t	device;
278 	int		index;
279 	char		*desc;
280 } cxgb_identifiers[] = {
281 	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
282 	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
283 	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
284 	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
285 	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
286 	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
287 	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
288 	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
289 	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
290 	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
291 	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
292 	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
293 	{PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
294 	{PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
295 	{0, 0, 0, NULL}
296 };
297 
298 static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
299 
300 
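/*
 * Map the adapter's hardware revision to the single character ('a', 'b', or
 * 'c') used to build revision-specific image names such as TPSRAM_NAME
 * ("cxgb_t3%c_protocol_sram"); 'z' is returned for an unknown revision.
 */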
301 static __inline char
302 t3rev2char(struct adapter *adapter)
303 {
304 	char rev = 'z';
305 
306 	switch(adapter->params.rev) {
307 	case T3_REV_A:
308 		rev = 'a';
309 		break;
310 	case T3_REV_B:
311 	case T3_REV_B2:
312 		rev = 'b';
313 		break;
314 	case T3_REV_C:
315 		rev = 'c';
316 		break;
317 	}
318 	return rev;
319 }
320 
321 static struct cxgb_ident *
322 cxgb_get_ident(device_t dev)
323 {
324 	struct cxgb_ident *id;
325 
326 	for (id = cxgb_identifiers; id->desc != NULL; id++) {
327 		if ((id->vendor == pci_get_vendor(dev)) &&
328 		    (id->device == pci_get_device(dev))) {
329 			return (id);
330 		}
331 	}
332 	return (NULL);
333 }
334 
335 static const struct adapter_info *
336 cxgb_get_adapter_info(device_t dev)
337 {
338 	struct cxgb_ident *id;
339 	const struct adapter_info *ai;
340 
341 	id = cxgb_get_ident(dev);
342 	if (id == NULL)
343 		return (NULL);
344 
345 	ai = t3_get_adapter_info(id->index);
346 
347 	return (ai);
348 }
349 
350 static int
351 cxgb_controller_probe(device_t dev)
352 {
353 	const struct adapter_info *ai;
354 	char *ports, buf[80];
355 	int nports;
356 
357 	ai = cxgb_get_adapter_info(dev);
358 	if (ai == NULL)
359 		return (ENXIO);
360 
361 	nports = ai->nports0 + ai->nports1;
362 	if (nports == 1)
363 		ports = "port";
364 	else
365 		ports = "ports";
366 
367 	snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
368 	device_set_desc_copy(dev, buf);
369 	return (BUS_PROBE_DEFAULT);
370 }
371 
372 #define FW_FNAME "cxgb_t3fw"
373 #define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
374 #define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
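/*
 * Firmware and protocol-SRAM images are looked up by these names through
 * firmware(9); firmware_get() will also try to load a kernel module with the
 * same name if the image is not already registered.
 */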
375 
376 static int
377 upgrade_fw(adapter_t *sc)
378 {
379 	const struct firmware *fw;
380 	int status;
381 	u32 vers;
382 
383 	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
384 		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
385 		return (ENOENT);
386 	} else
387 		device_printf(sc->dev, "installing firmware on card\n");
388 	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
389 
390 	if (status != 0) {
391 		device_printf(sc->dev, "failed to install firmware: %d\n",
392 		    status);
393 	} else {
394 		t3_get_fw_version(sc, &vers);
395 		snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
396 		    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
397 		    G_FW_VERSION_MICRO(vers));
398 	}
399 
400 	firmware_put(fw, FIRMWARE_UNLOAD);
401 
402 	return (status);
403 }
404 
405 /*
406  * The cxgb_controller_attach function is responsible for the initial
407  * bringup of the device.  Its responsibilities include:
408  *
409  *  1. Determine if the device supports MSI or MSI-X.
410  *  2. Allocate bus resources so that we can access the Base Address Register
411  *  3. Create and initialize mutexes for the controller and its control
412  *     logic such as SGE and MDIO.
413  *  4. Call hardware specific setup routine for the adapter as a whole.
414  *  5. Allocate the BAR for doing MSI-X.
415  *  6. Set up the line interrupt iff MSI-X is not supported.
416  *  7. Create the driver's taskq.
417  *  8. Start one task queue service thread.
418  *  9. Check if the firmware and SRAM are up-to-date.  They will be
419  *     auto-updated later (before FULL_INIT_DONE), if required.
420  * 10. Create a child device for each MAC (port)
421  * 11. Initialize T3 private state.
422  * 12. Trigger the LED
423  * 13. Set up offload iff supported.
424  * 14. Reset/restart the tick callout.
425  * 15. Attach sysctls
426  *
427  * NOTE: Any modification or deviation from this list MUST be reflected in
428  * the above comment.  Failure to do so will result in problems on various
429  * error conditions including link flapping.
430  */
431 static int
432 cxgb_controller_attach(device_t dev)
433 {
434 	device_t child;
435 	const struct adapter_info *ai;
436 	struct adapter *sc;
437 	int i, error = 0;
438 	uint32_t vers;
439 	int port_qsets = 1;
440 	int msi_needed, reg;
441 	char buf[80];
442 
443 	sc = device_get_softc(dev);
444 	sc->dev = dev;
445 	sc->msi_count = 0;
446 	ai = cxgb_get_adapter_info(dev);
447 
448 	/* Find the PCIe link width and set the max read request size to 4KB. */
449 	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
450 		uint16_t lnk, pectl;
451 		lnk = pci_read_config(dev, reg + 0x12, 2); /* Link Status register */
452 		sc->link_width = (lnk >> 4) & 0x3f; /* negotiated width, bits 9:4 */
453 
454 		pectl = pci_read_config(dev, reg + 0x8, 2); /* Device Control register */
455 		pectl = (pectl & ~0x7000) | (5 << 12); /* max read request size = 4096 bytes */
456 		pci_write_config(dev, reg + 0x8, pectl, 2);
457 	}
458 
459 	if (sc->link_width != 0 && sc->link_width <= 4 &&
460 	    (ai->nports0 + ai->nports1) <= 2) {
461 		device_printf(sc->dev,
462 		    "PCIe x%d Link, expect reduced performance\n",
463 		    sc->link_width);
464 	}
465 
466 	touch_bars(dev);
467 	pci_enable_busmaster(dev);
468 	/*
469 	 * Allocate the registers and make them available to the driver.
470 	 * The registers that we care about for NIC mode are in BAR 0
471 	 */
472 	sc->regs_rid = PCIR_BAR(0);
473 	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
474 	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
475 		device_printf(dev, "Cannot allocate BAR region 0\n");
476 		return (ENXIO);
477 	}
478 	sc->udbs_rid = PCIR_BAR(2);
479 	sc->udbs_res = NULL;
480 	if (is_offload(sc) &&
481 	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
482 		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
483 		device_printf(dev, "Cannot allocate BAR region 1\n");
484 		error = ENXIO;
485 		goto out;
486 	}
487 
488 	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
489 	    device_get_unit(dev));
490 	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
491 
492 	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
493 	    device_get_unit(dev));
494 	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
495 	    device_get_unit(dev));
496 	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
497 	    device_get_unit(dev));
498 
499 	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
500 	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
501 	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
502 
503 	sc->bt = rman_get_bustag(sc->regs_res);
504 	sc->bh = rman_get_bushandle(sc->regs_res);
505 	sc->mmio_len = rman_get_size(sc->regs_res);
506 
507 	for (i = 0; i < MAX_NPORTS; i++)
508 		sc->port[i].adapter = sc;
509 
510 	if (t3_prep_adapter(sc, ai, 1) < 0) {
511 		printf("prep adapter failed\n");
512 		error = ENODEV;
513 		goto out;
514 	}
515         /* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
516 	 * enough messages for the queue sets.  If that fails, try falling
517 	 * back to MSI.  If that fails, then try falling back to the legacy
518 	 * interrupt pin model.
519 	 */
520 	sc->msix_regs_rid = 0x20;
521 	if ((msi_allowed >= 2) &&
522 	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
523 	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
524 
525 		if (multiq)
526 			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
527 		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
528 
529 		if (pci_msix_count(dev) == 0 ||
530 		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
531 		    sc->msi_count != msi_needed) {
532 			device_printf(dev, "alloc msix failed - "
533 				      "msi_count=%d, msi_needed=%d, err=%d; "
534 				      "will try MSI\n", sc->msi_count,
535 				      msi_needed, error);
536 			sc->msi_count = 0;
537 			port_qsets = 1;
538 			pci_release_msi(dev);
539 			bus_release_resource(dev, SYS_RES_MEMORY,
540 			    sc->msix_regs_rid, sc->msix_regs_res);
541 			sc->msix_regs_res = NULL;
542 		} else {
543 			sc->flags |= USING_MSIX;
544 			sc->cxgb_intr = cxgb_async_intr;
545 			device_printf(dev,
546 				      "using MSI-X interrupts (%u vectors)\n",
547 				      sc->msi_count);
548 		}
549 	}
550 
551 	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
552 		sc->msi_count = 1;
553 		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
554 			device_printf(dev, "alloc msi failed - "
555 				      "err=%d; will try INTx\n", error);
556 			sc->msi_count = 0;
557 			port_qsets = 1;
558 			pci_release_msi(dev);
559 		} else {
560 			sc->flags |= USING_MSI;
561 			sc->cxgb_intr = t3_intr_msi;
562 			device_printf(dev, "using MSI interrupts\n");
563 		}
564 	}
565 	if (sc->msi_count == 0) {
566 		device_printf(dev, "using line interrupts\n");
567 		sc->cxgb_intr = t3b_intr;
568 	}
569 
570 	/* Create a private taskqueue thread for handling driver events */
571 	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
572 	    taskqueue_thread_enqueue, &sc->tq);
573 	if (sc->tq == NULL) {
574 		device_printf(dev, "failed to allocate controller task queue\n");
		error = ENOMEM;
575 		goto out;
576 	}
577 
578 	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
579 	    device_get_nameunit(dev));
580 	TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);
581 	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
582 
583 
584 	/* Create a periodic callout for checking adapter status */
585 	callout_init(&sc->cxgb_tick_ch, TRUE);
586 
587 	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
588 		/*
589 		 * Warn user that a firmware update will be attempted in init.
590 		 */
591 		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
592 		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
593 		sc->flags &= ~FW_UPTODATE;
594 	} else {
595 		sc->flags |= FW_UPTODATE;
596 	}
597 
598 	if (t3_check_tpsram_version(sc) < 0) {
599 		/*
600 		 * Warn user that a firmware update will be attempted in init.
601 		 */
602 		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
603 		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
604 		sc->flags &= ~TPS_UPTODATE;
605 	} else {
606 		sc->flags |= TPS_UPTODATE;
607 	}
608 
609 	/*
610 	 * Create a child device for each MAC.  The ethernet attachment
611 	 * will be done in these children.
612 	 */
613 	for (i = 0; i < (sc)->params.nports; i++) {
614 		struct port_info *pi;
615 
616 		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
617 			device_printf(dev, "failed to add child port\n");
618 			error = EINVAL;
619 			goto out;
620 		}
621 		pi = &sc->port[i];
622 		pi->adapter = sc;
623 		pi->nqsets = port_qsets;
624 		pi->first_qset = i*port_qsets;
625 		pi->port_id = i;
626 		pi->tx_chan = i >= ai->nports0;
627 		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
628 		sc->rxpkt_map[pi->txpkt_intf] = i;
629 		sc->port[i].tx_chan = i >= ai->nports0;
630 		sc->portdev[i] = child;
631 		device_set_softc(child, pi);
632 	}
633 	if ((error = bus_generic_attach(dev)) != 0)
634 		goto out;
635 
636 	/* initialize sge private state */
637 	t3_sge_init_adapter(sc);
638 
639 	t3_led_ready(sc);
640 
641 	cxgb_offload_init();
642 	if (is_offload(sc)) {
643 		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
644 		cxgb_adapter_ofld(sc);
645         }
646 	error = t3_get_fw_version(sc, &vers);
647 	if (error)
648 		goto out;
649 
650 	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
651 	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
652 	    G_FW_VERSION_MICRO(vers));
653 
654 	snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
655 		 ai->desc, is_offload(sc) ? "R" : "",
656 		 sc->params.vpd.ec, sc->params.vpd.sn);
657 	device_set_desc_copy(dev, buf);
658 
659 	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
660 		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
661 		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
662 
663 	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
664 	callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
665 	t3_add_attach_sysctls(sc);
666 out:
667 	if (error)
668 		cxgb_free(sc);
669 
670 	return (error);
671 }
672 
673 /*
674  * The cxgb_controller_detach routine is called when the device is
675  * unloaded from the system.
676  */
677 
678 static int
679 cxgb_controller_detach(device_t dev)
680 {
681 	struct adapter *sc;
682 
683 	sc = device_get_softc(dev);
684 
685 	cxgb_free(sc);
686 
687 	return (0);
688 }
689 
690 /*
691  * cxgb_free() is called by the cxgb_controller_detach() routine
692  * to tear down the structures that were built up in
693  * cxgb_controller_attach(), and should be the final piece of work
694  * done when fully unloading the driver.
695  *
696  * Its responsibilities include:
697  *  1. Shutting down the threads started by the cxgb_controller_attach()
698  *     routine.
699  *  2. Stopping the lower level device and all callouts (cxgb_down()).
700  *  3. Detaching all of the port devices created during the
701  *     cxgb_controller_attach() routine.
702  *  4. Removing the device children created via cxgb_controller_attach().
703  *  5. Releasing PCI resources associated with the device.
704  *  6. Turning off the offload support, iff it was turned on.
705  *  7. Destroying the mutexes created in cxgb_controller_attach().
706  *
707  */
708 static void
709 cxgb_free(struct adapter *sc)
710 {
711 	int i;
712 
713 	ADAPTER_LOCK(sc);
714 	sc->flags |= CXGB_SHUTDOWN;
715 	ADAPTER_UNLOCK(sc);
716 
717 	/*
718 	 * Make sure all child devices are gone.
719 	 */
720 	bus_generic_detach(sc->dev);
721 	for (i = 0; i < (sc)->params.nports; i++) {
722 		if (sc->portdev[i] &&
723 		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
724 			device_printf(sc->dev, "failed to delete child port\n");
725 	}
726 
727 	/*
728 	 * At this point, it is as if cxgb_port_detach has run on all ports, and
729 	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
730 	 * all open devices have been closed.
731 	 */
732 	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
733 					   __func__, sc->open_device_map));
734 	for (i = 0; i < sc->params.nports; i++) {
735 		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
736 						  __func__, i));
737 	}
738 
739 	/*
740 	 * Finish off the adapter's callouts.
741 	 */
742 	callout_drain(&sc->cxgb_tick_ch);
743 	callout_drain(&sc->sge_timer_ch);
744 
745 	/*
746 	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
747 	 * sysctls are cleaned up by the kernel linker.
748 	 */
749 	if (sc->flags & FULL_INIT_DONE) {
750  		t3_free_sge_resources(sc);
751  		sc->flags &= ~FULL_INIT_DONE;
752  	}
753 
754 	/*
755 	 * Release all interrupt resources.
756 	 */
757 	cxgb_teardown_interrupts(sc);
758 	if (sc->flags & (USING_MSI | USING_MSIX)) {
759 		device_printf(sc->dev, "releasing msi message(s)\n");
760 		pci_release_msi(sc->dev);
761 	} else {
762 		device_printf(sc->dev, "no msi message to release\n");
763 	}
764 
765 	if (sc->msix_regs_res != NULL) {
766 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
767 		    sc->msix_regs_res);
768 	}
769 
770 	/*
771 	 * Free the adapter's taskqueue.
772 	 */
773 	if (sc->tq != NULL) {
774 		taskqueue_free(sc->tq);
775 		sc->tq = NULL;
776 	}
777 
778 	if (is_offload(sc)) {
779 		clrbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
780 		cxgb_adapter_unofld(sc);
781 	}
782 
783 #ifdef notyet
784 	if (sc->flags & CXGB_OFLD_INIT)
785 		cxgb_offload_deactivate(sc);
786 #endif
787 	free(sc->filters, M_DEVBUF);
788 	t3_sge_free(sc);
789 
790 	cxgb_offload_exit();
791 
792 	if (sc->udbs_res != NULL)
793 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
794 		    sc->udbs_res);
795 
796 	if (sc->regs_res != NULL)
797 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
798 		    sc->regs_res);
799 
800 	MTX_DESTROY(&sc->mdio_lock);
801 	MTX_DESTROY(&sc->sge.reg_lock);
802 	MTX_DESTROY(&sc->elmer_lock);
803 	ADAPTER_LOCK_DEINIT(sc);
804 }
805 
806 /**
807  *	setup_sge_qsets - configure SGE Tx/Rx/response queues
808  *	@sc: the controller softc
809  *
810  *	Determines how many sets of SGE queues to use and initializes them.
811  *	We support multiple queue sets per port if we have MSI-X, otherwise
812  *	just one queue set per port.
813  */
814 static int
815 setup_sge_qsets(adapter_t *sc)
816 {
817 	int i, j, err, irq_idx = 0, qset_idx = 0;
818 	u_int ntxq = SGE_TXQ_PER_SET;
819 
820 	if ((err = t3_sge_alloc(sc)) != 0) {
821 		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
822 		return (err);
823 	}
824 
825 	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
826 		irq_idx = -1;
827 
828 	for (i = 0; i < (sc)->params.nports; i++) {
829 		struct port_info *pi = &sc->port[i];
830 
831 		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
832 			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
833 			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
834 			    &sc->params.sge.qset[qset_idx], ntxq, pi);
835 			if (err) {
836 				t3_free_sge_resources(sc);
837 				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n",
838 				    err);
839 				return (err);
840 			}
841 		}
842 	}
843 
844 	return (0);
845 }
846 
847 static void
848 cxgb_teardown_interrupts(adapter_t *sc)
849 {
850 	int i;
851 
852 	for (i = 0; i < SGE_QSETS; i++) {
853 		if (sc->msix_intr_tag[i] == NULL) {
854 
855 			/* Should have been set up fully or not at all */
856 			KASSERT(sc->msix_irq_res[i] == NULL &&
857 				sc->msix_irq_rid[i] == 0,
858 				("%s: half-done interrupt (%d).", __func__, i));
859 
860 			continue;
861 		}
862 
863 		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
864 				  sc->msix_intr_tag[i]);
865 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
866 				     sc->msix_irq_res[i]);
867 
868 		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
869 		sc->msix_irq_rid[i] = 0;
870 	}
871 
872 	if (sc->intr_tag) {
873 		KASSERT(sc->irq_res != NULL,
874 			("%s: half-done interrupt.", __func__));
875 
876 		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
877 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
878 				     sc->irq_res);
879 
880 		sc->irq_res = sc->intr_tag = NULL;
881 		sc->irq_rid = 0;
882 	}
883 }
884 
885 static int
886 cxgb_setup_interrupts(adapter_t *sc)
887 {
888 	struct resource *res;
889 	void *tag;
890 	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
891 
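	/*
	 * The INTx line uses SYS_RES_IRQ rid 0; the first MSI or MSI-X vector
	 * is rid 1, and any additional MSI-X vectors follow at rids 2, 3, ...
	 */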
892 	sc->irq_rid = intr_flag ? 1 : 0;
893 	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
894 					     RF_SHAREABLE | RF_ACTIVE);
895 	if (sc->irq_res == NULL) {
896 		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
897 			      intr_flag, sc->irq_rid);
898 		err = EINVAL;
899 		sc->irq_rid = 0;
900 	} else {
901 		err = bus_setup_intr(sc->dev, sc->irq_res,
902 		    INTR_MPSAFE | INTR_TYPE_NET, NULL,
903 		    sc->cxgb_intr, sc, &sc->intr_tag);
904 
905 		if (err) {
906 			device_printf(sc->dev,
907 				      "Cannot set up interrupt (%x, %u, %d)\n",
908 				      intr_flag, sc->irq_rid, err);
909 			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
910 					     sc->irq_res);
911 			sc->irq_res = sc->intr_tag = NULL;
912 			sc->irq_rid = 0;
913 		}
914 	}
915 
916 	/* That's all for INTx or MSI */
917 	if (!(intr_flag & USING_MSIX) || err)
918 		return (err);
919 
920 	for (i = 0; i < sc->msi_count - 1; i++) {
921 		rid = i + 2;
922 		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
923 					     RF_SHAREABLE | RF_ACTIVE);
924 		if (res == NULL) {
925 			device_printf(sc->dev, "Cannot allocate interrupt "
926 				      "for message %d\n", rid);
927 			err = EINVAL;
928 			break;
929 		}
930 
931 		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
932 				     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
933 		if (err) {
934 			device_printf(sc->dev, "Cannot set up interrupt "
935 				      "for message %d (%d)\n", rid, err);
936 			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
937 			break;
938 		}
939 
940 		sc->msix_irq_rid[i] = rid;
941 		sc->msix_irq_res[i] = res;
942 		sc->msix_intr_tag[i] = tag;
943 	}
944 
945 	if (err)
946 		cxgb_teardown_interrupts(sc);
947 
948 	return (err);
949 }
950 
951 
952 static int
953 cxgb_port_probe(device_t dev)
954 {
955 	struct port_info *p;
956 	char buf[80];
957 	const char *desc;
958 
959 	p = device_get_softc(dev);
960 	desc = p->phy.desc;
961 	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
962 	device_set_desc_copy(dev, buf);
963 	return (0);
964 }
965 
966 
967 static int
968 cxgb_makedev(struct port_info *pi)
969 {
970 
971 	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
972 	    UID_ROOT, GID_WHEEL, 0600, if_name(pi->ifp));
973 
974 	if (pi->port_cdev == NULL)
975 		return (ENOMEM);
976 
977 	pi->port_cdev->si_drv1 = (void *)pi;
978 
979 	return (0);
980 }
981 
982 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
983     IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
984     IFCAP_VLAN_HWTSO)
985 #define CXGB_CAP_ENABLE (CXGB_CAP & ~IFCAP_TSO6)
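/* All of CXGB_CAP is advertised, but TSO over IPv6 is left disabled by default. */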
986 
987 static int
988 cxgb_port_attach(device_t dev)
989 {
990 	struct port_info *p;
991 	struct ifnet *ifp;
992 	int err;
993 	struct adapter *sc;
994 
995 	p = device_get_softc(dev);
996 	sc = p->adapter;
997 	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
998 	    device_get_unit(device_get_parent(dev)), p->port_id);
999 	PORT_LOCK_INIT(p, p->lockbuf);
1000 
1001 	/* Allocate an ifnet object and set it up */
1002 	ifp = p->ifp = if_alloc(IFT_ETHER);
1003 	if (ifp == NULL) {
1004 		device_printf(dev, "Cannot allocate ifnet\n");
1005 		return (ENOMEM);
1006 	}
1007 
1008 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1009 	ifp->if_init = cxgb_init;
1010 	ifp->if_softc = p;
1011 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1012 	ifp->if_ioctl = cxgb_ioctl;
1013 	ifp->if_start = cxgb_start;
1014 
1015 	ifp->if_snd.ifq_drv_maxlen = cxgb_snd_queue_len;
1016 	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
1017 	IFQ_SET_READY(&ifp->if_snd);
1018 
1019 	ifp->if_capabilities = CXGB_CAP;
1020 	ifp->if_capenable = CXGB_CAP_ENABLE;
1021 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO;
1022 
1023 	/*
1024 	 * Disable TSO on 4-port - it isn't supported by the firmware.
1025 	 */
1026 	if (sc->params.nports > 2) {
1027 		ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1028 		ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1029 		ifp->if_hwassist &= ~CSUM_TSO;
1030 	}
1031 
1032 	ether_ifattach(ifp, p->hw_addr);
1033 	ifp->if_transmit = cxgb_transmit;
1034 	ifp->if_qflush = cxgb_qflush;
1035 
1036 #ifdef DEFAULT_JUMBO
1037 	if (sc->params.nports <= 2)
1038 		ifp->if_mtu = ETHERMTU_JUMBO;
1039 #endif
1040 	if ((err = cxgb_makedev(p)) != 0) {
1041 		printf("makedev failed %d\n", err);
1042 		return (err);
1043 	}
1044 
1045 	/* Create a list of media supported by this port */
1046 	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1047 	    cxgb_media_status);
1048 	cxgb_build_medialist(p);
1049 
1050 	t3_sge_init_port(p);
1051 
1052 	return (err);
1053 }
1054 
1055 /*
1056  * cxgb_port_detach() is called via the device_detach methods when
1057  * cxgb_free() calls the bus_generic_detach.  It is responsible for
1058  * removing the device from the view of the kernel, i.e. from all
1059  * interfaces lists etc.  This routine is only called when the driver is
1060  * being unloaded, not when the link goes down.
1061  */
1062 static int
1063 cxgb_port_detach(device_t dev)
1064 {
1065 	struct port_info *p;
1066 	struct adapter *sc;
1067 	int i;
1068 
1069 	p = device_get_softc(dev);
1070 	sc = p->adapter;
1071 
1072 	/* Tell cxgb_ioctl and if_init that the port is going away */
1073 	ADAPTER_LOCK(sc);
1074 	SET_DOOMED(p);
1075 	wakeup(&sc->flags);
1076 	while (IS_BUSY(sc))
1077 		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1078 	SET_BUSY(sc);
1079 	ADAPTER_UNLOCK(sc);
1080 
1081 	if (p->port_cdev != NULL)
1082 		destroy_dev(p->port_cdev);
1083 
1084 	cxgb_uninit_synchronized(p);
1085 	ether_ifdetach(p->ifp);
1086 
1087 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1088 		struct sge_qset *qs = &sc->sge.qs[i];
1089 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1090 
1091 		callout_drain(&txq->txq_watchdog);
1092 		callout_drain(&txq->txq_timer);
1093 	}
1094 
1095 	PORT_LOCK_DEINIT(p);
1096 	if_free(p->ifp);
1097 	p->ifp = NULL;
1098 
1099 	ADAPTER_LOCK(sc);
1100 	CLR_BUSY(sc);
1101 	wakeup_one(&sc->flags);
1102 	ADAPTER_UNLOCK(sc);
1103 	return (0);
1104 }
1105 
1106 void
1107 t3_fatal_err(struct adapter *sc)
1108 {
1109 	u_int fw_status[4];
1110 
1111 	if (sc->flags & FULL_INIT_DONE) {
1112 		t3_sge_stop(sc);
1113 		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1114 		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1115 		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1116 		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1117 		t3_intr_disable(sc);
1118 	}
1119 	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1120 	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1121 		device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1122 		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1123 }
1124 
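/*
 * OS-dependent helper for the shared T3 code: walk the PCI capability list by
 * hand and return the config-space offset of capability `cap', or 0 if it is
 * not present.
 */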
1125 int
1126 t3_os_find_pci_capability(adapter_t *sc, int cap)
1127 {
1128 	device_t dev;
1129 	struct pci_devinfo *dinfo;
1130 	pcicfgregs *cfg;
1131 	uint32_t status;
1132 	uint8_t ptr;
1133 
1134 	dev = sc->dev;
1135 	dinfo = device_get_ivars(dev);
1136 	cfg = &dinfo->cfg;
1137 
1138 	status = pci_read_config(dev, PCIR_STATUS, 2);
1139 	if (!(status & PCIM_STATUS_CAPPRESENT))
1140 		return (0);
1141 
1142 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1143 	case 0:
1144 	case 1:
1145 		ptr = PCIR_CAP_PTR;
1146 		break;
1147 	case 2:
1148 		ptr = PCIR_CAP_PTR_2;
1149 		break;
1150 	default:
1151 		return (0);
1152 		break;
1153 	}
1154 	ptr = pci_read_config(dev, ptr, 1);
1155 
1156 	while (ptr != 0) {
1157 		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1158 			return (ptr);
1159 		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1160 	}
1161 
1162 	return (0);
1163 }
1164 
1165 int
1166 t3_os_pci_save_state(struct adapter *sc)
1167 {
1168 	device_t dev;
1169 	struct pci_devinfo *dinfo;
1170 
1171 	dev = sc->dev;
1172 	dinfo = device_get_ivars(dev);
1173 
1174 	pci_cfg_save(dev, dinfo, 0);
1175 	return (0);
1176 }
1177 
1178 int
1179 t3_os_pci_restore_state(struct adapter *sc)
1180 {
1181 	device_t dev;
1182 	struct pci_devinfo *dinfo;
1183 
1184 	dev = sc->dev;
1185 	dinfo = device_get_ivars(dev);
1186 
1187 	pci_cfg_restore(dev, dinfo);
1188 	return (0);
1189 }
1190 
1191 /**
1192  *	t3_os_link_changed - handle link status changes
1193  *	@sc: the adapter associated with the link change
1194  *	@port_id: the port index whose link status has changed
1195  *	@link_status: the new status of the link
1196  *	@speed: the new speed setting
1197  *	@duplex: the new duplex setting
1198  *	@fc: the new flow-control setting
1199  *
1200  *	This is the OS-dependent handler for link status changes.  The OS
1201  *	neutral handler takes care of most of the processing for these events,
1202  *	then calls this handler for any OS-specific processing.
1203  */
1204 void
1205 t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1206      int duplex, int fc, int mac_was_reset)
1207 {
1208 	struct port_info *pi = &adapter->port[port_id];
1209 	struct ifnet *ifp = pi->ifp;
1210 
1211 	/* no race with detach, so ifp should always be good */
1212 	KASSERT(ifp, ("%s: if detached.", __func__));
1213 
1214 	/* Reapply mac settings if they were lost due to a reset */
1215 	if (mac_was_reset) {
1216 		PORT_LOCK(pi);
1217 		cxgb_update_mac_settings(pi);
1218 		PORT_UNLOCK(pi);
1219 	}
1220 
1221 	if (link_status) {
1222 		ifp->if_baudrate = IF_Mbps(speed);
1223 		if_link_state_change(ifp, LINK_STATE_UP);
1224 	} else
1225 		if_link_state_change(ifp, LINK_STATE_DOWN);
1226 }
1227 
1228 /**
1229  *	t3_os_phymod_changed - handle PHY module changes
1230  *	@phy: the PHY reporting the module change
1231  *	@mod_type: new module type
1232  *
1233  *	This is the OS-dependent handler for PHY module changes.  It is
1234  *	invoked when a PHY module is removed or inserted for any OS-specific
1235  *	processing.
1236  */
1237 void t3_os_phymod_changed(struct adapter *adap, int port_id)
1238 {
1239 	static const char *mod_str[] = {
1240 		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1241 	};
1242 	struct port_info *pi = &adap->port[port_id];
1243 	int mod = pi->phy.modtype;
1244 
1245 	if (mod != pi->media.ifm_cur->ifm_data)
1246 		cxgb_build_medialist(pi);
1247 
1248 	if (mod == phy_modtype_none)
1249 		if_printf(pi->ifp, "PHY module unplugged\n");
1250 	else {
1251 		KASSERT(mod < ARRAY_SIZE(mod_str),
1252 			("invalid PHY module type %d", mod));
1253 		if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1254 	}
1255 }
1256 
1257 /*
1258  * Interrupt-context handler for external (PHY) interrupts.
1259  */
1260 void
1261 t3_os_ext_intr_handler(adapter_t *sc)
1262 {
1263 	if (cxgb_debug)
1264 		printf("t3_os_ext_intr_handler\n");
1265 	/*
1266 	 * Schedule a task to handle external interrupts as they may be slow
1267 	 * and we use a mutex to protect MDIO registers.  We disable PHY
1268 	 * interrupts in the meantime and let the task reenable them when
1269 	 * it's done.
1270 	 */
1271 	if (sc->slow_intr_mask) {
1272 		ADAPTER_LOCK(sc);
1273 		sc->slow_intr_mask &= ~F_T3DBG;
1274 		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
1275 		taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
1276 		ADAPTER_UNLOCK(sc);
1277 	}
1278 }
1279 
1280 void
1281 t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1282 {
1283 
1284 	/*
1285 	 * The ifnet might not be allocated before this gets called,
1286 	 * as this is called early on in attach by t3_prep_adapter, so
1287 	 * save the address off in the port structure.
1288 	 */
1289 	if (cxgb_debug)
1290 		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1291 	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1292 }
1293 
1294 /*
1295  * Programs the XGMAC based on the settings in the ifnet.  These settings
1296  * include MTU, MAC address, mcast addresses, etc.
1297  */
1298 static void
1299 cxgb_update_mac_settings(struct port_info *p)
1300 {
1301 	struct ifnet *ifp = p->ifp;
1302 	struct t3_rx_mode rm;
1303 	struct cmac *mac = &p->mac;
1304 	int mtu, hwtagging;
1305 
1306 	PORT_LOCK_ASSERT_OWNED(p);
1307 
1308 	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1309 
1310 	mtu = ifp->if_mtu;
1311 	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1312 		mtu += ETHER_VLAN_ENCAP_LEN;
1313 
1314 	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1315 
1316 	t3_mac_set_mtu(mac, mtu);
1317 	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1318 	t3_mac_set_address(mac, 0, p->hw_addr);
1319 	t3_init_rx_mode(&rm, p);
1320 	t3_mac_set_rx_mode(mac, &rm);
1321 }
1322 
1323 
1324 static int
1325 await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1326 			      unsigned long n)
1327 {
1328 	int attempts = 5;
1329 
1330 	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1331 		if (!--attempts)
1332 			return (ETIMEDOUT);
1333 		t3_os_sleep(10);
1334 	}
1335 	return 0;
1336 }
1337 
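/*
 * Initialize the parity of the TP's on-chip memories by writing every SMT,
 * L2T, and routing-table entry (and one TCB field) with benign values, then
 * wait for the firmware to acknowledge all 16 + 2048 + 2048 + 1 requests.
 */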
1338 static int
1339 init_tp_parity(struct adapter *adap)
1340 {
1341 	int i;
1342 	struct mbuf *m;
1343 	struct cpl_set_tcb_field *greq;
1344 	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1345 
1346 	t3_tp_set_offload_mode(adap, 1);
1347 
1348 	for (i = 0; i < 16; i++) {
1349 		struct cpl_smt_write_req *req;
1350 
1351 		m = m_gethdr(M_WAITOK, MT_DATA);
1352 		req = mtod(m, struct cpl_smt_write_req *);
1353 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1354 		memset(req, 0, sizeof(*req));
1355 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1356 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1357 		req->iff = i;
1358 		t3_mgmt_tx(adap, m);
1359 	}
1360 
1361 	for (i = 0; i < 2048; i++) {
1362 		struct cpl_l2t_write_req *req;
1363 
1364 		m = m_gethdr(M_WAITOK, MT_DATA);
1365 		req = mtod(m, struct cpl_l2t_write_req *);
1366 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1367 		memset(req, 0, sizeof(*req));
1368 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1369 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1370 		req->params = htonl(V_L2T_W_IDX(i));
1371 		t3_mgmt_tx(adap, m);
1372 	}
1373 
1374 	for (i = 0; i < 2048; i++) {
1375 		struct cpl_rte_write_req *req;
1376 
1377 		m = m_gethdr(M_WAITOK, MT_DATA);
1378 		req = mtod(m, struct cpl_rte_write_req *);
1379 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1380 		memset(req, 0, sizeof(*req));
1381 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1382 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1383 		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1384 		t3_mgmt_tx(adap, m);
1385 	}
1386 
1387 	m = m_gethdr(M_WAITOK, MT_DATA);
1388 	greq = mtod(m, struct cpl_set_tcb_field *);
1389 	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1390 	memset(greq, 0, sizeof(*greq));
1391 	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1392 	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1393 	greq->mask = htobe64(1);
1394 	t3_mgmt_tx(adap, m);
1395 
1396 	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1397 	t3_tp_set_offload_mode(adap, 0);
1398 	return (i);
1399 }
1400 
1401 /**
1402  *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1403  *	@adap: the adapter
1404  *
1405  *	Sets up RSS to distribute packets to multiple receive queues.  We
1406  *	configure the RSS CPU lookup table to distribute to the number of HW
1407  *	receive queues, and the response queue lookup table to narrow that
1408  *	down to the response queues actually configured for each port.
1409  *	We always configure the RSS mapping for two ports since the mapping
1410  *	table has plenty of entries.
1411  */
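/*
 * Worked example (assumed configuration): with two ports of four queue sets
 * each, nq[0] = nq[1] = 4, so the first half of rspq_map cycles through
 * response queues 0-3 and the second half through 4-7; rrss_map then records,
 * for each response queue, the first RSS table index that maps to it.
 */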
1412 static void
1413 setup_rss(adapter_t *adap)
1414 {
1415 	int i;
1416 	u_int nq[2];
1417 	uint8_t cpus[SGE_QSETS + 1];
1418 	uint16_t rspq_map[RSS_TABLE_SIZE];
1419 
1420 	for (i = 0; i < SGE_QSETS; ++i)
1421 		cpus[i] = i;
1422 	cpus[SGE_QSETS] = 0xff;
1423 
1424 	nq[0] = nq[1] = 0;
1425 	for_each_port(adap, i) {
1426 		const struct port_info *pi = adap2pinfo(adap, i);
1427 
1428 		nq[pi->tx_chan] += pi->nqsets;
1429 	}
1430 	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1431 		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1432 		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1433 	}
1434 
1435 	/* Calculate the reverse RSS map table */
1436 	for (i = 0; i < SGE_QSETS; ++i)
1437 		adap->rrss_map[i] = 0xff;
1438 	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1439 		if (adap->rrss_map[rspq_map[i]] == 0xff)
1440 			adap->rrss_map[rspq_map[i]] = i;
1441 
1442 	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1443 		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1444 	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1445 	              cpus, rspq_map);
1446 
1447 }
1448 
1449 /*
1450  * Sends an mbuf to an offload queue driver
1451  * after dealing with any active network taps.
1452  */
1453 static inline int
1454 offload_tx(struct t3cdev *tdev, struct mbuf *m)
1455 {
1456 	int ret;
1457 
1458 	ret = t3_offload_tx(tdev, m);
1459 	return (ret);
1460 }
1461 
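/*
 * Program one entry of the source MAC table (SMT) used by the offload path
 * with the given port's MAC address, via a CPL_SMT_WRITE_REQ work request.
 */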
1462 static int
1463 write_smt_entry(struct adapter *adapter, int idx)
1464 {
1465 	struct port_info *pi = &adapter->port[idx];
1466 	struct cpl_smt_write_req *req;
1467 	struct mbuf *m;
1468 
1469 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1470 		return (ENOMEM);
1471 
1472 	req = mtod(m, struct cpl_smt_write_req *);
1473 	m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
1474 
1475 	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1476 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1477 	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1478 	req->iff = idx;
1479 	memset(req->src_mac1, 0, sizeof(req->src_mac1));
1480 	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1481 
1482 	m_set_priority(m, 1);
1483 
1484 	offload_tx(&adapter->tdev, m);
1485 
1486 	return (0);
1487 }
1488 
1489 static int
1490 init_smt(struct adapter *adapter)
1491 {
1492 	int i;
1493 
1494 	for_each_port(adapter, i)
1495 		write_smt_entry(adapter, i);
1496 	return 0;
1497 }
1498 
1499 static void
1500 init_port_mtus(adapter_t *adapter)
1501 {
1502 	unsigned int mtus = ETHERMTU | (ETHERMTU << 16);
1503 
1504 	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1505 }
1506 
1507 static void
1508 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1509 			      int hi, int port)
1510 {
1511 	struct mbuf *m;
1512 	struct mngt_pktsched_wr *req;
1513 
1514 	m = m_gethdr(M_DONTWAIT, MT_DATA);
1515 	if (m) {
1516 		req = mtod(m, struct mngt_pktsched_wr *);
1517 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1518 		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1519 		req->sched = sched;
1520 		req->idx = qidx;
1521 		req->min = lo;
1522 		req->max = hi;
1523 		req->binding = port;
1524 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1525 		t3_mgmt_tx(adap, m);
1526 	}
1527 }
1528 
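/*
 * Bind each port's queue sets to that port's TX channel by issuing one
 * firmware packet-scheduler command per queue set.
 */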
1529 static void
1530 bind_qsets(adapter_t *sc)
1531 {
1532 	int i, j;
1533 
1534 	for (i = 0; i < (sc)->params.nports; ++i) {
1535 		const struct port_info *pi = adap2pinfo(sc, i);
1536 
1537 		for (j = 0; j < pi->nqsets; ++j) {
1538 			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1539 					  -1, pi->tx_chan);
1540 
1541 		}
1542 	}
1543 }
1544 
1545 static void
1546 update_tpeeprom(struct adapter *adap)
1547 {
1548 	const struct firmware *tpeeprom;
1549 
1550 	uint32_t version;
1551 	unsigned int major, minor;
1552 	int ret, len;
1553 	char rev, name[32];
1554 
1555 	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1556 
1557 	major = G_TP_VERSION_MAJOR(version);
1558 	minor = G_TP_VERSION_MINOR(version);
1559 	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1560 		return;
1561 
1562 	rev = t3rev2char(adap);
1563 	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1564 
1565 	tpeeprom = firmware_get(name);
1566 	if (tpeeprom == NULL) {
1567 		device_printf(adap->dev,
1568 			      "could not load TP EEPROM: unable to load %s\n",
1569 			      name);
1570 		return;
1571 	}
1572 
1573 	len = tpeeprom->datasize - 4;
1574 
1575 	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1576 	if (ret)
1577 		goto release_tpeeprom;
1578 
1579 	if (len != TP_SRAM_LEN) {
1580 		device_printf(adap->dev,
1581 			      "%s length is wrong len=%d expected=%d\n", name,
1582 			      len, TP_SRAM_LEN);
1583 		goto release_tpeeprom;
1584 	}
1585 
1586 	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1587 	    TP_SRAM_OFFSET);
1588 
1589 	if (!ret) {
1590 		device_printf(adap->dev,
1591 			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1592 			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1593 	} else
1594 		device_printf(adap->dev,
1595 			      "Protocol SRAM image update in EEPROM failed\n");
1596 
1597 release_tpeeprom:
1598 	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1599 
1600 	return;
1601 }
1602 
1603 static int
1604 update_tpsram(struct adapter *adap)
1605 {
1606 	const struct firmware *tpsram;
1607 	int ret;
1608 	char rev, name[32];
1609 
1610 	rev = t3rev2char(adap);
1611 	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1612 
1613 	update_tpeeprom(adap);
1614 
1615 	tpsram = firmware_get(name);
1616 	if (tpsram == NULL) {
1617 		device_printf(adap->dev, "could not load TP SRAM\n");
1618 		return (EINVAL);
1619 	} else
1620 		device_printf(adap->dev, "updating TP SRAM\n");
1621 
1622 	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1623 	if (ret)
1624 		goto release_tpsram;
1625 
1626 	ret = t3_set_proto_sram(adap, tpsram->data);
1627 	if (ret)
1628 		device_printf(adap->dev, "loading protocol SRAM failed\n");
1629 
1630 release_tpsram:
1631 	firmware_put(tpsram, FIRMWARE_UNLOAD);
1632 
1633 	return ret;
1634 }
1635 
1636 /**
1637  *	cxgb_up - enable the adapter
1638  *	@adap: adapter being enabled
1639  *
1640  *	Called when the first port is enabled, this function performs the
1641  *	actions necessary to make an adapter operational, such as completing
1642  *	the initialization of HW modules, and enabling interrupts.
1643  */
1644 static int
1645 cxgb_up(struct adapter *sc)
1646 {
1647 	int err = 0;
1648 
1649 	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1650 					   __func__, sc->open_device_map));
1651 
1652 	if ((sc->flags & FULL_INIT_DONE) == 0) {
1653 
1654 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1655 
1656 		if ((sc->flags & FW_UPTODATE) == 0)
1657 			if ((err = upgrade_fw(sc)))
1658 				goto out;
1659 
1660 		if ((sc->flags & TPS_UPTODATE) == 0)
1661 			if ((err = update_tpsram(sc)))
1662 				goto out;
1663 
1664 		err = t3_init_hw(sc, 0);
1665 		if (err)
1666 			goto out;
1667 
1668 		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1669 		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1670 
1671 		err = setup_sge_qsets(sc);
1672 		if (err)
1673 			goto out;
1674 
1675 		setup_rss(sc);
1676 
1677 		t3_intr_clear(sc);
1678 		err = cxgb_setup_interrupts(sc);
1679 		if (err)
1680 			goto out;
1681 
1682 		t3_add_configured_sysctls(sc);
1683 		sc->flags |= FULL_INIT_DONE;
1684 	}
1685 
1686 	t3_intr_clear(sc);
1687 	t3_sge_start(sc);
1688 	t3_intr_enable(sc);
1689 
1690 	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1691 	    is_offload(sc) && init_tp_parity(sc) == 0)
1692 		sc->flags |= TP_PARITY_INIT;
1693 
1694 	if (sc->flags & TP_PARITY_INIT) {
1695 		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1696 		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1697 	}
1698 
1699 	if (!(sc->flags & QUEUES_BOUND)) {
1700 		bind_qsets(sc);
1701 		sc->flags |= QUEUES_BOUND;
1702 	}
1703 
1704 	t3_sge_reset_adapter(sc);
1705 out:
1706 	return (err);
1707 }
1708 
1709 /*
1710  * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1711  * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1712  * during controller_detach, not here.
1713  */
1714 static void
1715 cxgb_down(struct adapter *sc)
1716 {
1717 	t3_sge_stop(sc);
1718 	t3_intr_disable(sc);
1719 }
1720 
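/*
 * Bring up TOE support: mark the offload device open, switch the TP into
 * offload mode, program the port MTU table and SMT, and notify any registered
 * ULP clients.
 */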
1721 static int
1722 offload_open(struct port_info *pi)
1723 {
1724 	struct adapter *sc = pi->adapter;
1725 	struct t3cdev *tdev = &sc->tdev;
1726 
1727 	setbit(&sc->open_device_map, OFFLOAD_DEVMAP_BIT);
1728 
1729 	t3_tp_set_offload_mode(sc, 1);
1730 	tdev->lldev = pi->ifp;
1731 	init_port_mtus(sc);
1732 	t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd,
1733 		     sc->params.rev == 0 ?  sc->port[0].ifp->if_mtu : 0xffff);
1734 	init_smt(sc);
1735 	cxgb_add_clients(tdev);
1736 
1737 	return (0);
1738 }
1739 
1740 static int
1741 offload_close(struct t3cdev *tdev)
1742 {
1743 	struct adapter *adapter = tdev2adap(tdev);
1744 
1745 	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1746 		return (0);
1747 
1748 	/* Call back all registered clients */
1749 	cxgb_remove_clients(tdev);
1750 
1751 	tdev->lldev = NULL;
1752 	cxgb_set_dummy_ops(tdev);
1753 	t3_tp_set_offload_mode(adapter, 0);
1754 
1755 	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1756 
1757 	return (0);
1758 }
1759 
1760 /*
1761  * if_init for cxgb ports.
1762  */
1763 static void
1764 cxgb_init(void *arg)
1765 {
1766 	struct port_info *p = arg;
1767 	struct adapter *sc = p->adapter;
1768 
1769 	ADAPTER_LOCK(sc);
1770 	cxgb_init_locked(p); /* releases adapter lock */
1771 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1772 }
1773 
1774 static int
1775 cxgb_init_locked(struct port_info *p)
1776 {
1777 	struct adapter *sc = p->adapter;
1778 	struct ifnet *ifp = p->ifp;
1779 	struct cmac *mac = &p->mac;
1780 	int i, rc = 0, may_sleep = 0;
1781 
1782 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1783 
1784 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1785 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1786 			rc = EINTR;
1787 			goto done;
1788 		}
1789 	}
1790 	if (IS_DOOMED(p)) {
1791 		rc = ENXIO;
1792 		goto done;
1793 	}
1794 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1795 
1796 	/*
1797 	 * The code that runs during one-time adapter initialization can sleep
1798 	 * so it's important not to hold any locks across it.
1799 	 */
1800 	may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1801 
1802 	if (may_sleep) {
1803 		SET_BUSY(sc);
1804 		ADAPTER_UNLOCK(sc);
1805 	}
1806 
1807 	if (sc->open_device_map == 0) {
1808 		if ((rc = cxgb_up(sc)) != 0)
1809 			goto done;
1810 
1811 		if (is_offload(sc) && !ofld_disable && offload_open(p))
1812 			log(LOG_WARNING,
1813 			    "Could not initialize offload capabilities\n");
1814 	}
1815 
1816 	PORT_LOCK(p);
1817 	if (isset(&sc->open_device_map, p->port_id) &&
1818 	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1819 		PORT_UNLOCK(p);
1820 		goto done;
1821 	}
1822 	t3_port_intr_enable(sc, p->port_id);
1823 	if (!mac->multiport)
1824 		t3_mac_init(mac);
1825 	cxgb_update_mac_settings(p);
1826 	t3_link_start(&p->phy, mac, &p->link_config);
1827 	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1828 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1829 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1830 	PORT_UNLOCK(p);
1831 
1832 	t3_link_changed(sc, p->port_id);
1833 
1834 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1835 		struct sge_qset *qs = &sc->sge.qs[i];
1836 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1837 
1838 		callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1839 				 txq->txq_watchdog.c_cpu);
1840 	}
1841 
1842 	/* all ok */
1843 	setbit(&sc->open_device_map, p->port_id);
1844 
1845 done:
1846 	if (may_sleep) {
1847 		ADAPTER_LOCK(sc);
1848 		KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1849 		CLR_BUSY(sc);
1850 		wakeup_one(&sc->flags);
1851 	}
1852 	ADAPTER_UNLOCK(sc);
1853 	return (rc);
1854 }
1855 
1856 static int
1857 cxgb_uninit_locked(struct port_info *p)
1858 {
1859 	struct adapter *sc = p->adapter;
1860 	int rc;
1861 
1862 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1863 
1864 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1865 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1866 			rc = EINTR;
1867 			goto done;
1868 		}
1869 	}
1870 	if (IS_DOOMED(p)) {
1871 		rc = ENXIO;
1872 		goto done;
1873 	}
1874 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1875 	SET_BUSY(sc);
1876 	ADAPTER_UNLOCK(sc);
1877 
1878 	rc = cxgb_uninit_synchronized(p);
1879 
1880 	ADAPTER_LOCK(sc);
1881 	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1882 	CLR_BUSY(sc);
1883 	wakeup_one(&sc->flags);
1884 done:
1885 	ADAPTER_UNLOCK(sc);
1886 	return (rc);
1887 }
1888 
1889 /*
1890  * Called on "ifconfig down", and from port_detach
1891  */
1892 static int
1893 cxgb_uninit_synchronized(struct port_info *pi)
1894 {
1895 	struct adapter *sc = pi->adapter;
1896 	struct ifnet *ifp = pi->ifp;
1897 
1898 	/*
1899 	 * taskqueue_drain may cause a deadlock if the adapter lock is held.
1900 	 */
1901 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1902 
1903 	/*
1904 	 * Clear this port's bit from the open device map, and then drain all
1905 	 * the tasks that can access/manipulate this port's port_info or ifp.
1906 	 * We disable this port's interrupts here so that the slow/ext
1907 	 * interrupt tasks won't be enqueued.  The tick task will continue to
1908 	 * be enqueued every second but the runs after this drain will not see
1909 	 * this port in the open device map.
1910 	 *
1911 	 * A well-behaved task must take open_device_map into account and ignore
1912 	 * ports that are not open.
1913 	 */
1914 	clrbit(&sc->open_device_map, pi->port_id);
1915 	t3_port_intr_disable(sc, pi->port_id);
1916 	taskqueue_drain(sc->tq, &sc->slow_intr_task);
1917 	taskqueue_drain(sc->tq, &sc->ext_intr_task);
1918 	taskqueue_drain(sc->tq, &sc->tick_task);
1919 
1920 	PORT_LOCK(pi);
1921 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1922 
1923 	/* disable pause frames */
1924 	t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1925 
1926 	/* Reset RX FIFO HWM */
1927 	t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1928 			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1929 
1930 	DELAY(100 * 1000);
1931 
1932 	/* Wait for TXFIFO empty */
1933 	t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1934 			F_TXFIFO_EMPTY, 1, 20, 5);
1935 
1936 	DELAY(100 * 1000);
1937 	t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1938 
1939 
1940 	pi->phy.ops->power_down(&pi->phy, 1);
1941 
1942 	PORT_UNLOCK(pi);
1943 
1944 	pi->link_config.link_ok = 0;
1945 	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1946 
1947 	if ((sc->open_device_map & PORT_MASK) == 0)
1948 		offload_close(&sc->tdev);
1949 
1950 	if (sc->open_device_map == 0)
1951 		cxgb_down(pi->adapter);
1952 
1953 	return (0);
1954 }
1955 
1956 /*
1957  * Mark lro enabled or disabled in all qsets for this port
1958  */
1959 static int
1960 cxgb_set_lro(struct port_info *p, int enabled)
1961 {
1962 	int i;
1963 	struct adapter *adp = p->adapter;
1964 	struct sge_qset *q;
1965 
1966 	PORT_LOCK_ASSERT_OWNED(p);
1967 	for (i = 0; i < p->nqsets; i++) {
1968 		q = &adp->sge.qs[p->first_qset + i];
1969 		q->lro.enabled = (enabled != 0);
1970 	}
1971 	return (0);
1972 }
1973 
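/*
 * if_ioctl for cxgb ports.
 */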
1974 static int
1975 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1976 {
1977 	struct port_info *p = ifp->if_softc;
1978 	struct adapter *sc = p->adapter;
1979 	struct ifreq *ifr = (struct ifreq *)data;
1980 	int flags, error = 0, mtu;
1981 	uint32_t mask;
1982 
1983 	switch (command) {
1984 	case SIOCSIFMTU:
1985 		ADAPTER_LOCK(sc);
1986 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1987 		if (error) {
1988 fail:
1989 			ADAPTER_UNLOCK(sc);
1990 			return (error);
1991 		}
1992 
1993 		mtu = ifr->ifr_mtu;
1994 		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
1995 			error = EINVAL;
1996 		} else {
1997 			ifp->if_mtu = mtu;
1998 			PORT_LOCK(p);
1999 			cxgb_update_mac_settings(p);
2000 			PORT_UNLOCK(p);
2001 		}
2002 		ADAPTER_UNLOCK(sc);
2003 		break;
2004 	case SIOCSIFFLAGS:
2005 		ADAPTER_LOCK(sc);
2006 		if (IS_DOOMED(p)) {
2007 			error = ENXIO;
2008 			goto fail;
2009 		}
2010 		if (ifp->if_flags & IFF_UP) {
2011 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2012 				flags = p->if_flags;
2013 				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
2014 				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
2015 					if (IS_BUSY(sc)) {
2016 						error = EBUSY;
2017 						goto fail;
2018 					}
2019 					PORT_LOCK(p);
2020 					cxgb_update_mac_settings(p);
2021 					PORT_UNLOCK(p);
2022 				}
2023 				ADAPTER_UNLOCK(sc);
2024 			} else
2025 				error = cxgb_init_locked(p);
2026 			p->if_flags = ifp->if_flags;
2027 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2028 			error = cxgb_uninit_locked(p);
2029 		else
2030 			ADAPTER_UNLOCK(sc);
2031 
2032 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
2033 		break;
2034 	case SIOCADDMULTI:
2035 	case SIOCDELMULTI:
2036 		ADAPTER_LOCK(sc);
2037 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2038 		if (error)
2039 			goto fail;
2040 
2041 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2042 			PORT_LOCK(p);
2043 			cxgb_update_mac_settings(p);
2044 			PORT_UNLOCK(p);
2045 		}
2046 		ADAPTER_UNLOCK(sc);
2047 
2048 		break;
2049 	case SIOCSIFCAP:
2050 		ADAPTER_LOCK(sc);
2051 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2052 		if (error)
2053 			goto fail;
2054 
2055 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2056 		if (mask & IFCAP_TXCSUM) {
2057 			ifp->if_capenable ^= IFCAP_TXCSUM;
2058 			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2059 
2060 			if (IFCAP_TSO & ifp->if_capenable &&
2061 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2062 				ifp->if_capenable &= ~IFCAP_TSO;
2063 				ifp->if_hwassist &= ~CSUM_TSO;
2064 				if_printf(ifp,
2065 				    "tso disabled due to -txcsum.\n");
2066 			}
2067 		}
2068 		if (mask & IFCAP_RXCSUM)
2069 			ifp->if_capenable ^= IFCAP_RXCSUM;
2070 		if (mask & IFCAP_TSO4) {
2071 			ifp->if_capenable ^= IFCAP_TSO4;
2072 
2073 			if (IFCAP_TSO & ifp->if_capenable) {
2074 				if (IFCAP_TXCSUM & ifp->if_capenable)
2075 					ifp->if_hwassist |= CSUM_TSO;
2076 				else {
2077 					ifp->if_capenable &= ~IFCAP_TSO;
2078 					ifp->if_hwassist &= ~CSUM_TSO;
2079 					if_printf(ifp,
2080 					    "enable txcsum first.\n");
2081 					error = EAGAIN;
2082 				}
2083 			} else
2084 				ifp->if_hwassist &= ~CSUM_TSO;
2085 		}
2086 		if (mask & IFCAP_LRO) {
2087 			ifp->if_capenable ^= IFCAP_LRO;
2088 
2089 			/* Safe to do this even if cxgb_up has not been called yet */
2090 			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2091 		}
2092 		if (mask & IFCAP_VLAN_HWTAGGING) {
2093 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2094 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2095 				PORT_LOCK(p);
2096 				cxgb_update_mac_settings(p);
2097 				PORT_UNLOCK(p);
2098 			}
2099 		}
2100 		if (mask & IFCAP_VLAN_MTU) {
2101 			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2102 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2103 				PORT_LOCK(p);
2104 				cxgb_update_mac_settings(p);
2105 				PORT_UNLOCK(p);
2106 			}
2107 		}
2108 		if (mask & IFCAP_VLAN_HWTSO)
2109 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2110 		if (mask & IFCAP_VLAN_HWCSUM)
2111 			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2112 
2113 #ifdef VLAN_CAPABILITIES
2114 		VLAN_CAPABILITIES(ifp);
2115 #endif
2116 		ADAPTER_UNLOCK(sc);
2117 		break;
2118 	case SIOCSIFMEDIA:
2119 	case SIOCGIFMEDIA:
2120 		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2121 		break;
2122 	default:
2123 		error = ether_ioctl(ifp, command, data);
2124 	}
2125 
2126 	return (error);
2127 }
2128 
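/*
 * ifmedia change callback.  Media changes are not supported through this
 * interface.
 */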
2129 static int
2130 cxgb_media_change(struct ifnet *ifp)
2131 {
2132 	return (EOPNOTSUPP);
2133 }
2134 
2135 /*
2136  * Translates phy->modtype to the correct Ethernet media subtype.
2137  */
2138 static int
2139 cxgb_ifm_type(int mod)
2140 {
2141 	switch (mod) {
2142 	case phy_modtype_sr:
2143 		return (IFM_10G_SR);
2144 	case phy_modtype_lr:
2145 		return (IFM_10G_LR);
2146 	case phy_modtype_lrm:
2147 		return (IFM_10G_LRM);
2148 	case phy_modtype_twinax:
2149 		return (IFM_10G_TWINAX);
2150 	case phy_modtype_twinax_long:
2151 		return (IFM_10G_TWINAX_LONG);
2152 	case phy_modtype_none:
2153 		return (IFM_NONE);
2154 	case phy_modtype_unknown:
2155 		return (IFM_UNKNOWN);
2156 	}
2157 
2158 	KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2159 	return (IFM_UNKNOWN);
2160 }
2161 
2162 /*
2163  * Rebuilds the ifmedia list for this port, and sets the current media.
2164  */
2165 static void
2166 cxgb_build_medialist(struct port_info *p)
2167 {
2168 	struct cphy *phy = &p->phy;
2169 	struct ifmedia *media = &p->media;
2170 	int mod = phy->modtype;
2171 	int m = IFM_ETHER | IFM_FDX;
2172 
2173 	PORT_LOCK(p);
2174 
2175 	ifmedia_removeall(media);
2176 	if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2177 		/* Copper (RJ45) */
2178 
2179 		if (phy->caps & SUPPORTED_10000baseT_Full)
2180 			ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2181 
2182 		if (phy->caps & SUPPORTED_1000baseT_Full)
2183 			ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2184 
2185 		if (phy->caps & SUPPORTED_100baseT_Full)
2186 			ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2187 
2188 		if (phy->caps & SUPPORTED_10baseT_Full)
2189 			ifmedia_add(media, m | IFM_10_T, mod, NULL);
2190 
2191 		ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2192 		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2193 
2194 	} else if (phy->caps & SUPPORTED_TP) {
2195 		/* Copper (CX4) */
2196 
2197 		KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2198 			("%s: unexpected cap 0x%x", __func__, phy->caps));
2199 
2200 		ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2201 		ifmedia_set(media, m | IFM_10G_CX4);
2202 
2203 	} else if (phy->caps & SUPPORTED_FIBRE &&
2204 		   phy->caps & SUPPORTED_10000baseT_Full) {
2205 		/* 10G optical (but includes SFP+ twinax) */
2206 
2207 		m |= cxgb_ifm_type(mod);
2208 		if (IFM_SUBTYPE(m) == IFM_NONE)
2209 			m &= ~IFM_FDX;
2210 
2211 		ifmedia_add(media, m, mod, NULL);
2212 		ifmedia_set(media, m);
2213 
2214 	} else if (phy->caps & SUPPORTED_FIBRE &&
2215 		   phy->caps & SUPPORTED_1000baseT_Full) {
2216 		/* 1G optical */
2217 
2218 		/* XXX: Lie and claim to be SX, could actually be any 1G-X */
2219 		ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2220 		ifmedia_set(media, m | IFM_1000_SX);
2221 
2222 	} else {
2223 		KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2224 			    phy->caps));
2225 	}
2226 
2227 	PORT_UNLOCK(p);
2228 }
2229 
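/*
 * ifmedia status callback.  Rebuilds the media list if the module type has
 * changed, reports link state, and for autoselect copper reports the
 * negotiated speed as the active media.
 */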
2230 static void
2231 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2232 {
2233 	struct port_info *p = ifp->if_softc;
2234 	struct ifmedia_entry *cur = p->media.ifm_cur;
2235 	int speed = p->link_config.speed;
2236 
2237 	if (cur->ifm_data != p->phy.modtype) {
2238 		cxgb_build_medialist(p);
2239 		cur = p->media.ifm_cur;
2240 	}
2241 
2242 	ifmr->ifm_status = IFM_AVALID;
2243 	if (!p->link_config.link_ok)
2244 		return;
2245 
2246 	ifmr->ifm_status |= IFM_ACTIVE;
2247 
2248 	/*
2249 	 * active and current will differ iff current media is autoselect.  That
2250 	 * can happen only for copper RJ45.
2251 	 */
2252 	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2253 		return;
2254 	KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2255 		("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2256 
2257 	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2258 	if (speed == SPEED_10000)
2259 		ifmr->ifm_active |= IFM_10G_T;
2260 	else if (speed == SPEED_1000)
2261 		ifmr->ifm_active |= IFM_1000_T;
2262 	else if (speed == SPEED_100)
2263 		ifmr->ifm_active |= IFM_100_TX;
2264 	else if (speed == SPEED_10)
2265 		ifmr->ifm_active |= IFM_10_T;
2266 	else
2267 		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2268 			    speed));
2269 }
2270 
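/*
 * Interrupt handler for slow (asynchronous) events.  The work may sleep,
 * so it is deferred to the slow interrupt task.
 */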
2271 static void
2272 cxgb_async_intr(void *data)
2273 {
2274 	adapter_t *sc = data;
2275 
2276 	if (cxgb_debug)
2277 		device_printf(sc->dev, "cxgb_async_intr\n");
2278 	/*
2279 	 * May need to sleep - defer to taskqueue
2280 	 */
2281 	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2282 }
2283 
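/*
 * Task handler for external (PHY) interrupts.  Services the PHY and then
 * re-enables the T3DBG interrupt.
 */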
2284 static void
2285 cxgb_ext_intr_handler(void *arg, int count)
2286 {
2287 	adapter_t *sc = (adapter_t *)arg;
2288 
2289 	if (cxgb_debug)
2290 		printf("cxgb_ext_intr_handler\n");
2291 
2292 	t3_phy_intr_handler(sc);
2293 
2294 	/* Now reenable external interrupts */
2295 	ADAPTER_LOCK(sc);
2296 	if (sc->slow_intr_mask) {
2297 		sc->slow_intr_mask |= F_T3DBG;
2298 		t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
2299 		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
2300 	}
2301 	ADAPTER_UNLOCK(sc);
2302 }
2303 
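/*
 * Returns nonzero if this port's link state must be polled rather than
 * left to a link interrupt: the very first check, a pending link fault,
 * or a PHY without link interrupt support.
 */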
2304 static inline int
2305 link_poll_needed(struct port_info *p)
2306 {
2307 	struct cphy *phy = &p->phy;
2308 
2309 	if (phy->caps & POLL_LINK_1ST_TIME) {
2310 		p->phy.caps &= ~POLL_LINK_1ST_TIME;
2311 		return (1);
2312 	}
2313 
2314 	return (p->link_fault || !(phy->caps & SUPPORTED_LINK_IRQ));
2315 }
2316 
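/*
 * Polls the link state of every open port that needs it.
 */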
2317 static void
2318 check_link_status(adapter_t *sc)
2319 {
2320 	int i;
2321 
2322 	for (i = 0; i < (sc)->params.nports; ++i) {
2323 		struct port_info *p = &sc->port[i];
2324 
2325 		if (!isset(&sc->open_device_map, p->port_id))
2326 			continue;
2327 
2328 		if (link_poll_needed(p))
2329 			t3_link_changed(sc, i);
2330 	}
2331 }
2332 
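/*
 * Runs the T3B2 MAC watchdog on every open port with a healthy link and
 * restarts the MAC when the watchdog reports that it needs a reset.
 */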
2333 static void
2334 check_t3b2_mac(struct adapter *sc)
2335 {
2336 	int i;
2337 
2338 	if (sc->flags & CXGB_SHUTDOWN)
2339 		return;
2340 
2341 	for_each_port(sc, i) {
2342 		struct port_info *p = &sc->port[i];
2343 		int status;
2344 #ifdef INVARIANTS
2345 		struct ifnet *ifp = p->ifp;
2346 #endif
2347 
2348 		if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2349 		    !p->link_config.link_ok)
2350 			continue;
2351 
2352 		KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2353 			("%s: state mismatch (drv_flags %x, device_map %x)",
2354 			 __func__, ifp->if_drv_flags, sc->open_device_map));
2355 
2356 		PORT_LOCK(p);
2357 		status = t3b2_mac_watchdog_task(&p->mac);
2358 		if (status == 1)
2359 			p->mac.stats.num_toggled++;
2360 		else if (status == 2) {
2361 			struct cmac *mac = &p->mac;
2362 
2363 			cxgb_update_mac_settings(p);
2364 			t3_link_start(&p->phy, mac, &p->link_config);
2365 			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2366 			t3_port_intr_enable(sc, p->port_id);
2367 			p->mac.stats.num_resets++;
2368 		}
2369 		PORT_UNLOCK(p);
2370 	}
2371 }
2372 
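/*
 * Periodic callout: enqueues the tick task and reschedules itself.
 */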
2373 static void
2374 cxgb_tick(void *arg)
2375 {
2376 	adapter_t *sc = (adapter_t *)arg;
2377 
2378 	if (sc->flags & CXGB_SHUTDOWN)
2379 		return;
2380 
2381 	taskqueue_enqueue(sc->tq, &sc->tick_task);
2382 	callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
2383 }
2384 
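/*
 * Task handler for the periodic tick: link polling, the T3B2 MAC watchdog,
 * SGE starvation/empty free-list accounting, and interface statistics.
 */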
2385 static void
2386 cxgb_tick_handler(void *arg, int count)
2387 {
2388 	adapter_t *sc = (adapter_t *)arg;
2389 	const struct adapter_params *p = &sc->params;
2390 	int i;
2391 	uint32_t cause, reset;
2392 
2393 	if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2394 		return;
2395 
2396 	check_link_status(sc);
2397 
2398 	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2399 		check_t3b2_mac(sc);
2400 
2401 	cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2402 	if (cause) {
2403 		struct sge_qset *qs = &sc->sge.qs[0];
2404 		uint32_t mask, v;
2405 
2406 		v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2407 
2408 		mask = 1;
2409 		for (i = 0; i < SGE_QSETS; i++) {
2410 			if (v & mask)
2411 				qs[i].rspq.starved++;
2412 			mask <<= 1;
2413 		}
2414 
2415 		mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2416 
2417 		for (i = 0; i < SGE_QSETS * 2; i++) {
2418 			if (v & mask) {
2419 				qs[i / 2].fl[i % 2].empty++;
2420 			}
2421 			mask <<= 1;
2422 		}
2423 
2424 		/* clear */
2425 		t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2426 		t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2427 	}
2428 
2429 	for (i = 0; i < sc->params.nports; i++) {
2430 		struct port_info *pi = &sc->port[i];
2431 		struct ifnet *ifp = pi->ifp;
2432 		struct cmac *mac = &pi->mac;
2433 		struct mac_stats *mstats = &mac->stats;
2434 		int drops, j;
2435 
2436 		if (!isset(&sc->open_device_map, pi->port_id))
2437 			continue;
2438 
2439 		PORT_LOCK(pi);
2440 		t3_mac_update_stats(mac);
2441 		PORT_UNLOCK(pi);
2442 
2443 		ifp->if_opackets = mstats->tx_frames;
2444 		ifp->if_ipackets = mstats->rx_frames;
2445 		ifp->if_obytes = mstats->tx_octets;
2446 		ifp->if_ibytes = mstats->rx_octets;
2447 		ifp->if_omcasts = mstats->tx_mcast_frames;
2448 		ifp->if_imcasts = mstats->rx_mcast_frames;
2449 		ifp->if_collisions = mstats->tx_total_collisions;
2450 		ifp->if_iqdrops = mstats->rx_cong_drops;
2451 
2452 		drops = 0;
2453 		for (j = pi->first_qset; j < pi->first_qset + pi->nqsets; j++)
2454 			drops += sc->sge.qs[j].txq[TXQ_ETH].txq_mr->br_drops;
2455 		ifp->if_snd.ifq_drops = drops;
2456 
2457 		ifp->if_oerrors =
2458 		    mstats->tx_excess_collisions +
2459 		    mstats->tx_underrun +
2460 		    mstats->tx_len_errs +
2461 		    mstats->tx_mac_internal_errs +
2462 		    mstats->tx_excess_deferral +
2463 		    mstats->tx_fcs_errs;
2464 		ifp->if_ierrors =
2465 		    mstats->rx_jabber +
2466 		    mstats->rx_data_errs +
2467 		    mstats->rx_sequence_errs +
2468 		    mstats->rx_runt +
2469 		    mstats->rx_too_long +
2470 		    mstats->rx_mac_internal_errs +
2471 		    mstats->rx_short +
2472 		    mstats->rx_fcs_errs;
2473 
2474 		if (mac->multiport)
2475 			continue;
2476 
2477 		/* Count rx fifo overflows, once per second */
2478 		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2479 		reset = 0;
2480 		if (cause & F_RXFIFO_OVERFLOW) {
2481 			mac->stats.rx_fifo_ovfl++;
2482 			reset |= F_RXFIFO_OVERFLOW;
2483 		}
2484 		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2485 	}
2486 }
2487 
2488 static void
2489 touch_bars(device_t dev)
2490 {
2491 	/*
2492 	 * Don't enable yet
2493 	 */
2494 #if !defined(__LP64__) && 0
2495 	u32 v;
2496 
2497 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2498 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2499 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2500 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2501 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2502 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2503 #endif
2504 }
2505 
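/*
 * Writes len bytes of data to the EEPROM at the given offset.  Unaligned
 * head and tail bytes are handled with a read-modify-write of the
 * surrounding 32-bit words.
 */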
2506 static int
2507 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2508 {
2509 	uint8_t *buf;
2510 	int err = 0;
2511 	u32 aligned_offset, aligned_len, *p;
2512 	struct adapter *adapter = pi->adapter;
2513 
2514 
2515 	aligned_offset = offset & ~3;
2516 	aligned_len = (len + (offset & 3) + 3) & ~3;
2517 
2518 	if (aligned_offset != offset || aligned_len != len) {
2519 		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2520 		if (!buf)
2521 			return (ENOMEM);
2522 		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2523 		if (!err && aligned_len > 4)
2524 			err = t3_seeprom_read(adapter,
2525 					      aligned_offset + aligned_len - 4,
2526 					      (u32 *)&buf[aligned_len - 4]);
2527 		if (err)
2528 			goto out;
2529 		memcpy(buf + (offset & 3), data, len);
2530 	} else
2531 		buf = (uint8_t *)(uintptr_t)data;
2532 
2533 	err = t3_seeprom_wp(adapter, 0);
2534 	if (err)
2535 		goto out;
2536 
2537 	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2538 		err = t3_seeprom_write(adapter, aligned_offset, *p);
2539 		aligned_offset += 4;
2540 	}
2541 
2542 	if (!err)
2543 		err = t3_seeprom_wp(adapter, 1);
2544 out:
2545 	if (buf != data)
2546 		free(buf, M_DEVBUF);
2547 	return (err);
2548 }
2549 
2550 
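/*
 * Returns true if val is negative (callers use negative values to mean
 * "not specified") or falls within [lo, hi].
 */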
2551 static int
2552 in_range(int val, int lo, int hi)
2553 {
2554 	return (val < 0 || (val <= hi && val >= lo));
2555 }
2556 
2557 static int
2558 cxgb_extension_open(struct cdev *dev, int flags, int fmt, struct thread *td)
2559 {
2560 	return (0);
2561 }
2562 
2563 static int
2564 cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2565 {
2566 	return (0);
2567 }
2568 
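/*
 * Privileged ioctl handler for the Chelsio management/diagnostic cdev.
 * As a minimal sketch (assuming fd is an open descriptor for the port's
 * cdev), a userland register read through this interface looks like:
 *
 *	struct ch_reg r;
 *	r.addr = addr;		/* must be 4-byte aligned and < mmio_len */
 *	if (ioctl(fd, CHELSIO_GETREG, &r) == 0)
 *		printf("0x%x: 0x%x\n", r.addr, r.val);
 */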
2569 static int
2570 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2571     int fflag, struct thread *td)
2572 {
2573 	int mmd, error = 0;
2574 	struct port_info *pi = dev->si_drv1;
2575 	adapter_t *sc = pi->adapter;
2576 
2577 #ifdef PRIV_SUPPORTED
2578 	if (priv_check(td, PRIV_DRIVER)) {
2579 		if (cxgb_debug)
2580 			printf("user does not have access to privileged ioctls\n");
2581 		return (EPERM);
2582 	}
2583 #else
2584 	if (suser(td)) {
2585 		if (cxgb_debug)
2586 			printf("user does not have access to privileged ioctls\n");
2587 		return (EPERM);
2588 	}
2589 #endif
2590 
2591 	switch (cmd) {
2592 	case CHELSIO_GET_MIIREG: {
2593 		uint32_t val;
2594 		struct cphy *phy = &pi->phy;
2595 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2596 
2597 		if (!phy->mdio_read)
2598 			return (EOPNOTSUPP);
2599 		if (is_10G(sc)) {
2600 			mmd = mid->phy_id >> 8;
2601 			if (!mmd)
2602 				mmd = MDIO_DEV_PCS;
2603 			else if (mmd > MDIO_DEV_VEND2)
2604 				return (EINVAL);
2605 
2606 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2607 					     mid->reg_num, &val);
2608 		} else
2609 		        error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2610 					     mid->reg_num & 0x1f, &val);
2611 		if (error == 0)
2612 			mid->val_out = val;
2613 		break;
2614 	}
2615 	case CHELSIO_SET_MIIREG: {
2616 		struct cphy *phy = &pi->phy;
2617 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2618 
2619 		if (!phy->mdio_write)
2620 			return (EOPNOTSUPP);
2621 		if (is_10G(sc)) {
2622 			mmd = mid->phy_id >> 8;
2623 			if (!mmd)
2624 				mmd = MDIO_DEV_PCS;
2625 			else if (mmd > MDIO_DEV_VEND2)
2626 				return (EINVAL);
2627 
2628 			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2629 					      mmd, mid->reg_num, mid->val_in);
2630 		} else
2631 			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2632 					      mid->reg_num & 0x1f,
2633 					      mid->val_in);
2634 		break;
2635 	}
2636 	case CHELSIO_SETREG: {
2637 		struct ch_reg *edata = (struct ch_reg *)data;
2638 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2639 			return (EFAULT);
2640 		t3_write_reg(sc, edata->addr, edata->val);
2641 		break;
2642 	}
2643 	case CHELSIO_GETREG: {
2644 		struct ch_reg *edata = (struct ch_reg *)data;
2645 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2646 			return (EFAULT);
2647 		edata->val = t3_read_reg(sc, edata->addr);
2648 		break;
2649 	}
2650 	case CHELSIO_GET_SGE_CONTEXT: {
2651 		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2652 		mtx_lock_spin(&sc->sge.reg_lock);
2653 		switch (ecntxt->cntxt_type) {
2654 		case CNTXT_TYPE_EGRESS:
2655 			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2656 			    ecntxt->data);
2657 			break;
2658 		case CNTXT_TYPE_FL:
2659 			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2660 			    ecntxt->data);
2661 			break;
2662 		case CNTXT_TYPE_RSP:
2663 			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2664 			    ecntxt->data);
2665 			break;
2666 		case CNTXT_TYPE_CQ:
2667 			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2668 			    ecntxt->data);
2669 			break;
2670 		default:
2671 			error = EINVAL;
2672 			break;
2673 		}
2674 		mtx_unlock_spin(&sc->sge.reg_lock);
2675 		break;
2676 	}
2677 	case CHELSIO_GET_SGE_DESC: {
2678 		struct ch_desc *edesc = (struct ch_desc *)data;
2679 		int ret;
2680 		if (edesc->queue_num >= SGE_QSETS * 6)
2681 			return (EINVAL);
2682 		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2683 		    edesc->queue_num % 6, edesc->idx, edesc->data);
2684 		if (ret < 0)
2685 			return (EINVAL);
2686 		edesc->size = ret;
2687 		break;
2688 	}
2689 	case CHELSIO_GET_QSET_PARAMS: {
2690 		struct qset_params *q;
2691 		struct ch_qset_params *t = (struct ch_qset_params *)data;
2692 		int q1 = pi->first_qset;
2693 		int nqsets = pi->nqsets;
2694 		int i;
2695 
2696 		if (t->qset_idx >= nqsets)
2697 			return (EINVAL);
2698 
2699 		i = q1 + t->qset_idx;
2700 		q = &sc->params.sge.qset[i];
2701 		t->rspq_size   = q->rspq_size;
2702 		t->txq_size[0] = q->txq_size[0];
2703 		t->txq_size[1] = q->txq_size[1];
2704 		t->txq_size[2] = q->txq_size[2];
2705 		t->fl_size[0]  = q->fl_size;
2706 		t->fl_size[1]  = q->jumbo_size;
2707 		t->polling     = q->polling;
2708 		t->lro         = q->lro;
2709 		t->intr_lat    = q->coalesce_usecs;
2710 		t->cong_thres  = q->cong_thres;
2711 		t->qnum        = i;
2712 
2713 		if ((sc->flags & FULL_INIT_DONE) == 0)
2714 			t->vector = 0;
2715 		else if (sc->flags & USING_MSIX)
2716 			t->vector = rman_get_start(sc->msix_irq_res[i]);
2717 		else
2718 			t->vector = rman_get_start(sc->irq_res);
2719 
2720 		break;
2721 	}
2722 	case CHELSIO_GET_QSET_NUM: {
2723 		struct ch_reg *edata = (struct ch_reg *)data;
2724 		edata->val = pi->nqsets;
2725 		break;
2726 	}
2727 	case CHELSIO_LOAD_FW: {
2728 		uint8_t *fw_data;
2729 		uint32_t vers;
2730 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2731 
2732 		/*
2733 		 * You're allowed to load firmware only before FULL_INIT_DONE
2734 		 *
2735 		 * FW_UPTODATE is also set so the rest of the initialization
2736 		 * will not overwrite what was loaded here.  This gives you the
2737 		 * flexibility to load any firmware (and maybe shoot yourself in
2738 		 * the foot).
2739 		 */
2740 
2741 		ADAPTER_LOCK(sc);
2742 		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2743 			ADAPTER_UNLOCK(sc);
2744 			return (EBUSY);
2745 		}
2746 
2747 		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2748 		if (!fw_data)
2749 			error = ENOMEM;
2750 		else
2751 			error = copyin(t->buf, fw_data, t->len);
2752 
2753 		if (!error)
2754 			error = -t3_load_fw(sc, fw_data, t->len);
2755 
2756 		if (t3_get_fw_version(sc, &vers) == 0) {
2757 			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2758 			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2759 			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2760 		}
2761 
2762 		if (!error)
2763 			sc->flags |= FW_UPTODATE;
2764 
2765 		free(fw_data, M_DEVBUF);
2766 		ADAPTER_UNLOCK(sc);
2767 		break;
2768 	}
2769 	case CHELSIO_LOAD_BOOT: {
2770 		uint8_t *boot_data;
2771 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2772 
2773 		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2774 		if (!boot_data)
2775 			return (ENOMEM);
2776 
2777 		error = copyin(t->buf, boot_data, t->len);
2778 		if (!error)
2779 			error = -t3_load_boot(sc, boot_data, t->len);
2780 
2781 		free(boot_data, M_DEVBUF);
2782 		break;
2783 	}
2784 	case CHELSIO_GET_PM: {
2785 		struct ch_pm *m = (struct ch_pm *)data;
2786 		struct tp_params *p = &sc->params.tp;
2787 
2788 		if (!is_offload(sc))
2789 			return (EOPNOTSUPP);
2790 
2791 		m->tx_pg_sz = p->tx_pg_size;
2792 		m->tx_num_pg = p->tx_num_pgs;
2793 		m->rx_pg_sz  = p->rx_pg_size;
2794 		m->rx_num_pg = p->rx_num_pgs;
2795 		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2796 
2797 		break;
2798 	}
2799 	case CHELSIO_SET_PM: {
2800 		struct ch_pm *m = (struct ch_pm *)data;
2801 		struct tp_params *p = &sc->params.tp;
2802 
2803 		if (!is_offload(sc))
2804 			return (EOPNOTSUPP);
2805 		if (sc->flags & FULL_INIT_DONE)
2806 			return (EBUSY);
2807 
2808 		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2809 		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2810 			return (EINVAL);	/* not power of 2 */
2811 		if (!(m->rx_pg_sz & 0x14000))
2812 			return (EINVAL);	/* not 16KB or 64KB */
2813 		if (!(m->tx_pg_sz & 0x1554000))
2814 			return (EINVAL);
2815 		if (m->tx_num_pg == -1)
2816 			m->tx_num_pg = p->tx_num_pgs;
2817 		if (m->rx_num_pg == -1)
2818 			m->rx_num_pg = p->rx_num_pgs;
2819 		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2820 			return (EINVAL);
2821 		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2822 		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2823 			return (EINVAL);
2824 
2825 		p->rx_pg_size = m->rx_pg_sz;
2826 		p->tx_pg_size = m->tx_pg_sz;
2827 		p->rx_num_pgs = m->rx_num_pg;
2828 		p->tx_num_pgs = m->tx_num_pg;
2829 		break;
2830 	}
2831 	case CHELSIO_SETMTUTAB: {
2832 		struct ch_mtus *m = (struct ch_mtus *)data;
2833 		int i;
2834 
2835 		if (!is_offload(sc))
2836 			return (EOPNOTSUPP);
2837 		if (offload_running(sc))
2838 			return (EBUSY);
2839 		if (m->nmtus != NMTUS)
2840 			return (EINVAL);
2841 		if (m->mtus[0] < 81)         /* accommodate SACK */
2842 			return (EINVAL);
2843 
2844 		/*
2845 		 * MTUs must be in ascending order
2846 		 */
2847 		for (i = 1; i < NMTUS; ++i)
2848 			if (m->mtus[i] < m->mtus[i - 1])
2849 				return (EINVAL);
2850 
2851 		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2852 		break;
2853 	}
2854 	case CHELSIO_GETMTUTAB: {
2855 		struct ch_mtus *m = (struct ch_mtus *)data;
2856 
2857 		if (!is_offload(sc))
2858 			return (EOPNOTSUPP);
2859 
2860 		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2861 		m->nmtus = NMTUS;
2862 		break;
2863 	}
2864 	case CHELSIO_GET_MEM: {
2865 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2866 		struct mc7 *mem;
2867 		uint8_t *useraddr;
2868 		u64 buf[32];
2869 
2870 		/*
2871 		 * Use these to avoid modifying len/addr in the return
2872 		 * struct
2873 		 */
2874 		uint32_t len = t->len, addr = t->addr;
2875 
2876 		if (!is_offload(sc))
2877 			return (EOPNOTSUPP);
2878 		if (!(sc->flags & FULL_INIT_DONE))
2879 			return (EIO);         /* need the memory controllers */
2880 		if ((addr & 0x7) || (len & 0x7))
2881 			return (EINVAL);
2882 		if (t->mem_id == MEM_CM)
2883 			mem = &sc->cm;
2884 		else if (t->mem_id == MEM_PMRX)
2885 			mem = &sc->pmrx;
2886 		else if (t->mem_id == MEM_PMTX)
2887 			mem = &sc->pmtx;
2888 		else
2889 			return (EINVAL);
2890 
2891 		/*
2892 		 * Version scheme:
2893 		 * bits 0..9: chip version
2894 		 * bits 10..15: chip revision
2895 		 */
2896 		t->version = 3 | (sc->params.rev << 10);
2897 
2898 		/*
2899 		 * Read 256 bytes at a time as len can be large and we don't
2900 		 * want to use huge intermediate buffers.
2901 		 */
2902 		useraddr = (uint8_t *)t->buf;
2903 		while (len) {
2904 			unsigned int chunk = min(len, sizeof(buf));
2905 
2906 			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2907 			if (error)
2908 				return (-error);
2909 			if (copyout(buf, useraddr, chunk))
2910 				return (EFAULT);
2911 			useraddr += chunk;
2912 			addr += chunk;
2913 			len -= chunk;
2914 		}
2915 		break;
2916 	}
2917 	case CHELSIO_READ_TCAM_WORD: {
2918 		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2919 
2920 		if (!is_offload(sc))
2921 			return (EOPNOTSUPP);
2922 		if (!(sc->flags & FULL_INIT_DONE))
2923 			return (EIO);         /* need MC5 */
2924 		return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
2925 		break;
2926 	}
2927 	case CHELSIO_SET_TRACE_FILTER: {
2928 		struct ch_trace *t = (struct ch_trace *)data;
2929 		const struct trace_params *tp;
2930 
2931 		tp = (const struct trace_params *)&t->sip;
2932 		if (t->config_tx)
2933 			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2934 					       t->trace_tx);
2935 		if (t->config_rx)
2936 			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2937 					       t->trace_rx);
2938 		break;
2939 	}
2940 	case CHELSIO_SET_PKTSCHED: {
2941 		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2942 		if (sc->open_device_map == 0)
2943 			return (EAGAIN);
2944 		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2945 		    p->binding);
2946 		break;
2947 	}
2948 	case CHELSIO_IFCONF_GETREGS: {
2949 		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2950 		int reglen = cxgb_get_regs_len();
2951 		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2952 		if (buf == NULL) {
2953 			return (ENOMEM);
2954 		}
2955 		if (regs->len > reglen)
2956 			regs->len = reglen;
2957 		else if (regs->len < reglen)
2958 			error = ENOBUFS;
2959 
2960 		if (!error) {
2961 			cxgb_get_regs(sc, regs, buf);
2962 			error = copyout(buf, regs->data, reglen);
2963 		}
2964 		free(buf, M_DEVBUF);
2965 
2966 		break;
2967 	}
2968 	case CHELSIO_SET_HW_SCHED: {
2969 		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2970 		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2971 
2972 		if ((sc->flags & FULL_INIT_DONE) == 0)
2973 			return (EAGAIN);       /* need TP to be initialized */
2974 		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2975 		    !in_range(t->channel, 0, 1) ||
2976 		    !in_range(t->kbps, 0, 10000000) ||
2977 		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2978 		    !in_range(t->flow_ipg, 0,
2979 			      dack_ticks_to_usec(sc, 0x7ff)))
2980 			return (EINVAL);
2981 
2982 		if (t->kbps >= 0) {
2983 			error = t3_config_sched(sc, t->kbps, t->sched);
2984 			if (error < 0)
2985 				return (-error);
2986 		}
2987 		if (t->class_ipg >= 0)
2988 			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2989 		if (t->flow_ipg >= 0) {
2990 			t->flow_ipg *= 1000;     /* us -> ns */
2991 			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2992 		}
2993 		if (t->mode >= 0) {
2994 			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2995 
2996 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2997 					 bit, t->mode ? bit : 0);
2998 		}
2999 		if (t->channel >= 0)
3000 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
3001 					 1 << t->sched, t->channel << t->sched);
3002 		break;
3003 	}
3004 	case CHELSIO_GET_EEPROM: {
3005 		int i;
3006 		struct ch_eeprom *e = (struct ch_eeprom *)data;
3007 		uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
3008 
3009 		if (buf == NULL) {
3010 			return (ENOMEM);
3011 		}
3012 		e->magic = EEPROM_MAGIC;
3013 		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
3014 			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
3015 
3016 		if (!error)
3017 			error = copyout(buf + e->offset, e->data, e->len);
3018 
3019 		free(buf, M_DEVBUF);
3020 		break;
3021 	}
3022 	case CHELSIO_CLEAR_STATS: {
3023 		if (!(sc->flags & FULL_INIT_DONE))
3024 			return (EAGAIN);
3025 
3026 		PORT_LOCK(pi);
3027 		t3_mac_update_stats(&pi->mac);
3028 		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
3029 		PORT_UNLOCK(pi);
3030 		break;
3031 	}
3032 	case CHELSIO_GET_UP_LA: {
3033 		struct ch_up_la *la = (struct ch_up_la *)data;
3034 		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3035 		if (buf == NULL) {
3036 			return (ENOMEM);
3037 		}
3038 		if (la->bufsize < LA_BUFSIZE)
3039 			error = ENOBUFS;
3040 
3041 		if (!error)
3042 			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3043 					      &la->bufsize, buf);
3044 		if (!error)
3045 			error = copyout(buf, la->data, la->bufsize);
3046 
3047 		free(buf, M_DEVBUF);
3048 		break;
3049 	}
3050 	case CHELSIO_GET_UP_IOQS: {
3051 		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3052 		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3053 		uint32_t *v;
3054 
3055 		if (buf == NULL) {
3056 			return (ENOMEM);
3057 		}
3058 		if (ioqs->bufsize < IOQS_BUFSIZE)
3059 			error = ENOBUFS;
3060 
3061 		if (!error)
3062 			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3063 
3064 		if (!error) {
3065 			v = (uint32_t *)buf;
3066 
3067 			ioqs->bufsize -= 4 * sizeof(uint32_t);
3068 			ioqs->ioq_rx_enable = *v++;
3069 			ioqs->ioq_tx_enable = *v++;
3070 			ioqs->ioq_rx_status = *v++;
3071 			ioqs->ioq_tx_status = *v++;
3072 
3073 			error = copyout(v, ioqs->data, ioqs->bufsize);
3074 		}
3075 
3076 		free(buf, M_DEVBUF);
3077 		break;
3078 	}
3079 	default:
3080 		return (EOPNOTSUPP);
3081 		break;
3082 	}
3083 
3084 	return (error);
3085 }
3086 
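/*
 * Copies the registers in [start, end] into the snapshot buffer at their
 * register offsets.
 */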
3087 static __inline void
3088 reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3089     unsigned int end)
3090 {
3091 	uint32_t *p = (uint32_t *)(buf + start);
3092 
3093 	for ( ; start <= end; start += sizeof(uint32_t))
3094 		*p++ = t3_read_reg(ap, start);
3095 }
3096 
3097 #define T3_REGMAP_SIZE (3 * 1024)
3098 static int
3099 cxgb_get_regs_len(void)
3100 {
3101 	return (T3_REGMAP_SIZE);
3102 }
3103 
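/*
 * Fills a register dump for the CHELSIO_IFCONF_GETREGS ioctl.
 */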
3104 static void
3105 cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3106 {
3107 
3108 	/*
3109 	 * Version scheme:
3110 	 * bits 0..9: chip version
3111 	 * bits 10..15: chip revision
3112 	 * bit 31: set for PCIe cards
3113 	 */
3114 	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3115 
3116 	/*
3117 	 * We skip the MAC statistics registers because they are clear-on-read.
3118 	 * Also reading multi-register stats would need to synchronize with the
3119 	 * periodic mac stats accumulation.  Hard to justify the complexity.
3120 	 */
3121 	memset(buf, 0, cxgb_get_regs_len());
3122 	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3123 	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3124 	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3125 	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3126 	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3127 	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3128 		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3129 	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3130 		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3131 }
3132 
3133 
3134 MODULE_DEPEND(if_cxgb, cxgb_t3fw, 1, 1, 1);
3135