xref: /freebsd/sys/dev/cxgb/cxgb_main.c (revision 52c2bb75163559a6e2866ad374a7de67a4ea1273)
1 /**************************************************************************
2 SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 
4 Copyright (c) 2007-2009, Chelsio Inc.
5 All rights reserved.
6 
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
9 
10  1. Redistributions of source code must retain the above copyright notice,
11     this list of conditions and the following disclaimer.
12 
13  2. Neither the name of the Chelsio Corporation nor the names of its
14     contributors may be used to endorse or promote products derived from
15     this software without specific prior written permission.
16 
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.
28 
29 ***************************************************************************/
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include "opt_inet.h"
35 
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/bus.h>
40 #include <sys/module.h>
41 #include <sys/pciio.h>
42 #include <sys/conf.h>
43 #include <machine/bus.h>
44 #include <machine/resource.h>
45 #include <sys/ktr.h>
46 #include <sys/rman.h>
47 #include <sys/ioccom.h>
48 #include <sys/mbuf.h>
49 #include <sys/linker.h>
50 #include <sys/firmware.h>
51 #include <sys/socket.h>
52 #include <sys/sockio.h>
53 #include <sys/smp.h>
54 #include <sys/sysctl.h>
55 #include <sys/syslog.h>
56 #include <sys/queue.h>
57 #include <sys/taskqueue.h>
58 #include <sys/proc.h>
59 
60 #include <net/bpf.h>
61 #include <net/ethernet.h>
62 #include <net/if.h>
63 #include <net/if_var.h>
64 #include <net/if_arp.h>
65 #include <net/if_dl.h>
66 #include <net/if_media.h>
67 #include <net/if_types.h>
68 #include <net/if_vlan_var.h>
69 
70 #include <netinet/in_systm.h>
71 #include <netinet/in.h>
72 #include <netinet/if_ether.h>
73 #include <netinet/ip.h>
75 #include <netinet/tcp.h>
76 #include <netinet/udp.h>
77 #include <netinet/netdump/netdump.h>
78 
79 #include <dev/pci/pcireg.h>
80 #include <dev/pci/pcivar.h>
81 #include <dev/pci/pci_private.h>
82 
83 #include <cxgb_include.h>
84 
85 #ifdef PRIV_SUPPORTED
86 #include <sys/priv.h>
87 #endif
88 
89 static int cxgb_setup_interrupts(adapter_t *);
90 static void cxgb_teardown_interrupts(adapter_t *);
91 static void cxgb_init(void *);
92 static int cxgb_init_locked(struct port_info *);
93 static int cxgb_uninit_locked(struct port_info *);
94 static int cxgb_uninit_synchronized(struct port_info *);
95 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
96 static int cxgb_media_change(struct ifnet *);
97 static int cxgb_ifm_type(int);
98 static void cxgb_build_medialist(struct port_info *);
99 static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
100 static uint64_t cxgb_get_counter(struct ifnet *, ift_counter);
101 static int setup_sge_qsets(adapter_t *);
102 static void cxgb_async_intr(void *);
103 static void cxgb_tick_handler(void *, int);
104 static void cxgb_tick(void *);
105 static void link_check_callout(void *);
106 static void check_link_status(void *, int);
107 static void setup_rss(adapter_t *sc);
108 static int alloc_filters(struct adapter *);
109 static int setup_hw_filters(struct adapter *);
110 static int set_filter(struct adapter *, int, const struct filter_info *);
111 static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
112     unsigned int, u64, u64);
113 static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
114     unsigned int, u64, u64);
115 #ifdef TCP_OFFLOAD
116 static int cpl_not_handled(struct sge_qset *, struct rsp_desc *, struct mbuf *);
117 #endif
118 
119 /* Attachment glue for the PCI controller end of the device.  Each port of
120  * the device is attached separately, as defined later.
121  */
122 static int cxgb_controller_probe(device_t);
123 static int cxgb_controller_attach(device_t);
124 static int cxgb_controller_detach(device_t);
125 static void cxgb_free(struct adapter *);
126 static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
127     unsigned int end);
128 static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
129 static int cxgb_get_regs_len(void);
130 static void touch_bars(device_t dev);
131 static void cxgb_update_mac_settings(struct port_info *p);
132 #ifdef TCP_OFFLOAD
133 static int toe_capability(struct port_info *, int);
134 #endif
135 
136 /* Table for probing the cards.  The desc field isn't actually used */
137 struct cxgb_ident {
138 	uint16_t	vendor;
139 	uint16_t	device;
140 	int		index;
141 	char		*desc;
142 } cxgb_identifiers[] = {
143 	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
144 	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
145 	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
146 	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
147 	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
148 	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
149 	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
150 	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
151 	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
152 	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
153 	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
154 	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
155 	{PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
156 	{PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
157 	{0, 0, 0, NULL}
158 };
159 
160 static device_method_t cxgb_controller_methods[] = {
161 	DEVMETHOD(device_probe,		cxgb_controller_probe),
162 	DEVMETHOD(device_attach,	cxgb_controller_attach),
163 	DEVMETHOD(device_detach,	cxgb_controller_detach),
164 
165 	DEVMETHOD_END
166 };
167 
168 static driver_t cxgb_controller_driver = {
169 	"cxgbc",
170 	cxgb_controller_methods,
171 	sizeof(struct adapter)
172 };
173 
174 static int cxgbc_mod_event(module_t, int, void *);
175 static devclass_t	cxgb_controller_devclass;
176 DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass,
177     cxgbc_mod_event, 0);
178 MODULE_PNP_INFO("U16:vendor;U16:device", pci, cxgbc, cxgb_identifiers,
179     nitems(cxgb_identifiers) - 1);
180 MODULE_VERSION(cxgbc, 1);
181 MODULE_DEPEND(cxgbc, firmware, 1, 1, 1);
182 
183 /*
184  * Attachment glue for the ports.  Attachment is done directly to the
185  * controller device.
186  */
187 static int cxgb_port_probe(device_t);
188 static int cxgb_port_attach(device_t);
189 static int cxgb_port_detach(device_t);
190 
191 static device_method_t cxgb_port_methods[] = {
192 	DEVMETHOD(device_probe,		cxgb_port_probe),
193 	DEVMETHOD(device_attach,	cxgb_port_attach),
194 	DEVMETHOD(device_detach,	cxgb_port_detach),
195 	{ 0, 0 }
196 };
197 
198 static driver_t cxgb_port_driver = {
199 	"cxgb",
200 	cxgb_port_methods,
201 	0
202 };
203 
204 static d_ioctl_t cxgb_extension_ioctl;
205 static d_open_t cxgb_extension_open;
206 static d_close_t cxgb_extension_close;
207 
208 static struct cdevsw cxgb_cdevsw = {
209        .d_version =    D_VERSION,
210        .d_flags =      0,
211        .d_open =       cxgb_extension_open,
212        .d_close =      cxgb_extension_close,
213        .d_ioctl =      cxgb_extension_ioctl,
214        .d_name =       "cxgb",
215 };
216 
217 static devclass_t	cxgb_port_devclass;
218 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
219 MODULE_VERSION(cxgb, 1);
220 
221 NETDUMP_DEFINE(cxgb);
222 
223 static struct mtx t3_list_lock;
224 static SLIST_HEAD(, adapter) t3_list;
225 #ifdef TCP_OFFLOAD
226 static struct mtx t3_uld_list_lock;
227 static SLIST_HEAD(, uld_info) t3_uld_list;
228 #endif
229 
230 /*
231  * The driver uses the best interrupt scheme available on a platform in the
232  * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
233  * of these schemes the driver may consider as follows:
234  *
235  * msi = 2: choose from among all three options
236  * msi = 1: only consider MSI and pin interrupts
237  * msi = 0: force pin interrupts
238  */
239 static int msi_allowed = 2;
240 
241 SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
242 SYSCTL_INT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
243     "MSI-X, MSI, INTx selector");
244 
245 /*
246  * The driver uses an auto-queue algorithm by default.
247  * To disable it and force a single queue-set per port, use multiq = 0.
248  */
249 static int multiq = 1;
250 SYSCTL_INT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
251     "use min(ncpus/ports, 8) queue-sets per port");
252 
253 /*
254  * By default the driver will not update the firmware unless
255  * it was compiled against a newer version.
256  *
257  */
258 static int force_fw_update = 0;
259 SYSCTL_INT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
260     "update firmware even if up to date");
261 
262 int cxgb_use_16k_clusters = -1;
263 SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
264     &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");
265 
266 static int nfilters = -1;
267 SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
268     &nfilters, 0, "max number of entries in the filter table");
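
/*
 * Hypothetical usage note: all of the knobs above are read-only tunables
 * (CTLFLAG_RDTUN), so they are normally set from /boot/loader.conf before
 * the module is loaded, along the lines of (values are only examples):
 *
 *	hw.cxgb.msi_allowed="1"		# consider MSI and INTx only
 *	hw.cxgb.multiq="0"		# one queue set per port
 *	hw.cxgb.nfilters="256"		# cap the hardware filter table
 *
 * The current values remain visible at run time under sysctl hw.cxgb.
 */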
269 
270 enum {
271 	MAX_TXQ_ENTRIES      = 16384,
272 	MAX_CTRL_TXQ_ENTRIES = 1024,
273 	MAX_RSPQ_ENTRIES     = 16384,
274 	MAX_RX_BUFFERS       = 16384,
275 	MAX_RX_JUMBO_BUFFERS = 16384,
276 	MIN_TXQ_ENTRIES      = 4,
277 	MIN_CTRL_TXQ_ENTRIES = 4,
278 	MIN_RSPQ_ENTRIES     = 32,
279 	MIN_FL_ENTRIES       = 32,
280 	MIN_FL_JUMBO_ENTRIES = 32
281 };
282 
283 struct filter_info {
284 	u32 sip;
285 	u32 sip_mask;
286 	u32 dip;
287 	u16 sport;
288 	u16 dport;
289 	u32 vlan:12;
290 	u32 vlan_prio:3;
291 	u32 mac_hit:1;
292 	u32 mac_idx:4;
293 	u32 mac_vld:1;
294 	u32 pkt_type:2;
295 	u32 report_filter_id:1;
296 	u32 pass:1;
297 	u32 rss:1;
298 	u32 qset:3;
299 	u32 locked:1;
300 	u32 valid:1;
301 };
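
/*
 * Illustrative sketch only (not a complete recipe): a filter that passes
 * traffic for destination port 8080 into queue set 1, with every other
 * field left as a wildcard, would be described along the lines of
 *
 *	struct filter_info f = { .dport = 8080, .pass = 1, .qset = 1 };
 *
 * set_filter() further down is what actually encodes such an entry and
 * writes it to the hardware.
 */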
302 
303 enum { FILTER_NO_VLAN_PRI = 7 };
304 
305 #define EEPROM_MAGIC 0x38E2F10C
306 
307 #define PORT_MASK ((1 << MAX_NPORTS) - 1)
308 
309 
310 static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
311 
312 
313 static __inline char
314 t3rev2char(struct adapter *adapter)
315 {
316 	char rev = 'z';
317 
318 	switch(adapter->params.rev) {
319 	case T3_REV_A:
320 		rev = 'a';
321 		break;
322 	case T3_REV_B:
323 	case T3_REV_B2:
324 		rev = 'b';
325 		break;
326 	case T3_REV_C:
327 		rev = 'c';
328 		break;
329 	}
330 	return rev;
331 }
332 
333 static struct cxgb_ident *
334 cxgb_get_ident(device_t dev)
335 {
336 	struct cxgb_ident *id;
337 
338 	for (id = cxgb_identifiers; id->desc != NULL; id++) {
339 		if ((id->vendor == pci_get_vendor(dev)) &&
340 		    (id->device == pci_get_device(dev))) {
341 			return (id);
342 		}
343 	}
344 	return (NULL);
345 }
346 
347 static const struct adapter_info *
348 cxgb_get_adapter_info(device_t dev)
349 {
350 	struct cxgb_ident *id;
351 	const struct adapter_info *ai;
352 
353 	id = cxgb_get_ident(dev);
354 	if (id == NULL)
355 		return (NULL);
356 
357 	ai = t3_get_adapter_info(id->index);
358 
359 	return (ai);
360 }
361 
362 static int
363 cxgb_controller_probe(device_t dev)
364 {
365 	const struct adapter_info *ai;
366 	char *ports, buf[80];
367 	int nports;
368 
369 	ai = cxgb_get_adapter_info(dev);
370 	if (ai == NULL)
371 		return (ENXIO);
372 
373 	nports = ai->nports0 + ai->nports1;
374 	if (nports == 1)
375 		ports = "port";
376 	else
377 		ports = "ports";
378 
379 	snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
380 	device_set_desc_copy(dev, buf);
381 	return (BUS_PROBE_DEFAULT);
382 }
383 
384 #define FW_FNAME "cxgb_t3fw"
385 #define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
386 #define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
387 
388 static int
389 upgrade_fw(adapter_t *sc)
390 {
391 	const struct firmware *fw;
392 	int status;
393 	u32 vers;
394 
395 	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
396 		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
397 		return (ENOENT);
398 	} else
399 		device_printf(sc->dev, "installing firmware on card\n");
400 	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
401 
402 	if (status != 0) {
403 		device_printf(sc->dev, "failed to install firmware: %d\n",
404 		    status);
405 	} else {
406 		t3_get_fw_version(sc, &vers);
407 		snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
408 		    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
409 		    G_FW_VERSION_MICRO(vers));
410 	}
411 
412 	firmware_put(fw, FIRMWARE_UNLOAD);
413 
414 	return (status);
415 }
416 
417 /*
418  * The cxgb_controller_attach function is responsible for the initial
419  * bringup of the device.  Its responsibilities include:
420  *
421  *  1. Determine if the device supports MSI or MSI-X.
422  *  2. Allocate bus resources so that we can access the Base Address Register
423  *  3. Create and initialize mutexes for the controller and its control
424  *     logic such as SGE and MDIO.
425  *  4. Call hardware specific setup routine for the adapter as a whole.
426  *  5. Allocate the BAR for doing MSI-X.
427  *  6. Set up the line interrupt iff MSI-X is not supported.
428  *  7. Create the driver's taskq.
429  *  8. Start one task queue service thread.
430  *  9. Check if the firmware and SRAM are up-to-date.  They will be
431  *     auto-updated later (before FULL_INIT_DONE), if required.
432  * 10. Create a child device for each MAC (port)
433  * 11. Initialize T3 private state.
434  * 12. Trigger the LED
435  * 13. Set up offload iff supported.
436  * 14. Reset/restart the tick callout.
437  * 15. Attach sysctls
438  *
439  * NOTE: Any change to this routine that modifies or deviates from the
440  * steps above MUST be reflected in this list.  Failure to do so will
441  * result in problems on various error conditions including link flapping.
442  */
443 static int
444 cxgb_controller_attach(device_t dev)
445 {
446 	device_t child;
447 	const struct adapter_info *ai;
448 	struct adapter *sc;
449 	int i, error = 0;
450 	uint32_t vers;
451 	int port_qsets = 1;
452 	int msi_needed, reg;
453 	char buf[80];
454 
455 	sc = device_get_softc(dev);
456 	sc->dev = dev;
457 	sc->msi_count = 0;
458 	ai = cxgb_get_adapter_info(dev);
459 
460 	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
461 	    device_get_unit(dev));
462 	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
463 
464 	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
465 	    device_get_unit(dev));
466 	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
467 	    device_get_unit(dev));
468 	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
469 	    device_get_unit(dev));
470 
471 	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
472 	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
473 	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
474 
475 	mtx_lock(&t3_list_lock);
476 	SLIST_INSERT_HEAD(&t3_list, sc, link);
477 	mtx_unlock(&t3_list_lock);
478 
479 	/* find the PCIe link width and set max read request to 4KB */
480 	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
481 		uint16_t lnk;
482 
483 		lnk = pci_read_config(dev, reg + PCIER_LINK_STA, 2);
484 		sc->link_width = (lnk & PCIEM_LINK_STA_WIDTH) >> 4;
485 		if (sc->link_width < 8 &&
486 		    (ai->caps & SUPPORTED_10000baseT_Full)) {
487 			device_printf(sc->dev,
488 			    "PCIe x%d Link, expect reduced performance\n",
489 			    sc->link_width);
490 		}
491 
492 		pci_set_max_read_req(dev, 4096);
493 	}
494 
495 	touch_bars(dev);
496 	pci_enable_busmaster(dev);
497 	/*
498 	 * Allocate the registers and make them available to the driver.
499 	 * The registers that we care about for NIC mode are in BAR 0
500 	 */
501 	sc->regs_rid = PCIR_BAR(0);
502 	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
503 	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
504 		device_printf(dev, "Cannot allocate BAR region 0\n");
505 		error = ENXIO;
506 		goto out;
507 	}
508 
509 	sc->bt = rman_get_bustag(sc->regs_res);
510 	sc->bh = rman_get_bushandle(sc->regs_res);
511 	sc->mmio_len = rman_get_size(sc->regs_res);
512 
513 	for (i = 0; i < MAX_NPORTS; i++)
514 		sc->port[i].adapter = sc;
515 
516 	if (t3_prep_adapter(sc, ai, 1) < 0) {
517 		printf("prep adapter failed\n");
518 		error = ENODEV;
519 		goto out;
520 	}
521 
522 	sc->udbs_rid = PCIR_BAR(2);
523 	sc->udbs_res = NULL;
524 	if (is_offload(sc) &&
525 	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
526 		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
527 		device_printf(dev, "Cannot allocate BAR region 1\n");
528 		error = ENXIO;
529 		goto out;
530 	}
531 
532 	/* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
533 	 * enough messages for the queue sets.  If that fails, try falling
534 	 * back to MSI.  If that fails, then try falling back to the legacy
535 	 * interrupt pin model.
536 	 */
537 	sc->msix_regs_rid = 0x20;
538 	if ((msi_allowed >= 2) &&
539 	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
540 	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
541 
542 		if (multiq)
543 			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
544 		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
545 
546 		if (pci_msix_count(dev) == 0 ||
547 		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
548 		    sc->msi_count != msi_needed) {
549 			device_printf(dev, "alloc msix failed - "
550 				      "msi_count=%d, msi_needed=%d, err=%d; "
551 				      "will try MSI\n", sc->msi_count,
552 				      msi_needed, error);
553 			sc->msi_count = 0;
554 			port_qsets = 1;
555 			pci_release_msi(dev);
556 			bus_release_resource(dev, SYS_RES_MEMORY,
557 			    sc->msix_regs_rid, sc->msix_regs_res);
558 			sc->msix_regs_res = NULL;
559 		} else {
560 			sc->flags |= USING_MSIX;
561 			sc->cxgb_intr = cxgb_async_intr;
562 			device_printf(dev,
563 				      "using MSI-X interrupts (%u vectors)\n",
564 				      sc->msi_count);
565 		}
566 	}
567 
568 	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
569 		sc->msi_count = 1;
570 		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
571 			device_printf(dev, "alloc msi failed - "
572 				      "err=%d; will try INTx\n", error);
573 			sc->msi_count = 0;
574 			port_qsets = 1;
575 			pci_release_msi(dev);
576 		} else {
577 			sc->flags |= USING_MSI;
578 			sc->cxgb_intr = t3_intr_msi;
579 			device_printf(dev, "using MSI interrupts\n");
580 		}
581 	}
582 	if (sc->msi_count == 0) {
583 		device_printf(dev, "using line interrupts\n");
584 		sc->cxgb_intr = t3b_intr;
585 	}
586 
587 	/* Create a private taskqueue thread for handling driver events */
588 	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
589 	    taskqueue_thread_enqueue, &sc->tq);
590 	if (sc->tq == NULL) {
591 		device_printf(dev, "failed to allocate controller task queue\n");
592 		goto out;
593 	}
594 
595 	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
596 	    device_get_nameunit(dev));
597 	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
598 
599 
600 	/* Create a periodic callout for checking adapter status */
601 	callout_init(&sc->cxgb_tick_ch, 1);
602 
603 	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
604 		/*
605 		 * Warn user that a firmware update will be attempted in init.
606 		 */
607 		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
608 		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
609 		sc->flags &= ~FW_UPTODATE;
610 	} else {
611 		sc->flags |= FW_UPTODATE;
612 	}
613 
614 	if (t3_check_tpsram_version(sc) < 0) {
615 		/*
616 		 * Warn user that a firmware update will be attempted in init.
617 		 */
618 		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
619 		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
620 		sc->flags &= ~TPS_UPTODATE;
621 	} else {
622 		sc->flags |= TPS_UPTODATE;
623 	}
624 
625 	/*
626 	 * Create a child device for each MAC.  The ethernet attachment
627 	 * will be done in these children.
628 	 */
629 	for (i = 0; i < (sc)->params.nports; i++) {
630 		struct port_info *pi;
631 
632 		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
633 			device_printf(dev, "failed to add child port\n");
634 			error = EINVAL;
635 			goto out;
636 		}
637 		pi = &sc->port[i];
638 		pi->adapter = sc;
639 		pi->nqsets = port_qsets;
640 		pi->first_qset = i*port_qsets;
641 		pi->port_id = i;
642 		pi->tx_chan = i >= ai->nports0;
643 		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
644 		sc->rxpkt_map[pi->txpkt_intf] = i;
645 		sc->port[i].tx_chan = i >= ai->nports0;
646 		sc->portdev[i] = child;
647 		device_set_softc(child, pi);
648 	}
649 	if ((error = bus_generic_attach(dev)) != 0)
650 		goto out;
651 
652 	/* initialize sge private state */
653 	t3_sge_init_adapter(sc);
654 
655 	t3_led_ready(sc);
656 
657 	error = t3_get_fw_version(sc, &vers);
658 	if (error)
659 		goto out;
660 
661 	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
662 	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
663 	    G_FW_VERSION_MICRO(vers));
664 
665 	snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
666 		 ai->desc, is_offload(sc) ? "R" : "",
667 		 sc->params.vpd.ec, sc->params.vpd.sn);
668 	device_set_desc_copy(dev, buf);
669 
670 	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
671 		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
672 		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
673 
674 	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
675 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
676 	t3_add_attach_sysctls(sc);
677 
678 #ifdef TCP_OFFLOAD
679 	for (i = 0; i < NUM_CPL_HANDLERS; i++)
680 		sc->cpl_handler[i] = cpl_not_handled;
681 #endif
682 
683 	t3_intr_clear(sc);
684 	error = cxgb_setup_interrupts(sc);
685 out:
686 	if (error)
687 		cxgb_free(sc);
688 
689 	return (error);
690 }
691 
692 /*
693  * The cxgb_controller_detach routine is called when the device is
694  * unloaded from the system.
695  */
696 
697 static int
698 cxgb_controller_detach(device_t dev)
699 {
700 	struct adapter *sc;
701 
702 	sc = device_get_softc(dev);
703 
704 	cxgb_free(sc);
705 
706 	return (0);
707 }
708 
709 /*
710  * cxgb_free() is called by the cxgb_controller_detach() routine
711  * to tear down the structures that were built up in
712  * cxgb_controller_attach(), and should be the final piece of work
713  * done when fully unloading the driver.
714  *
715  * Its responsibilities include:
716  *  1. Shutting down the threads started by the cxgb_controller_attach()
717  *     routine.
718  *  2. Stopping the lower-level device and all callouts (cxgb_down()).
719  *  3. Detaching all of the port devices created during the
720  *     cxgb_controller_attach() routine.
721  *  4. Removing the device children created via cxgb_controller_attach().
722  *  5. Releasing PCI resources associated with the device.
723  *  6. Turning off the offload support, iff it was turned on.
724  *  7. Destroying the mutexes created in cxgb_controller_attach().
725  *
726  */
727 static void
728 cxgb_free(struct adapter *sc)
729 {
730 	int i, nqsets = 0;
731 
732 	ADAPTER_LOCK(sc);
733 	sc->flags |= CXGB_SHUTDOWN;
734 	ADAPTER_UNLOCK(sc);
735 
736 	/*
737 	 * Make sure all child devices are gone.
738 	 */
739 	bus_generic_detach(sc->dev);
740 	for (i = 0; i < (sc)->params.nports; i++) {
741 		if (sc->portdev[i] &&
742 		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
743 			device_printf(sc->dev, "failed to delete child port\n");
744 		nqsets += sc->port[i].nqsets;
745 	}
746 
747 	/*
748 	 * At this point, it is as if cxgb_port_detach has run on all ports, and
749 	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
750 	 * all open devices have been closed.
751 	 */
752 	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
753 					   __func__, sc->open_device_map));
754 	for (i = 0; i < sc->params.nports; i++) {
755 		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
756 						  __func__, i));
757 	}
758 
759 	/*
760 	 * Finish off the adapter's callouts.
761 	 */
762 	callout_drain(&sc->cxgb_tick_ch);
763 	callout_drain(&sc->sge_timer_ch);
764 
765 	/*
766 	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
767 	 * sysctls are cleaned up by the kernel linker.
768 	 */
769 	if (sc->flags & FULL_INIT_DONE) {
770  		t3_free_sge_resources(sc, nqsets);
771  		sc->flags &= ~FULL_INIT_DONE;
772  	}
773 
774 	/*
775 	 * Release all interrupt resources.
776 	 */
777 	cxgb_teardown_interrupts(sc);
778 	if (sc->flags & (USING_MSI | USING_MSIX)) {
779 		device_printf(sc->dev, "releasing msi message(s)\n");
780 		pci_release_msi(sc->dev);
781 	} else {
782 		device_printf(sc->dev, "no msi message to release\n");
783 	}
784 
785 	if (sc->msix_regs_res != NULL) {
786 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
787 		    sc->msix_regs_res);
788 	}
789 
790 	/*
791 	 * Free the adapter's taskqueue.
792 	 */
793 	if (sc->tq != NULL) {
794 		taskqueue_free(sc->tq);
795 		sc->tq = NULL;
796 	}
797 
798 	free(sc->filters, M_DEVBUF);
799 	t3_sge_free(sc);
800 
801 	if (sc->udbs_res != NULL)
802 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
803 		    sc->udbs_res);
804 
805 	if (sc->regs_res != NULL)
806 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
807 		    sc->regs_res);
808 
809 	MTX_DESTROY(&sc->mdio_lock);
810 	MTX_DESTROY(&sc->sge.reg_lock);
811 	MTX_DESTROY(&sc->elmer_lock);
812 	mtx_lock(&t3_list_lock);
813 	SLIST_REMOVE(&t3_list, sc, adapter, link);
814 	mtx_unlock(&t3_list_lock);
815 	ADAPTER_LOCK_DEINIT(sc);
816 }
817 
818 /**
819  *	setup_sge_qsets - configure SGE Tx/Rx/response queues
820  *	@sc: the controller softc
821  *
822  *	Determines how many sets of SGE queues to use and initializes them.
823  *	We support multiple queue sets per port if we have MSI-X, otherwise
824  *	just one queue set per port.
825  */
826 static int
827 setup_sge_qsets(adapter_t *sc)
828 {
829 	int i, j, err, irq_idx = 0, qset_idx = 0;
830 	u_int ntxq = SGE_TXQ_PER_SET;
831 
832 	if ((err = t3_sge_alloc(sc)) != 0) {
833 		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
834 		return (err);
835 	}
836 
837 	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
838 		irq_idx = -1;
839 
840 	for (i = 0; i < (sc)->params.nports; i++) {
841 		struct port_info *pi = &sc->port[i];
842 
843 		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
844 			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
845 			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
846 			    &sc->params.sge.qset[qset_idx], ntxq, pi);
847 			if (err) {
848 				t3_free_sge_resources(sc, qset_idx);
849 				device_printf(sc->dev,
850 				    "t3_sge_alloc_qset failed with %d\n", err);
851 				return (err);
852 			}
853 		}
854 	}
855 
856 	sc->nqsets = qset_idx;
857 
858 	return (0);
859 }
860 
861 static void
862 cxgb_teardown_interrupts(adapter_t *sc)
863 {
864 	int i;
865 
866 	for (i = 0; i < SGE_QSETS; i++) {
867 		if (sc->msix_intr_tag[i] == NULL) {
868 
869 			/* Should have been setup fully or not at all */
870 			KASSERT(sc->msix_irq_res[i] == NULL &&
871 				sc->msix_irq_rid[i] == 0,
872 				("%s: half-done interrupt (%d).", __func__, i));
873 
874 			continue;
875 		}
876 
877 		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
878 				  sc->msix_intr_tag[i]);
879 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
880 				     sc->msix_irq_res[i]);
881 
882 		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
883 		sc->msix_irq_rid[i] = 0;
884 	}
885 
886 	if (sc->intr_tag) {
887 		KASSERT(sc->irq_res != NULL,
888 			("%s: half-done interrupt.", __func__));
889 
890 		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
891 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
892 				     sc->irq_res);
893 
894 		sc->irq_res = sc->intr_tag = NULL;
895 		sc->irq_rid = 0;
896 	}
897 }
898 
899 static int
900 cxgb_setup_interrupts(adapter_t *sc)
901 {
902 	struct resource *res;
903 	void *tag;
904 	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
905 
906 	sc->irq_rid = intr_flag ? 1 : 0;
907 	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
908 					     RF_SHAREABLE | RF_ACTIVE);
909 	if (sc->irq_res == NULL) {
910 		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
911 			      intr_flag, sc->irq_rid);
912 		err = EINVAL;
913 		sc->irq_rid = 0;
914 	} else {
915 		err = bus_setup_intr(sc->dev, sc->irq_res,
916 		    INTR_MPSAFE | INTR_TYPE_NET, NULL,
917 		    sc->cxgb_intr, sc, &sc->intr_tag);
918 
919 		if (err) {
920 			device_printf(sc->dev,
921 				      "Cannot set up interrupt (%x, %u, %d)\n",
922 				      intr_flag, sc->irq_rid, err);
923 			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
924 					     sc->irq_res);
925 			sc->irq_res = sc->intr_tag = NULL;
926 			sc->irq_rid = 0;
927 		}
928 	}
929 
930 	/* That's all for INTx or MSI */
931 	if (!(intr_flag & USING_MSIX) || err)
932 		return (err);
933 
934 	bus_describe_intr(sc->dev, sc->irq_res, sc->intr_tag, "err");
935 	for (i = 0; i < sc->msi_count - 1; i++) {
936 		rid = i + 2;
937 		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
938 					     RF_SHAREABLE | RF_ACTIVE);
939 		if (res == NULL) {
940 			device_printf(sc->dev, "Cannot allocate interrupt "
941 				      "for message %d\n", rid);
942 			err = EINVAL;
943 			break;
944 		}
945 
946 		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
947 				     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
948 		if (err) {
949 			device_printf(sc->dev, "Cannot set up interrupt "
950 				      "for message %d (%d)\n", rid, err);
951 			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
952 			break;
953 		}
954 
955 		sc->msix_irq_rid[i] = rid;
956 		sc->msix_irq_res[i] = res;
957 		sc->msix_intr_tag[i] = tag;
958 		bus_describe_intr(sc->dev, res, tag, "qs%d", i);
959 	}
960 
961 	if (err)
962 		cxgb_teardown_interrupts(sc);
963 
964 	return (err);
965 }
966 
967 
968 static int
969 cxgb_port_probe(device_t dev)
970 {
971 	struct port_info *p;
972 	char buf[80];
973 	const char *desc;
974 
975 	p = device_get_softc(dev);
976 	desc = p->phy.desc;
977 	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
978 	device_set_desc_copy(dev, buf);
979 	return (0);
980 }
981 
982 
983 static int
984 cxgb_makedev(struct port_info *pi)
985 {
986 
987 	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
988 	    UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));
989 
990 	if (pi->port_cdev == NULL)
991 		return (ENOMEM);
992 
993 	pi->port_cdev->si_drv1 = (void *)pi;
994 
995 	return (0);
996 }
997 
998 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
999     IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
1000     IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6)
1001 #define CXGB_CAP_ENABLE CXGB_CAP
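
/*
 * These are standard ifnet capabilities, so they can be toggled at run time
 * through the SIOCSIFCAP handler in cxgb_ioctl() below; for example
 * (interface name assumed), something like
 *
 *	ifconfig cxgb0 -txcsum -tso
 *
 * would turn off transmit checksum offload and TSO for that port.
 */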
1002 
1003 static int
1004 cxgb_port_attach(device_t dev)
1005 {
1006 	struct port_info *p;
1007 	struct ifnet *ifp;
1008 	int err;
1009 	struct adapter *sc;
1010 
1011 	p = device_get_softc(dev);
1012 	sc = p->adapter;
1013 	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
1014 	    device_get_unit(device_get_parent(dev)), p->port_id);
1015 	PORT_LOCK_INIT(p, p->lockbuf);
1016 
1017 	callout_init(&p->link_check_ch, 1);
1018 	TASK_INIT(&p->link_check_task, 0, check_link_status, p);
1019 
1020 	/* Allocate an ifnet object and set it up */
1021 	ifp = p->ifp = if_alloc(IFT_ETHER);
1022 	if (ifp == NULL) {
1023 		device_printf(dev, "Cannot allocate ifnet\n");
1024 		return (ENOMEM);
1025 	}
1026 
1027 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1028 	ifp->if_init = cxgb_init;
1029 	ifp->if_softc = p;
1030 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1031 	ifp->if_ioctl = cxgb_ioctl;
1032 	ifp->if_transmit = cxgb_transmit;
1033 	ifp->if_qflush = cxgb_qflush;
1034 	ifp->if_get_counter = cxgb_get_counter;
1035 
1036 	ifp->if_capabilities = CXGB_CAP;
1037 #ifdef TCP_OFFLOAD
1038 	if (is_offload(sc))
1039 		ifp->if_capabilities |= IFCAP_TOE4;
1040 #endif
1041 	ifp->if_capenable = CXGB_CAP_ENABLE;
1042 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
1043 	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6;
1044 
1045 	/*
1046 	 * Disable TSO on 4-port - it isn't supported by the firmware.
1047 	 */
1048 	if (sc->params.nports > 2) {
1049 		ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1050 		ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1051 		ifp->if_hwassist &= ~CSUM_TSO;
1052 	}
1053 
1054 	ether_ifattach(ifp, p->hw_addr);
1055 
1056 	/* Attach driver netdump methods. */
1057 	NETDUMP_SET(ifp, cxgb);
1058 
1059 #ifdef DEFAULT_JUMBO
1060 	if (sc->params.nports <= 2)
1061 		ifp->if_mtu = ETHERMTU_JUMBO;
1062 #endif
1063 	if ((err = cxgb_makedev(p)) != 0) {
1064 		printf("makedev failed %d\n", err);
1065 		return (err);
1066 	}
1067 
1068 	/* Create a list of media supported by this port */
1069 	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1070 	    cxgb_media_status);
1071 	cxgb_build_medialist(p);
1072 
1073 	t3_sge_init_port(p);
1074 
1075 	return (err);
1076 }
1077 
1078 /*
1079  * cxgb_port_detach() is called via the device_detach method when
1080  * cxgb_free() calls bus_generic_detach().  It is responsible for
1081  * removing the device from the view of the kernel, i.e. from all
1082  * interface lists etc.  This routine is only called when the driver is
1083  * being unloaded, not when the link goes down.
1084  */
1085 static int
1086 cxgb_port_detach(device_t dev)
1087 {
1088 	struct port_info *p;
1089 	struct adapter *sc;
1090 	int i;
1091 
1092 	p = device_get_softc(dev);
1093 	sc = p->adapter;
1094 
1095 	/* Tell cxgb_ioctl and if_init that the port is going away */
1096 	ADAPTER_LOCK(sc);
1097 	SET_DOOMED(p);
1098 	wakeup(&sc->flags);
1099 	while (IS_BUSY(sc))
1100 		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1101 	SET_BUSY(sc);
1102 	ADAPTER_UNLOCK(sc);
1103 
1104 	if (p->port_cdev != NULL)
1105 		destroy_dev(p->port_cdev);
1106 
1107 	cxgb_uninit_synchronized(p);
1108 	ether_ifdetach(p->ifp);
1109 
1110 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1111 		struct sge_qset *qs = &sc->sge.qs[i];
1112 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1113 
1114 		callout_drain(&txq->txq_watchdog);
1115 		callout_drain(&txq->txq_timer);
1116 	}
1117 
1118 	PORT_LOCK_DEINIT(p);
1119 	if_free(p->ifp);
1120 	p->ifp = NULL;
1121 
1122 	ADAPTER_LOCK(sc);
1123 	CLR_BUSY(sc);
1124 	wakeup_one(&sc->flags);
1125 	ADAPTER_UNLOCK(sc);
1126 	return (0);
1127 }
1128 
1129 void
1130 t3_fatal_err(struct adapter *sc)
1131 {
1132 	u_int fw_status[4];
1133 
1134 	if (sc->flags & FULL_INIT_DONE) {
1135 		t3_sge_stop(sc);
1136 		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1137 		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1138 		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1139 		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1140 		t3_intr_disable(sc);
1141 	}
1142 	device_printf(sc->dev, "encountered fatal error, operation suspended\n");
1143 	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1144 		device_printf(sc->dev, "FW status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1145 		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1146 }
1147 
1148 int
1149 t3_os_find_pci_capability(adapter_t *sc, int cap)
1150 {
1151 	device_t dev;
1152 	struct pci_devinfo *dinfo;
1153 	pcicfgregs *cfg;
1154 	uint32_t status;
1155 	uint8_t ptr;
1156 
1157 	dev = sc->dev;
1158 	dinfo = device_get_ivars(dev);
1159 	cfg = &dinfo->cfg;
1160 
1161 	status = pci_read_config(dev, PCIR_STATUS, 2);
1162 	if (!(status & PCIM_STATUS_CAPPRESENT))
1163 		return (0);
1164 
1165 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1166 	case 0:
1167 	case 1:
1168 		ptr = PCIR_CAP_PTR;
1169 		break;
1170 	case 2:
1171 		ptr = PCIR_CAP_PTR_2;
1172 		break;
1173 	default:
1174 		return (0);
1175 		break;
1176 	}
1177 	ptr = pci_read_config(dev, ptr, 1);
1178 
1179 	while (ptr != 0) {
1180 		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1181 			return (ptr);
1182 		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1183 	}
1184 
1185 	return (0);
1186 }
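
/*
 * Usage sketch (hypothetical caller): a call such as
 *
 *	int pmc = t3_os_find_pci_capability(sc, PCIY_PMG);
 *
 * returns the config-space offset of the power-management capability, or 0
 * if the device does not advertise that capability.
 */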
1187 
1188 int
1189 t3_os_pci_save_state(struct adapter *sc)
1190 {
1191 	device_t dev;
1192 	struct pci_devinfo *dinfo;
1193 
1194 	dev = sc->dev;
1195 	dinfo = device_get_ivars(dev);
1196 
1197 	pci_cfg_save(dev, dinfo, 0);
1198 	return (0);
1199 }
1200 
1201 int
1202 t3_os_pci_restore_state(struct adapter *sc)
1203 {
1204 	device_t dev;
1205 	struct pci_devinfo *dinfo;
1206 
1207 	dev = sc->dev;
1208 	dinfo = device_get_ivars(dev);
1209 
1210 	pci_cfg_restore(dev, dinfo);
1211 	return (0);
1212 }
1213 
1214 /**
1215  *	t3_os_link_changed - handle link status changes
1216  *	@adapter: the adapter associated with the link change
1217  *	@port_id: the port index whose link status has changed
1218  *	@link_status: the new status of the link
1219  *	@speed: the new speed setting
1220  *	@duplex: the new duplex setting
1221  *	@fc: the new flow-control setting
1222  *
1223  *	This is the OS-dependent handler for link status changes.  The OS
1224  *	neutral handler takes care of most of the processing for these events,
1225  *	then calls this handler for any OS-specific processing.
1226  */
1227 void
1228 t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1229      int duplex, int fc, int mac_was_reset)
1230 {
1231 	struct port_info *pi = &adapter->port[port_id];
1232 	struct ifnet *ifp = pi->ifp;
1233 
1234 	/* no race with detach, so ifp should always be good */
1235 	KASSERT(ifp, ("%s: if detached.", __func__));
1236 
1237 	/* Reapply mac settings if they were lost due to a reset */
1238 	if (mac_was_reset) {
1239 		PORT_LOCK(pi);
1240 		cxgb_update_mac_settings(pi);
1241 		PORT_UNLOCK(pi);
1242 	}
1243 
1244 	if (link_status) {
1245 		ifp->if_baudrate = IF_Mbps(speed);
1246 		if_link_state_change(ifp, LINK_STATE_UP);
1247 	} else
1248 		if_link_state_change(ifp, LINK_STATE_DOWN);
1249 }
1250 
1251 /**
1252  *	t3_os_phymod_changed - handle PHY module changes
1253  *	@adap: the adapter whose PHY module changed
1254  *	@port_id: the index of the port whose module changed
1255  *
1256  *	This is the OS-dependent handler for PHY module changes.  It is
1257  *	invoked when a PHY module is removed or inserted for any OS-specific
1258  *	processing.
1259  */
1260 void t3_os_phymod_changed(struct adapter *adap, int port_id)
1261 {
1262 	static const char *mod_str[] = {
1263 		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1264 	};
1265 	struct port_info *pi = &adap->port[port_id];
1266 	int mod = pi->phy.modtype;
1267 
1268 	if (mod != pi->media.ifm_cur->ifm_data)
1269 		cxgb_build_medialist(pi);
1270 
1271 	if (mod == phy_modtype_none)
1272 		if_printf(pi->ifp, "PHY module unplugged\n");
1273 	else {
1274 		KASSERT(mod < ARRAY_SIZE(mod_str),
1275 			("invalid PHY module type %d", mod));
1276 		if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1277 	}
1278 }
1279 
1280 void
1281 t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1282 {
1283 
1284 	/*
1285 	 * The ifnet might not be allocated yet when this is called, as this
1286 	 * runs early in attach (from t3_prep_adapter), so just save the
1287 	 * address in the port structure.
1288 	 */
1289 	if (cxgb_debug)
1290 		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1291 	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1292 }
1293 
1294 /*
1295  * Programs the XGMAC based on the settings in the ifnet.  These settings
1296  * include MTU, MAC address, mcast addresses, etc.
1297  */
1298 static void
1299 cxgb_update_mac_settings(struct port_info *p)
1300 {
1301 	struct ifnet *ifp = p->ifp;
1302 	struct t3_rx_mode rm;
1303 	struct cmac *mac = &p->mac;
1304 	int mtu, hwtagging;
1305 
1306 	PORT_LOCK_ASSERT_OWNED(p);
1307 
1308 	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1309 
1310 	mtu = ifp->if_mtu;
1311 	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1312 		mtu += ETHER_VLAN_ENCAP_LEN;
1313 
1314 	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1315 
1316 	t3_mac_set_mtu(mac, mtu);
1317 	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1318 	t3_mac_set_address(mac, 0, p->hw_addr);
1319 	t3_init_rx_mode(&rm, p);
1320 	t3_mac_set_rx_mode(mac, &rm);
1321 }
1322 
1323 
1324 static int
1325 await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1326 			      unsigned long n)
1327 {
1328 	int attempts = 5;
1329 
1330 	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1331 		if (!--attempts)
1332 			return (ETIMEDOUT);
1333 		t3_os_sleep(10);
1334 	}
1335 	return (0);
1336 }
1337 
1338 static int
1339 init_tp_parity(struct adapter *adap)
1340 {
1341 	int i;
1342 	struct mbuf *m;
1343 	struct cpl_set_tcb_field *greq;
1344 	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1345 
1346 	t3_tp_set_offload_mode(adap, 1);
1347 
1348 	for (i = 0; i < 16; i++) {
1349 		struct cpl_smt_write_req *req;
1350 
1351 		m = m_gethdr(M_WAITOK, MT_DATA);
1352 		req = mtod(m, struct cpl_smt_write_req *);
1353 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1354 		memset(req, 0, sizeof(*req));
1355 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1356 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1357 		req->iff = i;
1358 		t3_mgmt_tx(adap, m);
1359 	}
1360 
1361 	for (i = 0; i < 2048; i++) {
1362 		struct cpl_l2t_write_req *req;
1363 
1364 		m = m_gethdr(M_WAITOK, MT_DATA);
1365 		req = mtod(m, struct cpl_l2t_write_req *);
1366 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1367 		memset(req, 0, sizeof(*req));
1368 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1369 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1370 		req->params = htonl(V_L2T_W_IDX(i));
1371 		t3_mgmt_tx(adap, m);
1372 	}
1373 
1374 	for (i = 0; i < 2048; i++) {
1375 		struct cpl_rte_write_req *req;
1376 
1377 		m = m_gethdr(M_WAITOK, MT_DATA);
1378 		req = mtod(m, struct cpl_rte_write_req *);
1379 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1380 		memset(req, 0, sizeof(*req));
1381 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1382 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1383 		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1384 		t3_mgmt_tx(adap, m);
1385 	}
1386 
1387 	m = m_gethdr(M_WAITOK, MT_DATA);
1388 	greq = mtod(m, struct cpl_set_tcb_field *);
1389 	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1390 	memset(greq, 0, sizeof(*greq));
1391 	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1392 	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1393 	greq->mask = htobe64(1);
1394 	t3_mgmt_tx(adap, m);
1395 
1396 	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1397 	t3_tp_set_offload_mode(adap, 0);
1398 	return (i);
1399 }
1400 
1401 /**
1402  *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1403  *	@adap: the adapter
1404  *
1405  *	Sets up RSS to distribute packets to multiple receive queues.  We
1406  *	configure the RSS CPU lookup table to distribute to the number of HW
1407  *	receive queues, and the response queue lookup table to narrow that
1408  *	down to the response queues actually configured for each port.
1409  *	We always configure the RSS mapping for two ports since the mapping
1410  *	table has plenty of entries.
1411  */
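/*
 * Hypothetical example: if each of the two channels ends up with 4 queue
 * sets (nq[0] == nq[1] == 4), the first half of rspq_map below cycles
 * through response queues 0..3 and the second half through 4..7, so hash
 * results on either channel land only on that channel's own queue sets.
 */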
1412 static void
1413 setup_rss(adapter_t *adap)
1414 {
1415 	int i;
1416 	u_int nq[2];
1417 	uint8_t cpus[SGE_QSETS + 1];
1418 	uint16_t rspq_map[RSS_TABLE_SIZE];
1419 
1420 	for (i = 0; i < SGE_QSETS; ++i)
1421 		cpus[i] = i;
1422 	cpus[SGE_QSETS] = 0xff;
1423 
1424 	nq[0] = nq[1] = 0;
1425 	for_each_port(adap, i) {
1426 		const struct port_info *pi = adap2pinfo(adap, i);
1427 
1428 		nq[pi->tx_chan] += pi->nqsets;
1429 	}
1430 	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1431 		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1432 		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1433 	}
1434 
1435 	/* Calculate the reverse RSS map table */
1436 	for (i = 0; i < SGE_QSETS; ++i)
1437 		adap->rrss_map[i] = 0xff;
1438 	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1439 		if (adap->rrss_map[rspq_map[i]] == 0xff)
1440 			adap->rrss_map[rspq_map[i]] = i;
1441 
1442 	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1443 		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1444 	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1445 	              cpus, rspq_map);
1446 
1447 }
1448 static void
1449 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1450 			      int hi, int port)
1451 {
1452 	struct mbuf *m;
1453 	struct mngt_pktsched_wr *req;
1454 
1455 	m = m_gethdr(M_NOWAIT, MT_DATA);
1456 	if (m) {
1457 		req = mtod(m, struct mngt_pktsched_wr *);
1458 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1459 		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1460 		req->sched = sched;
1461 		req->idx = qidx;
1462 		req->min = lo;
1463 		req->max = hi;
1464 		req->binding = port;
1465 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1466 		t3_mgmt_tx(adap, m);
1467 	}
1468 }
1469 
1470 static void
1471 bind_qsets(adapter_t *sc)
1472 {
1473 	int i, j;
1474 
1475 	for (i = 0; i < (sc)->params.nports; ++i) {
1476 		const struct port_info *pi = adap2pinfo(sc, i);
1477 
1478 		for (j = 0; j < pi->nqsets; ++j) {
1479 			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1480 					  -1, pi->tx_chan);
1481 
1482 		}
1483 	}
1484 }
1485 
1486 static void
1487 update_tpeeprom(struct adapter *adap)
1488 {
1489 	const struct firmware *tpeeprom;
1490 
1491 	uint32_t version;
1492 	unsigned int major, minor;
1493 	int ret, len;
1494 	char rev, name[32];
1495 
1496 	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1497 
1498 	major = G_TP_VERSION_MAJOR(version);
1499 	minor = G_TP_VERSION_MINOR(version);
1500 	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1501 		return;
1502 
1503 	rev = t3rev2char(adap);
1504 	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1505 
1506 	tpeeprom = firmware_get(name);
1507 	if (tpeeprom == NULL) {
1508 		device_printf(adap->dev,
1509 			      "could not load TP EEPROM: unable to load %s\n",
1510 			      name);
1511 		return;
1512 	}
1513 
1514 	len = tpeeprom->datasize - 4;
1515 
1516 	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1517 	if (ret)
1518 		goto release_tpeeprom;
1519 
1520 	if (len != TP_SRAM_LEN) {
1521 		device_printf(adap->dev,
1522 			      "%s length is wrong len=%d expected=%d\n", name,
1523 			      len, TP_SRAM_LEN);
1524 		goto release_tpeeprom;
1525 	}
1526 
1527 	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1528 	    TP_SRAM_OFFSET);
1529 
1530 	if (!ret) {
1531 		device_printf(adap->dev,
1532 			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1533 			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1534 	} else
1535 		device_printf(adap->dev,
1536 			      "Protocol SRAM image update in EEPROM failed\n");
1537 
1538 release_tpeeprom:
1539 	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1540 
1541 	return;
1542 }
1543 
1544 static int
1545 update_tpsram(struct adapter *adap)
1546 {
1547 	const struct firmware *tpsram;
1548 	int ret;
1549 	char rev, name[32];
1550 
1551 	rev = t3rev2char(adap);
1552 	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1553 
1554 	update_tpeeprom(adap);
1555 
1556 	tpsram = firmware_get(name);
1557 	if (tpsram == NULL){
1558 		device_printf(adap->dev, "could not load TP SRAM\n");
1559 		return (EINVAL);
1560 	} else
1561 		device_printf(adap->dev, "updating TP SRAM\n");
1562 
1563 	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1564 	if (ret)
1565 		goto release_tpsram;
1566 
1567 	ret = t3_set_proto_sram(adap, tpsram->data);
1568 	if (ret)
1569 		device_printf(adap->dev, "loading protocol SRAM failed\n");
1570 
1571 release_tpsram:
1572 	firmware_put(tpsram, FIRMWARE_UNLOAD);
1573 
1574 	return (ret);
1575 }
1576 
1577 /**
1578  *	cxgb_up - enable the adapter
1579  *	@sc: adapter being enabled
1580  *
1581  *	Called when the first port is enabled, this function performs the
1582  *	actions necessary to make an adapter operational, such as completing
1583  *	the initialization of HW modules, and enabling interrupts.
1584  */
1585 static int
1586 cxgb_up(struct adapter *sc)
1587 {
1588 	int err = 0;
1589 	unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;
1590 
1591 	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1592 					   __func__, sc->open_device_map));
1593 
1594 	if ((sc->flags & FULL_INIT_DONE) == 0) {
1595 
1596 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1597 
1598 		if ((sc->flags & FW_UPTODATE) == 0)
1599 			if ((err = upgrade_fw(sc)))
1600 				goto out;
1601 
1602 		if ((sc->flags & TPS_UPTODATE) == 0)
1603 			if ((err = update_tpsram(sc)))
1604 				goto out;
1605 
1606 		if (is_offload(sc) && nfilters != 0) {
1607 			sc->params.mc5.nservers = 0;
1608 
1609 			if (nfilters < 0)
1610 				sc->params.mc5.nfilters = mxf;
1611 			else
1612 				sc->params.mc5.nfilters = min(nfilters, mxf);
1613 		}
1614 
1615 		err = t3_init_hw(sc, 0);
1616 		if (err)
1617 			goto out;
1618 
1619 		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1620 		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1621 
1622 		err = setup_sge_qsets(sc);
1623 		if (err)
1624 			goto out;
1625 
1626 		alloc_filters(sc);
1627 		setup_rss(sc);
1628 
1629 		t3_add_configured_sysctls(sc);
1630 		sc->flags |= FULL_INIT_DONE;
1631 	}
1632 
1633 	t3_intr_clear(sc);
1634 	t3_sge_start(sc);
1635 	t3_intr_enable(sc);
1636 
1637 	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1638 	    is_offload(sc) && init_tp_parity(sc) == 0)
1639 		sc->flags |= TP_PARITY_INIT;
1640 
1641 	if (sc->flags & TP_PARITY_INIT) {
1642 		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1643 		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1644 	}
1645 
1646 	if (!(sc->flags & QUEUES_BOUND)) {
1647 		bind_qsets(sc);
1648 		setup_hw_filters(sc);
1649 		sc->flags |= QUEUES_BOUND;
1650 	}
1651 
1652 	t3_sge_reset_adapter(sc);
1653 out:
1654 	return (err);
1655 }
1656 
1657 /*
1658  * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1659  * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1660  * during controller_detach, not here.
1661  */
1662 static void
1663 cxgb_down(struct adapter *sc)
1664 {
1665 	t3_sge_stop(sc);
1666 	t3_intr_disable(sc);
1667 }
1668 
1669 /*
1670  * if_init for cxgb ports.
1671  */
1672 static void
1673 cxgb_init(void *arg)
1674 {
1675 	struct port_info *p = arg;
1676 	struct adapter *sc = p->adapter;
1677 
1678 	ADAPTER_LOCK(sc);
1679 	cxgb_init_locked(p); /* releases adapter lock */
1680 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1681 }
1682 
1683 static int
1684 cxgb_init_locked(struct port_info *p)
1685 {
1686 	struct adapter *sc = p->adapter;
1687 	struct ifnet *ifp = p->ifp;
1688 	struct cmac *mac = &p->mac;
1689 	int i, rc = 0, may_sleep = 0, gave_up_lock = 0;
1690 
1691 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1692 
1693 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1694 		gave_up_lock = 1;
1695 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1696 			rc = EINTR;
1697 			goto done;
1698 		}
1699 	}
1700 	if (IS_DOOMED(p)) {
1701 		rc = ENXIO;
1702 		goto done;
1703 	}
1704 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1705 
1706 	/*
1707 	 * The code that runs during one-time adapter initialization can sleep
1708 	 * so it's important not to hold any locks across it.
1709 	 */
1710 	may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1711 
1712 	if (may_sleep) {
1713 		SET_BUSY(sc);
1714 		gave_up_lock = 1;
1715 		ADAPTER_UNLOCK(sc);
1716 	}
1717 
1718 	if (sc->open_device_map == 0 && ((rc = cxgb_up(sc)) != 0))
1719 		goto done;
1720 
1721 	PORT_LOCK(p);
1722 	if (isset(&sc->open_device_map, p->port_id) &&
1723 	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1724 		PORT_UNLOCK(p);
1725 		goto done;
1726 	}
1727 	t3_port_intr_enable(sc, p->port_id);
1728 	if (!mac->multiport)
1729 		t3_mac_init(mac);
1730 	cxgb_update_mac_settings(p);
1731 	t3_link_start(&p->phy, mac, &p->link_config);
1732 	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1733 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1734 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1735 	PORT_UNLOCK(p);
1736 
1737 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1738 		struct sge_qset *qs = &sc->sge.qs[i];
1739 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1740 
1741 		callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1742 				 txq->txq_watchdog.c_cpu);
1743 	}
1744 
1745 	/* all ok */
1746 	setbit(&sc->open_device_map, p->port_id);
1747 	callout_reset(&p->link_check_ch,
1748 	    p->phy.caps & SUPPORTED_LINK_IRQ ?  hz * 3 : hz / 4,
1749 	    link_check_callout, p);
1750 
1751 done:
1752 	if (may_sleep) {
1753 		ADAPTER_LOCK(sc);
1754 		KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1755 		CLR_BUSY(sc);
1756 	}
1757 	if (gave_up_lock)
1758 		wakeup_one(&sc->flags);
1759 	ADAPTER_UNLOCK(sc);
1760 	return (rc);
1761 }
1762 
1763 static int
1764 cxgb_uninit_locked(struct port_info *p)
1765 {
1766 	struct adapter *sc = p->adapter;
1767 	int rc;
1768 
1769 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1770 
1771 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1772 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1773 			rc = EINTR;
1774 			goto done;
1775 		}
1776 	}
1777 	if (IS_DOOMED(p)) {
1778 		rc = ENXIO;
1779 		goto done;
1780 	}
1781 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1782 	SET_BUSY(sc);
1783 	ADAPTER_UNLOCK(sc);
1784 
1785 	rc = cxgb_uninit_synchronized(p);
1786 
1787 	ADAPTER_LOCK(sc);
1788 	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1789 	CLR_BUSY(sc);
1790 	wakeup_one(&sc->flags);
1791 done:
1792 	ADAPTER_UNLOCK(sc);
1793 	return (rc);
1794 }
1795 
1796 /*
1797  * Called on "ifconfig down", and from port_detach
1798  */
1799 static int
1800 cxgb_uninit_synchronized(struct port_info *pi)
1801 {
1802 	struct adapter *sc = pi->adapter;
1803 	struct ifnet *ifp = pi->ifp;
1804 
1805 	/*
1806 	 * taskqueue_drain may cause a deadlock if the adapter lock is held.
1807 	 */
1808 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1809 
1810 	/*
1811 	 * Clear this port's bit from the open device map, and then drain all
1812 	 * the tasks that can access/manipulate this port's port_info or ifp.
1813 	 * We disable this port's interrupts here and so the slow/ext
1814 	 * interrupt tasks won't be enqueued.  The tick task will continue to
1815 	 * be enqueued every second but the runs after this drain will not see
1816 	 * this port in the open device map.
1817 	 *
1818 	 * A well behaved task must take open_device_map into account and ignore
1819 	 * ports that are not open.
1820 	 */
1821 	clrbit(&sc->open_device_map, pi->port_id);
1822 	t3_port_intr_disable(sc, pi->port_id);
1823 	taskqueue_drain(sc->tq, &sc->slow_intr_task);
1824 	taskqueue_drain(sc->tq, &sc->tick_task);
1825 
1826 	callout_drain(&pi->link_check_ch);
1827 	taskqueue_drain(sc->tq, &pi->link_check_task);
1828 
1829 	PORT_LOCK(pi);
1830 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1831 
1832 	/* disable pause frames */
1833 	t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1834 
1835 	/* Reset RX FIFO HWM */
1836 	t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1837 			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1838 
1839 	DELAY(100 * 1000);
1840 
1841 	/* Wait for TXFIFO empty */
1842 	t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1843 			F_TXFIFO_EMPTY, 1, 20, 5);
1844 
1845 	DELAY(100 * 1000);
1846 	t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1847 
1848 	pi->phy.ops->power_down(&pi->phy, 1);
1849 
1850 	PORT_UNLOCK(pi);
1851 
1852 	pi->link_config.link_ok = 0;
1853 	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1854 
1855 	if (sc->open_device_map == 0)
1856 		cxgb_down(pi->adapter);
1857 
1858 	return (0);
1859 }
1860 
1861 /*
1862  * Mark lro enabled or disabled in all qsets for this port
1863  */
1864 static int
1865 cxgb_set_lro(struct port_info *p, int enabled)
1866 {
1867 	int i;
1868 	struct adapter *adp = p->adapter;
1869 	struct sge_qset *q;
1870 
1871 	for (i = 0; i < p->nqsets; i++) {
1872 		q = &adp->sge.qs[p->first_qset + i];
1873 		q->lro.enabled = (enabled != 0);
1874 	}
1875 	return (0);
1876 }
1877 
1878 static int
1879 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1880 {
1881 	struct port_info *p = ifp->if_softc;
1882 	struct adapter *sc = p->adapter;
1883 	struct ifreq *ifr = (struct ifreq *)data;
1884 	int flags, error = 0, mtu;
1885 	uint32_t mask;
1886 
1887 	switch (command) {
1888 	case SIOCSIFMTU:
1889 		ADAPTER_LOCK(sc);
1890 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1891 		if (error) {
1892 fail:
1893 			ADAPTER_UNLOCK(sc);
1894 			return (error);
1895 		}
1896 
1897 		mtu = ifr->ifr_mtu;
1898 		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
1899 			error = EINVAL;
1900 		} else {
1901 			ifp->if_mtu = mtu;
1902 			PORT_LOCK(p);
1903 			cxgb_update_mac_settings(p);
1904 			PORT_UNLOCK(p);
1905 		}
1906 		ADAPTER_UNLOCK(sc);
1907 		break;
1908 	case SIOCSIFFLAGS:
1909 		ADAPTER_LOCK(sc);
1910 		if (IS_DOOMED(p)) {
1911 			error = ENXIO;
1912 			goto fail;
1913 		}
1914 		if (ifp->if_flags & IFF_UP) {
1915 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1916 				flags = p->if_flags;
1917 				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
1918 				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
1919 					if (IS_BUSY(sc)) {
1920 						error = EBUSY;
1921 						goto fail;
1922 					}
1923 					PORT_LOCK(p);
1924 					cxgb_update_mac_settings(p);
1925 					PORT_UNLOCK(p);
1926 				}
1927 				ADAPTER_UNLOCK(sc);
1928 			} else
1929 				error = cxgb_init_locked(p);
1930 			p->if_flags = ifp->if_flags;
1931 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1932 			error = cxgb_uninit_locked(p);
1933 		else
1934 			ADAPTER_UNLOCK(sc);
1935 
1936 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1937 		break;
1938 	case SIOCADDMULTI:
1939 	case SIOCDELMULTI:
1940 		ADAPTER_LOCK(sc);
1941 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1942 		if (error)
1943 			goto fail;
1944 
1945 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1946 			PORT_LOCK(p);
1947 			cxgb_update_mac_settings(p);
1948 			PORT_UNLOCK(p);
1949 		}
1950 		ADAPTER_UNLOCK(sc);
1951 
1952 		break;
1953 	case SIOCSIFCAP:
1954 		ADAPTER_LOCK(sc);
1955 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1956 		if (error)
1957 			goto fail;
1958 
1959 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1960 		if (mask & IFCAP_TXCSUM) {
1961 			ifp->if_capenable ^= IFCAP_TXCSUM;
1962 			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
1963 
1964 			if (IFCAP_TSO4 & ifp->if_capenable &&
1965 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1966 				ifp->if_capenable &= ~IFCAP_TSO4;
1967 				if_printf(ifp,
1968 				    "tso4 disabled due to -txcsum.\n");
1969 			}
1970 		}
1971 		if (mask & IFCAP_TXCSUM_IPV6) {
1972 			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1973 			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
1974 
1975 			if (IFCAP_TSO6 & ifp->if_capenable &&
1976 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1977 				ifp->if_capenable &= ~IFCAP_TSO6;
1978 				if_printf(ifp,
1979 				    "tso6 disabled due to -txcsum6.\n");
1980 			}
1981 		}
1982 		if (mask & IFCAP_RXCSUM)
1983 			ifp->if_capenable ^= IFCAP_RXCSUM;
1984 		if (mask & IFCAP_RXCSUM_IPV6)
1985 			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1986 
1987 		/*
1988 		 * Note that we leave CSUM_TSO alone (it is always set).  The
1989 		 * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
1990 		 * sending a TSO request our way, so it's sufficient to toggle
1991 		 * IFCAP_TSOx only.
1992 		 */
1993 		if (mask & IFCAP_TSO4) {
1994 			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
1995 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1996 				if_printf(ifp, "enable txcsum first.\n");
1997 				error = EAGAIN;
1998 				goto fail;
1999 			}
2000 			ifp->if_capenable ^= IFCAP_TSO4;
2001 		}
2002 		if (mask & IFCAP_TSO6) {
2003 			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
2004 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2005 				if_printf(ifp, "enable txcsum6 first.\n");
2006 				error = EAGAIN;
2007 				goto fail;
2008 			}
2009 			ifp->if_capenable ^= IFCAP_TSO6;
2010 		}
2011 		if (mask & IFCAP_LRO) {
2012 			ifp->if_capenable ^= IFCAP_LRO;
2013 
2014 			/* Safe to do this even if cxgb_up not called yet */
2015 			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2016 		}
2017 #ifdef TCP_OFFLOAD
2018 		if (mask & IFCAP_TOE4) {
2019 			int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE4;
2020 
2021 			error = toe_capability(p, enable);
2022 			if (error == 0)
2023 				ifp->if_capenable ^= mask;
2024 		}
2025 #endif
2026 		if (mask & IFCAP_VLAN_HWTAGGING) {
2027 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2028 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2029 				PORT_LOCK(p);
2030 				cxgb_update_mac_settings(p);
2031 				PORT_UNLOCK(p);
2032 			}
2033 		}
2034 		if (mask & IFCAP_VLAN_MTU) {
2035 			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2036 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2037 				PORT_LOCK(p);
2038 				cxgb_update_mac_settings(p);
2039 				PORT_UNLOCK(p);
2040 			}
2041 		}
2042 		if (mask & IFCAP_VLAN_HWTSO)
2043 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2044 		if (mask & IFCAP_VLAN_HWCSUM)
2045 			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2046 
2047 #ifdef VLAN_CAPABILITIES
2048 		VLAN_CAPABILITIES(ifp);
2049 #endif
2050 		ADAPTER_UNLOCK(sc);
2051 		break;
2052 	case SIOCSIFMEDIA:
2053 	case SIOCGIFMEDIA:
2054 		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2055 		break;
2056 	default:
2057 		error = ether_ioctl(ifp, command, data);
2058 	}
2059 
2060 	return (error);
2061 }
2062 
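/*
 * ifmedia change callback.  Manually selecting a different media is not
 * supported.
 */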
2063 static int
2064 cxgb_media_change(struct ifnet *ifp)
2065 {
2066 	return (EOPNOTSUPP);
2067 }
2068 
2069 /*
2070  * Translates phy->modtype to the correct Ethernet media subtype.
2071  */
2072 static int
2073 cxgb_ifm_type(int mod)
2074 {
2075 	switch (mod) {
2076 	case phy_modtype_sr:
2077 		return (IFM_10G_SR);
2078 	case phy_modtype_lr:
2079 		return (IFM_10G_LR);
2080 	case phy_modtype_lrm:
2081 		return (IFM_10G_LRM);
2082 	case phy_modtype_twinax:
2083 		return (IFM_10G_TWINAX);
2084 	case phy_modtype_twinax_long:
2085 		return (IFM_10G_TWINAX_LONG);
2086 	case phy_modtype_none:
2087 		return (IFM_NONE);
2088 	case phy_modtype_unknown:
2089 		return (IFM_UNKNOWN);
2090 	}
2091 
2092 	KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2093 	return (IFM_UNKNOWN);
2094 }
2095 
2096 /*
2097  * Rebuilds the ifmedia list for this port, and sets the current media.
2098  */
2099 static void
2100 cxgb_build_medialist(struct port_info *p)
2101 {
2102 	struct cphy *phy = &p->phy;
2103 	struct ifmedia *media = &p->media;
2104 	int mod = phy->modtype;
2105 	int m = IFM_ETHER | IFM_FDX;
2106 
2107 	PORT_LOCK(p);
2108 
2109 	ifmedia_removeall(media);
2110 	if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2111 		/* Copper (RJ45) */
2112 
2113 		if (phy->caps & SUPPORTED_10000baseT_Full)
2114 			ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2115 
2116 		if (phy->caps & SUPPORTED_1000baseT_Full)
2117 			ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2118 
2119 		if (phy->caps & SUPPORTED_100baseT_Full)
2120 			ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2121 
2122 		if (phy->caps & SUPPORTED_10baseT_Full)
2123 			ifmedia_add(media, m | IFM_10_T, mod, NULL);
2124 
2125 		ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2126 		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2127 
2128 	} else if (phy->caps & SUPPORTED_TP) {
2129 		/* Copper (CX4) */
2130 
2131 		KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2132 			("%s: unexpected cap 0x%x", __func__, phy->caps));
2133 
2134 		ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2135 		ifmedia_set(media, m | IFM_10G_CX4);
2136 
2137 	} else if (phy->caps & SUPPORTED_FIBRE &&
2138 		   phy->caps & SUPPORTED_10000baseT_Full) {
2139 		/* 10G optical (but includes SFP+ twinax) */
2140 
2141 		m |= cxgb_ifm_type(mod);
2142 		if (IFM_SUBTYPE(m) == IFM_NONE)
2143 			m &= ~IFM_FDX;
2144 
2145 		ifmedia_add(media, m, mod, NULL);
2146 		ifmedia_set(media, m);
2147 
2148 	} else if (phy->caps & SUPPORTED_FIBRE &&
2149 		   phy->caps & SUPPORTED_1000baseT_Full) {
2150 		/* 1G optical */
2151 
2152 		/* XXX: Lie and claim to be SX, could actually be any 1G-X */
2153 		ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2154 		ifmedia_set(media, m | IFM_1000_SX);
2155 
2156 	} else {
2157 		KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2158 			    phy->caps));
2159 	}
2160 
2161 	PORT_UNLOCK(p);
2162 }
2163 
2164 static void
2165 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2166 {
2167 	struct port_info *p = ifp->if_softc;
2168 	struct ifmedia_entry *cur = p->media.ifm_cur;
2169 	int speed = p->link_config.speed;
2170 
2171 	if (cur->ifm_data != p->phy.modtype) {
2172 		cxgb_build_medialist(p);
2173 		cur = p->media.ifm_cur;
2174 	}
2175 
2176 	ifmr->ifm_status = IFM_AVALID;
2177 	if (!p->link_config.link_ok)
2178 		return;
2179 
2180 	ifmr->ifm_status |= IFM_ACTIVE;
2181 
2182 	/*
2183 	 * active and current will differ iff current media is autoselect.  That
2184 	 * can happen only for copper RJ45.
2185 	 */
2186 	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2187 		return;
2188 	KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2189 		("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2190 
2191 	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2192 	if (speed == SPEED_10000)
2193 		ifmr->ifm_active |= IFM_10G_T;
2194 	else if (speed == SPEED_1000)
2195 		ifmr->ifm_active |= IFM_1000_T;
2196 	else if (speed == SPEED_100)
2197 		ifmr->ifm_active |= IFM_100_TX;
2198 	else if (speed == SPEED_10)
2199 		ifmr->ifm_active |= IFM_10_T;
2200 	else
2201 		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2202 			    speed));
2203 }
2204 
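/*
 * if_get_counter method: refresh the MAC statistics and translate them into
 * the requested ifnet counter.  TX queue drops are summed over the buf_rings
 * of all queue sets owned by this port.
 */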
2205 static uint64_t
2206 cxgb_get_counter(struct ifnet *ifp, ift_counter c)
2207 {
2208 	struct port_info *pi = ifp->if_softc;
2209 	struct adapter *sc = pi->adapter;
2210 	struct cmac *mac = &pi->mac;
2211 	struct mac_stats *mstats = &mac->stats;
2212 
2213 	cxgb_refresh_stats(pi);
2214 
2215 	switch (c) {
2216 	case IFCOUNTER_IPACKETS:
2217 		return (mstats->rx_frames);
2218 
2219 	case IFCOUNTER_IERRORS:
2220 		return (mstats->rx_jabber + mstats->rx_data_errs +
2221 		    mstats->rx_sequence_errs + mstats->rx_runt +
2222 		    mstats->rx_too_long + mstats->rx_mac_internal_errs +
2223 		    mstats->rx_short + mstats->rx_fcs_errs);
2224 
2225 	case IFCOUNTER_OPACKETS:
2226 		return (mstats->tx_frames);
2227 
2228 	case IFCOUNTER_OERRORS:
2229 		return (mstats->tx_excess_collisions + mstats->tx_underrun +
2230 		    mstats->tx_len_errs + mstats->tx_mac_internal_errs +
2231 		    mstats->tx_excess_deferral + mstats->tx_fcs_errs);
2232 
2233 	case IFCOUNTER_COLLISIONS:
2234 		return (mstats->tx_total_collisions);
2235 
2236 	case IFCOUNTER_IBYTES:
2237 		return (mstats->rx_octets);
2238 
2239 	case IFCOUNTER_OBYTES:
2240 		return (mstats->tx_octets);
2241 
2242 	case IFCOUNTER_IMCASTS:
2243 		return (mstats->rx_mcast_frames);
2244 
2245 	case IFCOUNTER_OMCASTS:
2246 		return (mstats->tx_mcast_frames);
2247 
2248 	case IFCOUNTER_IQDROPS:
2249 		return (mstats->rx_cong_drops);
2250 
2251 	case IFCOUNTER_OQDROPS: {
2252 		int i;
2253 		uint64_t drops;
2254 
2255 		drops = 0;
2256 		if (sc->flags & FULL_INIT_DONE) {
2257 			for (i = pi->first_qset; i < pi->first_qset + pi->nqsets; i++)
2258 				drops += sc->sge.qs[i].txq[TXQ_ETH].txq_mr->br_drops;
2259 		}
2260 
2261 		return (drops);
2262 
2263 	}
2264 
2265 	default:
2266 		return (if_get_counter_default(ifp, c));
2267 	}
2268 }
2269 
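/*
 * Interrupt handler for slow-path events.  Masks further PL interrupts (the
 * read-back flushes the write) and defers the actual work to the slow
 * interrupt task.
 */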
2270 static void
2271 cxgb_async_intr(void *data)
2272 {
2273 	adapter_t *sc = data;
2274 
2275 	t3_write_reg(sc, A_PL_INT_ENABLE0, 0);
2276 	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
2277 	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2278 }
2279 
2280 static void
2281 link_check_callout(void *arg)
2282 {
2283 	struct port_info *pi = arg;
2284 	struct adapter *sc = pi->adapter;
2285 
2286 	if (!isset(&sc->open_device_map, pi->port_id))
2287 		return;
2288 
2289 	taskqueue_enqueue(sc->tq, &pi->link_check_task);
2290 }
2291 
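/*
 * Task handler that asks the common code to re-evaluate the link state.  The
 * callout is re-armed whenever the PHY cannot interrupt on link changes, or
 * while the link is faulted or down, so that we keep polling.
 */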
2292 static void
2293 check_link_status(void *arg, int pending)
2294 {
2295 	struct port_info *pi = arg;
2296 	struct adapter *sc = pi->adapter;
2297 
2298 	if (!isset(&sc->open_device_map, pi->port_id))
2299 		return;
2300 
2301 	t3_link_changed(sc, pi->port_id);
2302 
2303 	if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ) ||
2304 	    pi->link_config.link_ok == 0)
2305 		callout_reset(&pi->link_check_ch, hz, link_check_callout, pi);
2306 }
2307 
2308 void
2309 t3_os_link_intr(struct port_info *pi)
2310 {
2311 	/*
2312 	 * Schedule a link check in the near future.  If the link is flapping
2313 	 * rapidly we'll keep resetting the callout and delaying the check until
2314 	 * things stabilize a bit.
2315 	 */
2316 	callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
2317 }
2318 
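/*
 * T3B2 MAC workaround: run the MAC watchdog on every open port with a
 * healthy link and, depending on what it reports, either count a toggle or
 * reconfigure and restart the MAC entirely.
 */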
2319 static void
2320 check_t3b2_mac(struct adapter *sc)
2321 {
2322 	int i;
2323 
2324 	if (sc->flags & CXGB_SHUTDOWN)
2325 		return;
2326 
2327 	for_each_port(sc, i) {
2328 		struct port_info *p = &sc->port[i];
2329 		int status;
2330 #ifdef INVARIANTS
2331 		struct ifnet *ifp = p->ifp;
2332 #endif
2333 
2334 		if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2335 		    !p->link_config.link_ok)
2336 			continue;
2337 
2338 		KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2339 			("%s: state mismatch (drv_flags %x, device_map %x)",
2340 			 __func__, ifp->if_drv_flags, sc->open_device_map));
2341 
2342 		PORT_LOCK(p);
2343 		status = t3b2_mac_watchdog_task(&p->mac);
2344 		if (status == 1)
2345 			p->mac.stats.num_toggled++;
2346 		else if (status == 2) {
2347 			struct cmac *mac = &p->mac;
2348 
2349 			cxgb_update_mac_settings(p);
2350 			t3_link_start(&p->phy, mac, &p->link_config);
2351 			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2352 			t3_port_intr_enable(sc, p->port_id);
2353 			p->mac.stats.num_resets++;
2354 		}
2355 		PORT_UNLOCK(p);
2356 	}
2357 }
2358 
2359 static void
2360 cxgb_tick(void *arg)
2361 {
2362 	adapter_t *sc = (adapter_t *)arg;
2363 
2364 	if (sc->flags & CXGB_SHUTDOWN)
2365 		return;
2366 
2367 	taskqueue_enqueue(sc->tq, &sc->tick_task);
2368 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2369 }
2370 
2371 void
2372 cxgb_refresh_stats(struct port_info *pi)
2373 {
2374 	struct timeval tv;
2375 	const struct timeval interval = {0, 250000};    /* 250ms */
2376 
2377 	getmicrotime(&tv);
2378 	timevalsub(&tv, &interval);
2379 	if (timevalcmp(&tv, &pi->last_refreshed, <))
2380 		return;
2381 
2382 	PORT_LOCK(pi);
2383 	t3_mac_update_stats(&pi->mac);
2384 	PORT_UNLOCK(pi);
2385 	getmicrotime(&pi->last_refreshed);
2386 }
2387 
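/*
 * Once-a-second housekeeping, run from the adapter taskqueue: apply the T3B2
 * MAC workaround where applicable, account for starved response queues and
 * empty free lists, refresh each open port's MAC statistics, and count RX
 * FIFO overflows.
 */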
2388 static void
2389 cxgb_tick_handler(void *arg, int count)
2390 {
2391 	adapter_t *sc = (adapter_t *)arg;
2392 	const struct adapter_params *p = &sc->params;
2393 	int i;
2394 	uint32_t cause, reset;
2395 
2396 	if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2397 		return;
2398 
2399 	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2400 		check_t3b2_mac(sc);
2401 
2402 	cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2403 	if (cause) {
2404 		struct sge_qset *qs = &sc->sge.qs[0];
2405 		uint32_t mask, v;
2406 
2407 		v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2408 
2409 		mask = 1;
2410 		for (i = 0; i < SGE_QSETS; i++) {
2411 			if (v & mask)
2412 				qs[i].rspq.starved++;
2413 			mask <<= 1;
2414 		}
2415 
2416 		mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2417 
2418 		for (i = 0; i < SGE_QSETS * 2; i++) {
2419 			if (v & mask) {
2420 				qs[i / 2].fl[i % 2].empty++;
2421 			}
2422 			mask <<= 1;
2423 		}
2424 
2425 		/* clear */
2426 		t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2427 		t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2428 	}
2429 
2430 	for (i = 0; i < sc->params.nports; i++) {
2431 		struct port_info *pi = &sc->port[i];
2432 		struct cmac *mac = &pi->mac;
2433 
2434 		if (!isset(&sc->open_device_map, pi->port_id))
2435 			continue;
2436 
2437 		cxgb_refresh_stats(pi);
2438 
2439 		if (mac->multiport)
2440 			continue;
2441 
2442 		/* Count rx fifo overflows, once per second */
2443 		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2444 		reset = 0;
2445 		if (cause & F_RXFIFO_OVERFLOW) {
2446 			mac->stats.rx_fifo_ovfl++;
2447 			reset |= F_RXFIFO_OVERFLOW;
2448 		}
2449 		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2450 	}
2451 }
2452 
2453 static void
2454 touch_bars(device_t dev)
2455 {
2456 	/*
2457 	 * Don't enable yet
2458 	 */
2459 #if !defined(__LP64__) && 0
2460 	u32 v;
2461 
2462 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2463 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2464 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2465 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2466 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2467 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2468 #endif
2469 }
2470 
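/*
 * Write a byte range to the serial EEPROM.  The EEPROM is written in 4-byte
 * words, so an unaligned range is widened by first reading the partial words
 * at either end (read-modify-write).  Write protection is lifted for the
 * duration of the update and restored afterwards.
 */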
2471 static int
2472 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2473 {
2474 	uint8_t *buf;
2475 	int err = 0;
2476 	u32 aligned_offset, aligned_len, *p;
2477 	struct adapter *adapter = pi->adapter;
2478 
2479 
2481 	aligned_len = (len + (offset & 3) + 3) & ~3;
2482 
2483 	if (aligned_offset != offset || aligned_len != len) {
2484 		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2485 		if (!buf)
2486 			return (ENOMEM);
2487 		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2488 		if (!err && aligned_len > 4)
2489 			err = t3_seeprom_read(adapter,
2490 					      aligned_offset + aligned_len - 4,
2491 					      (u32 *)&buf[aligned_len - 4]);
2492 		if (err)
2493 			goto out;
2494 		memcpy(buf + (offset & 3), data, len);
2495 	} else
2496 		buf = (uint8_t *)(uintptr_t)data;
2497 
2498 	err = t3_seeprom_wp(adapter, 0);
2499 	if (err)
2500 		goto out;
2501 
2502 	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2503 		err = t3_seeprom_write(adapter, aligned_offset, *p);
2504 		aligned_offset += 4;
2505 	}
2506 
2507 	if (!err)
2508 		err = t3_seeprom_wp(adapter, 1);
2509 out:
2510 	if (buf != data)
2511 		free(buf, M_DEVBUF);
2512 	return (err);
2513 }
2514 
2515 
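/*
 * Range check helper for the ioctls below: negative values mean "leave
 * unchanged" and are always accepted.
 */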
2516 static int
2517 in_range(int val, int lo, int hi)
2518 {
2519 	return (val < 0 || (val <= hi && val >= lo));
2520 }
2521 
2522 static int
2523 cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
2524 {
2525 	return (0);
2526 }
2527 
2528 static int
2529 cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2530 {
2531 	return (0);
2532 }
2533 
2534 static int
2535 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2536     int fflag, struct thread *td)
2537 {
2538 	int mmd, error = 0;
2539 	struct port_info *pi = dev->si_drv1;
2540 	adapter_t *sc = pi->adapter;
2541 
2542 #ifdef PRIV_SUPPORTED
2543 	if (priv_check(td, PRIV_DRIVER)) {
2544 		if (cxgb_debug)
2545 			printf("user does not have access to privileged ioctls\n");
2546 		return (EPERM);
2547 	}
2548 #else
2549 	if (suser(td)) {
2550 		if (cxgb_debug)
2551 			printf("user does not have access to privileged ioctls\n");
2552 		return (EPERM);
2553 	}
2554 #endif
2555 
2556 	switch (cmd) {
2557 	case CHELSIO_GET_MIIREG: {
2558 		uint32_t val;
2559 		struct cphy *phy = &pi->phy;
2560 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2561 
2562 		if (!phy->mdio_read)
2563 			return (EOPNOTSUPP);
2564 		if (is_10G(sc)) {
2565 			mmd = mid->phy_id >> 8;
2566 			if (!mmd)
2567 				mmd = MDIO_DEV_PCS;
2568 			else if (mmd > MDIO_DEV_VEND2)
2569 				return (EINVAL);
2570 
2571 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2572 					     mid->reg_num, &val);
2573 		} else
2574 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2575 					     mid->reg_num & 0x1f, &val);
2576 		if (error == 0)
2577 			mid->val_out = val;
2578 		break;
2579 	}
2580 	case CHELSIO_SET_MIIREG: {
2581 		struct cphy *phy = &pi->phy;
2582 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2583 
2584 		if (!phy->mdio_write)
2585 			return (EOPNOTSUPP);
2586 		if (is_10G(sc)) {
2587 			mmd = mid->phy_id >> 8;
2588 			if (!mmd)
2589 				mmd = MDIO_DEV_PCS;
2590 			else if (mmd > MDIO_DEV_VEND2)
2591 				return (EINVAL);
2592 
2593 			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2594 					      mmd, mid->reg_num, mid->val_in);
2595 		} else
2596 			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2597 					      mid->reg_num & 0x1f,
2598 					      mid->val_in);
2599 		break;
2600 	}
2601 	case CHELSIO_SETREG: {
2602 		struct ch_reg *edata = (struct ch_reg *)data;
2603 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2604 			return (EFAULT);
2605 		t3_write_reg(sc, edata->addr, edata->val);
2606 		break;
2607 	}
2608 	case CHELSIO_GETREG: {
2609 		struct ch_reg *edata = (struct ch_reg *)data;
2610 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2611 			return (EFAULT);
2612 		edata->val = t3_read_reg(sc, edata->addr);
2613 		break;
2614 	}
2615 	case CHELSIO_GET_SGE_CONTEXT: {
2616 		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2617 		mtx_lock_spin(&sc->sge.reg_lock);
2618 		switch (ecntxt->cntxt_type) {
2619 		case CNTXT_TYPE_EGRESS:
2620 			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2621 			    ecntxt->data);
2622 			break;
2623 		case CNTXT_TYPE_FL:
2624 			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2625 			    ecntxt->data);
2626 			break;
2627 		case CNTXT_TYPE_RSP:
2628 			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2629 			    ecntxt->data);
2630 			break;
2631 		case CNTXT_TYPE_CQ:
2632 			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2633 			    ecntxt->data);
2634 			break;
2635 		default:
2636 			error = EINVAL;
2637 			break;
2638 		}
2639 		mtx_unlock_spin(&sc->sge.reg_lock);
2640 		break;
2641 	}
2642 	case CHELSIO_GET_SGE_DESC: {
2643 		struct ch_desc *edesc = (struct ch_desc *)data;
2644 		int ret;
2645 		if (edesc->queue_num >= SGE_QSETS * 6)
2646 			return (EINVAL);
2647 		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2648 		    edesc->queue_num % 6, edesc->idx, edesc->data);
2649 		if (ret < 0)
2650 			return (EINVAL);
2651 		edesc->size = ret;
2652 		break;
2653 	}
2654 	case CHELSIO_GET_QSET_PARAMS: {
2655 		struct qset_params *q;
2656 		struct ch_qset_params *t = (struct ch_qset_params *)data;
2657 		int q1 = pi->first_qset;
2658 		int nqsets = pi->nqsets;
2659 		int i;
2660 
2661 		if (t->qset_idx >= nqsets)
2662 			return (EINVAL);
2663 
2664 		i = q1 + t->qset_idx;
2665 		q = &sc->params.sge.qset[i];
2666 		t->rspq_size   = q->rspq_size;
2667 		t->txq_size[0] = q->txq_size[0];
2668 		t->txq_size[1] = q->txq_size[1];
2669 		t->txq_size[2] = q->txq_size[2];
2670 		t->fl_size[0]  = q->fl_size;
2671 		t->fl_size[1]  = q->jumbo_size;
2672 		t->polling     = q->polling;
2673 		t->lro         = q->lro;
2674 		t->intr_lat    = q->coalesce_usecs;
2675 		t->cong_thres  = q->cong_thres;
2676 		t->qnum        = i;
2677 
2678 		if ((sc->flags & FULL_INIT_DONE) == 0)
2679 			t->vector = 0;
2680 		else if (sc->flags & USING_MSIX)
2681 			t->vector = rman_get_start(sc->msix_irq_res[i]);
2682 		else
2683 			t->vector = rman_get_start(sc->irq_res);
2684 
2685 		break;
2686 	}
2687 	case CHELSIO_GET_QSET_NUM: {
2688 		struct ch_reg *edata = (struct ch_reg *)data;
2689 		edata->val = pi->nqsets;
2690 		break;
2691 	}
2692 	case CHELSIO_LOAD_FW: {
2693 		uint8_t *fw_data;
2694 		uint32_t vers;
2695 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2696 
2697 		/*
2698 		 * You're allowed to load firmware only before FULL_INIT_DONE.
2699 		 *
2700 		 * FW_UPTODATE is also set so the rest of the initialization
2701 		 * will not overwrite what was loaded here.  This gives you the
2702 		 * flexibility to load any firmware (and maybe shoot yourself in
2703 		 * the foot).
2704 		 */
2705 
2706 		ADAPTER_LOCK(sc);
2707 		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2708 			ADAPTER_UNLOCK(sc);
2709 			return (EBUSY);
2710 		}
2711 
2712 		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2713 		if (!fw_data)
2714 			error = ENOMEM;
2715 		else
2716 			error = copyin(t->buf, fw_data, t->len);
2717 
2718 		if (!error)
2719 			error = -t3_load_fw(sc, fw_data, t->len);
2720 
2721 		if (t3_get_fw_version(sc, &vers) == 0) {
2722 			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2723 			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2724 			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2725 		}
2726 
2727 		if (!error)
2728 			sc->flags |= FW_UPTODATE;
2729 
2730 		free(fw_data, M_DEVBUF);
2731 		ADAPTER_UNLOCK(sc);
2732 		break;
2733 	}
2734 	case CHELSIO_LOAD_BOOT: {
2735 		uint8_t *boot_data;
2736 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2737 
2738 		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2739 		if (!boot_data)
2740 			return (ENOMEM);
2741 
2742 		error = copyin(t->buf, boot_data, t->len);
2743 		if (!error)
2744 			error = -t3_load_boot(sc, boot_data, t->len);
2745 
2746 		free(boot_data, M_DEVBUF);
2747 		break;
2748 	}
2749 	case CHELSIO_GET_PM: {
2750 		struct ch_pm *m = (struct ch_pm *)data;
2751 		struct tp_params *p = &sc->params.tp;
2752 
2753 		if (!is_offload(sc))
2754 			return (EOPNOTSUPP);
2755 
2756 		m->tx_pg_sz = p->tx_pg_size;
2757 		m->tx_num_pg = p->tx_num_pgs;
2758 		m->rx_pg_sz  = p->rx_pg_size;
2759 		m->rx_num_pg = p->rx_num_pgs;
2760 		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2761 
2762 		break;
2763 	}
2764 	case CHELSIO_SET_PM: {
2765 		struct ch_pm *m = (struct ch_pm *)data;
2766 		struct tp_params *p = &sc->params.tp;
2767 
2768 		if (!is_offload(sc))
2769 			return (EOPNOTSUPP);
2770 		if (sc->flags & FULL_INIT_DONE)
2771 			return (EBUSY);
2772 
2773 		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2774 		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2775 			return (EINVAL);	/* not power of 2 */
2776 		if (!(m->rx_pg_sz & 0x14000))
2777 			return (EINVAL);	/* not 16KB or 64KB */
2778 		if (!(m->tx_pg_sz & 0x1554000))
2779 			return (EINVAL);
2780 		if (m->tx_num_pg == -1)
2781 			m->tx_num_pg = p->tx_num_pgs;
2782 		if (m->rx_num_pg == -1)
2783 			m->rx_num_pg = p->rx_num_pgs;
2784 		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2785 			return (EINVAL);
2786 		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2787 		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2788 			return (EINVAL);
2789 
2790 		p->rx_pg_size = m->rx_pg_sz;
2791 		p->tx_pg_size = m->tx_pg_sz;
2792 		p->rx_num_pgs = m->rx_num_pg;
2793 		p->tx_num_pgs = m->tx_num_pg;
2794 		break;
2795 	}
2796 	case CHELSIO_SETMTUTAB: {
2797 		struct ch_mtus *m = (struct ch_mtus *)data;
2798 		int i;
2799 
2800 		if (!is_offload(sc))
2801 			return (EOPNOTSUPP);
2802 		if (offload_running(sc))
2803 			return (EBUSY);
2804 		if (m->nmtus != NMTUS)
2805 			return (EINVAL);
2806 		if (m->mtus[0] < 81)         /* accommodate SACK */
2807 			return (EINVAL);
2808 
2809 		/*
2810 		 * MTUs must be in ascending order
2811 		 */
2812 		for (i = 1; i < NMTUS; ++i)
2813 			if (m->mtus[i] < m->mtus[i - 1])
2814 				return (EINVAL);
2815 
2816 		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2817 		break;
2818 	}
2819 	case CHELSIO_GETMTUTAB: {
2820 		struct ch_mtus *m = (struct ch_mtus *)data;
2821 
2822 		if (!is_offload(sc))
2823 			return (EOPNOTSUPP);
2824 
2825 		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2826 		m->nmtus = NMTUS;
2827 		break;
2828 	}
2829 	case CHELSIO_GET_MEM: {
2830 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2831 		struct mc7 *mem;
2832 		uint8_t *useraddr;
2833 		u64 buf[32];
2834 
2835 		/*
2836 		 * Use these to avoid modifying len/addr in the return
2837 		 * struct
2838 		 */
2839 		uint32_t len = t->len, addr = t->addr;
2840 
2841 		if (!is_offload(sc))
2842 			return (EOPNOTSUPP);
2843 		if (!(sc->flags & FULL_INIT_DONE))
2844 			return (EIO);         /* need the memory controllers */
2845 		if ((addr & 0x7) || (len & 0x7))
2846 			return (EINVAL);
2847 		if (t->mem_id == MEM_CM)
2848 			mem = &sc->cm;
2849 		else if (t->mem_id == MEM_PMRX)
2850 			mem = &sc->pmrx;
2851 		else if (t->mem_id == MEM_PMTX)
2852 			mem = &sc->pmtx;
2853 		else
2854 			return (EINVAL);
2855 
2856 		/*
2857 		 * Version scheme:
2858 		 * bits 0..9: chip version
2859 		 * bits 10..15: chip revision
2860 		 */
2861 		t->version = 3 | (sc->params.rev << 10);
2862 
2863 		/*
2864 		 * Read 256 bytes at a time as len can be large and we don't
2865 		 * want to use huge intermediate buffers.
2866 		 */
2867 		useraddr = (uint8_t *)t->buf;
2868 		while (len) {
2869 			unsigned int chunk = min(len, sizeof(buf));
2870 
2871 			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2872 			if (error)
2873 				return (-error);
2874 			if (copyout(buf, useraddr, chunk))
2875 				return (EFAULT);
2876 			useraddr += chunk;
2877 			addr += chunk;
2878 			len -= chunk;
2879 		}
2880 		break;
2881 	}
2882 	case CHELSIO_READ_TCAM_WORD: {
2883 		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2884 
2885 		if (!is_offload(sc))
2886 			return (EOPNOTSUPP);
2887 		if (!(sc->flags & FULL_INIT_DONE))
2888 			return (EIO);         /* need MC5 */
2889 		return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
2890 		break;
2891 	}
2892 	case CHELSIO_SET_TRACE_FILTER: {
2893 		struct ch_trace *t = (struct ch_trace *)data;
2894 		const struct trace_params *tp;
2895 
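		/*
		 * The fields of ch_trace starting at sip are assumed to have
		 * the same layout as struct trace_params.
		 */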
2896 		tp = (const struct trace_params *)&t->sip;
2897 		if (t->config_tx)
2898 			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2899 					       t->trace_tx);
2900 		if (t->config_rx)
2901 			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2902 					       t->trace_rx);
2903 		break;
2904 	}
2905 	case CHELSIO_SET_PKTSCHED: {
2906 		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2907 		if (sc->open_device_map == 0)
2908 			return (EAGAIN);
2909 		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2910 		    p->binding);
2911 		break;
2912 	}
2913 	case CHELSIO_IFCONF_GETREGS: {
2914 		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2915 		int reglen = cxgb_get_regs_len();
2916 		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2917 		if (buf == NULL) {
2918 			return (ENOMEM);
2919 		}
2920 		if (regs->len > reglen)
2921 			regs->len = reglen;
2922 		else if (regs->len < reglen)
2923 			error = ENOBUFS;
2924 
2925 		if (!error) {
2926 			cxgb_get_regs(sc, regs, buf);
2927 			error = copyout(buf, regs->data, reglen);
2928 		}
2929 		free(buf, M_DEVBUF);
2930 
2931 		break;
2932 	}
2933 	case CHELSIO_SET_HW_SCHED: {
2934 		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2935 		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2936 
2937 		if ((sc->flags & FULL_INIT_DONE) == 0)
2938 			return (EAGAIN);       /* need TP to be initialized */
2939 		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2940 		    !in_range(t->channel, 0, 1) ||
2941 		    !in_range(t->kbps, 0, 10000000) ||
2942 		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2943 		    !in_range(t->flow_ipg, 0,
2944 			      dack_ticks_to_usec(sc, 0x7ff)))
2945 			return (EINVAL);
2946 
2947 		if (t->kbps >= 0) {
2948 			error = t3_config_sched(sc, t->kbps, t->sched);
2949 			if (error < 0)
2950 				return (-error);
2951 		}
2952 		if (t->class_ipg >= 0)
2953 			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2954 		if (t->flow_ipg >= 0) {
2955 			t->flow_ipg *= 1000;     /* us -> ns */
2956 			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2957 		}
2958 		if (t->mode >= 0) {
2959 			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2960 
2961 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2962 					 bit, t->mode ? bit : 0);
2963 		}
2964 		if (t->channel >= 0)
2965 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2966 					 1 << t->sched, t->channel << t->sched);
2967 		break;
2968 	}
2969 	case CHELSIO_GET_EEPROM: {
2970 		int i;
2971 		struct ch_eeprom *e = (struct ch_eeprom *)data;
2972 		uint8_t *buf;
2973 
2974 		if (e->offset & 3 || e->offset >= EEPROMSIZE ||
2975 		    e->len > EEPROMSIZE || e->offset + e->len > EEPROMSIZE) {
2976 			return (EINVAL);
2977 		}
2978 
2979 		buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
2980 		if (buf == NULL) {
2981 			return (ENOMEM);
2982 		}
2983 		e->magic = EEPROM_MAGIC;
2984 		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
2985 			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
2986 
2987 		if (!error)
2988 			error = copyout(buf + e->offset, e->data, e->len);
2989 
2990 		free(buf, M_DEVBUF);
2991 		break;
2992 	}
2993 	case CHELSIO_CLEAR_STATS: {
2994 		if (!(sc->flags & FULL_INIT_DONE))
2995 			return (EAGAIN);
2996 
2997 		PORT_LOCK(pi);
2998 		t3_mac_update_stats(&pi->mac);
2999 		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
3000 		PORT_UNLOCK(pi);
3001 		break;
3002 	}
3003 	case CHELSIO_GET_UP_LA: {
3004 		struct ch_up_la *la = (struct ch_up_la *)data;
3005 		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3006 		if (buf == NULL) {
3007 			return (ENOMEM);
3008 		}
3009 		if (la->bufsize < LA_BUFSIZE)
3010 			error = ENOBUFS;
3011 
3012 		if (!error)
3013 			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3014 					      &la->bufsize, buf);
3015 		if (!error)
3016 			error = copyout(buf, la->data, la->bufsize);
3017 
3018 		free(buf, M_DEVBUF);
3019 		break;
3020 	}
3021 	case CHELSIO_GET_UP_IOQS: {
3022 		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3023 		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3024 		uint32_t *v;
3025 
3026 		if (buf == NULL) {
3027 			return (ENOMEM);
3028 		}
3029 		if (ioqs->bufsize < IOQS_BUFSIZE)
3030 			error = ENOBUFS;
3031 
3032 		if (!error)
3033 			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3034 
3035 		if (!error) {
3036 			v = (uint32_t *)buf;
3037 
3038 			ioqs->ioq_rx_enable = *v++;
3039 			ioqs->ioq_tx_enable = *v++;
3040 			ioqs->ioq_rx_status = *v++;
3041 			ioqs->ioq_tx_status = *v++;
3042 
3043 			error = copyout(v, ioqs->data, ioqs->bufsize);
3044 		}
3045 
3046 		free(buf, M_DEVBUF);
3047 		break;
3048 	}
3049 	case CHELSIO_SET_FILTER: {
3050 		struct ch_filter *f = (struct ch_filter *)data;
3051 		struct filter_info *p;
3052 		unsigned int nfilters = sc->params.mc5.nfilters;
3053 
3054 		if (!is_offload(sc))
3055 			return (EOPNOTSUPP);	/* No TCAM */
3056 		if (!(sc->flags & FULL_INIT_DONE))
3057 			return (EAGAIN);	/* mc5 not setup yet */
3058 		if (nfilters == 0)
3059 			return (EBUSY);		/* TOE will use TCAM */
3060 
3061 		/* sanity checks */
3062 		if (f->filter_id >= nfilters ||
3063 		    (f->val.dip && f->mask.dip != 0xffffffff) ||
3064 		    (f->val.sport && f->mask.sport != 0xffff) ||
3065 		    (f->val.dport && f->mask.dport != 0xffff) ||
3066 		    (f->val.vlan && f->mask.vlan != 0xfff) ||
3067 		    (f->val.vlan_prio &&
3068 			f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3069 		    (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3070 		    f->qset >= SGE_QSETS ||
3071 		    sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3072 			return (EINVAL);
3073 
3074 		/* Was allocated with M_WAITOK */
3075 		KASSERT(sc->filters, ("filter table NULL\n"));
3076 
3077 		p = &sc->filters[f->filter_id];
3078 		if (p->locked)
3079 			return (EPERM);
3080 
3081 		bzero(p, sizeof(*p));
3082 		p->sip = f->val.sip;
3083 		p->sip_mask = f->mask.sip;
3084 		p->dip = f->val.dip;
3085 		p->sport = f->val.sport;
3086 		p->dport = f->val.dport;
3087 		p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3088 		p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3089 		    FILTER_NO_VLAN_PRI;
3090 		p->mac_hit = f->mac_hit;
3091 		p->mac_vld = f->mac_addr_idx != 0xffff;
3092 		p->mac_idx = f->mac_addr_idx;
3093 		p->pkt_type = f->proto;
3094 		p->report_filter_id = f->want_filter_id;
3095 		p->pass = f->pass;
3096 		p->rss = f->rss;
3097 		p->qset = f->qset;
3098 
3099 		error = set_filter(sc, f->filter_id, p);
3100 		if (error == 0)
3101 			p->valid = 1;
3102 		break;
3103 	}
3104 	case CHELSIO_DEL_FILTER: {
3105 		struct ch_filter *f = (struct ch_filter *)data;
3106 		struct filter_info *p;
3107 		unsigned int nfilters = sc->params.mc5.nfilters;
3108 
3109 		if (!is_offload(sc))
3110 			return (EOPNOTSUPP);
3111 		if (!(sc->flags & FULL_INIT_DONE))
3112 			return (EAGAIN);
3113 		if (nfilters == 0 || sc->filters == NULL)
3114 			return (EINVAL);
3115 		if (f->filter_id >= nfilters)
3116 			return (EINVAL);
3117 
3118 		p = &sc->filters[f->filter_id];
3119 		if (p->locked)
3120 			return (EPERM);
3121 		if (!p->valid)
3122 			return (EFAULT); /* Read "Bad address" as "Bad index" */
3123 
3124 		bzero(p, sizeof(*p));
3125 		p->sip = p->sip_mask = 0xffffffff;
3126 		p->vlan = 0xfff;
3127 		p->vlan_prio = FILTER_NO_VLAN_PRI;
3128 		p->pkt_type = 1;
3129 		error = set_filter(sc, f->filter_id, p);
3130 		break;
3131 	}
3132 	case CHELSIO_GET_FILTER: {
3133 		struct ch_filter *f = (struct ch_filter *)data;
3134 		struct filter_info *p;
3135 		unsigned int i, nfilters = sc->params.mc5.nfilters;
3136 
3137 		if (!is_offload(sc))
3138 			return (EOPNOTSUPP);
3139 		if (!(sc->flags & FULL_INIT_DONE))
3140 			return (EAGAIN);
3141 		if (nfilters == 0 || sc->filters == NULL)
3142 			return (EINVAL);
3143 
3144 		i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3145 		for (; i < nfilters; i++) {
3146 			p = &sc->filters[i];
3147 			if (!p->valid)
3148 				continue;
3149 
3150 			bzero(f, sizeof(*f));
3151 
3152 			f->filter_id = i;
3153 			f->val.sip = p->sip;
3154 			f->mask.sip = p->sip_mask;
3155 			f->val.dip = p->dip;
3156 			f->mask.dip = p->dip ? 0xffffffff : 0;
3157 			f->val.sport = p->sport;
3158 			f->mask.sport = p->sport ? 0xffff : 0;
3159 			f->val.dport = p->dport;
3160 			f->mask.dport = p->dport ? 0xffff : 0;
3161 			f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3162 			f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3163 			f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3164 			    0 : p->vlan_prio;
3165 			f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3166 			    0 : FILTER_NO_VLAN_PRI;
3167 			f->mac_hit = p->mac_hit;
3168 			f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3169 			f->proto = p->pkt_type;
3170 			f->want_filter_id = p->report_filter_id;
3171 			f->pass = p->pass;
3172 			f->rss = p->rss;
3173 			f->qset = p->qset;
3174 
3175 			break;
3176 		}
3177 
3178 		if (i == nfilters)
3179 			f->filter_id = 0xffffffff;
3180 		break;
3181 	}
3182 	default:
3183 		return (EOPNOTSUPP);
3184 		break;
3185 	}
3186 
3187 	return (error);
3188 }
3189 
3190 static __inline void
3191 reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3192     unsigned int end)
3193 {
3194 	uint32_t *p = (uint32_t *)(buf + start);
3195 
3196 	for ( ; start <= end; start += sizeof(uint32_t))
3197 		*p++ = t3_read_reg(ap, start);
3198 }
3199 
3200 #define T3_REGMAP_SIZE (3 * 1024)
3201 static int
3202 cxgb_get_regs_len(void)
3203 {
3204 	return T3_REGMAP_SIZE;
3205 }
3206 
3207 static void
3208 cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3209 {
3210 
3211 	/*
3212 	 * Version scheme:
3213 	 * bits 0..9: chip version
3214 	 * bits 10..15: chip revision
3215 	 * bit 31: set for PCIe cards
3216 	 */
3217 	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3218 
3219 	/*
3220 	 * We skip the MAC statistics registers because they are clear-on-read.
3221 	 * Also reading multi-register stats would need to synchronize with the
3222 	 * periodic mac stats accumulation.  Hard to justify the complexity.
3223 	 */
3224 	memset(buf, 0, cxgb_get_regs_len());
3225 	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3226 	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3227 	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3228 	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3229 	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3230 	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3231 		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3232 	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3233 		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3234 }
3235 
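/*
 * Allocate the software state for the hardware filters.  The last filter is
 * initialized as a locked, valid entry so that it cannot be modified or
 * deleted through the ioctl interface.
 */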
3236 static int
3237 alloc_filters(struct adapter *sc)
3238 {
3239 	struct filter_info *p;
3240 	unsigned int nfilters = sc->params.mc5.nfilters;
3241 
3242 	if (nfilters == 0)
3243 		return (0);
3244 
3245 	p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3246 	sc->filters = p;
3247 
3248 	p = &sc->filters[nfilters - 1];
3249 	p->vlan = 0xfff;
3250 	p->vlan_prio = FILTER_NO_VLAN_PRI;
3251 	p->pass = p->rss = p->valid = p->locked = 1;
3252 
3253 	return (0);
3254 }
3255 
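/*
 * Enable filtering and program any locked (driver-owned) filters into the
 * hardware.
 */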
3256 static int
3257 setup_hw_filters(struct adapter *sc)
3258 {
3259 	int i, rc;
3260 	unsigned int nfilters = sc->params.mc5.nfilters;
3261 
3262 	if (!sc->filters)
3263 		return (0);
3264 
3265 	t3_enable_filters(sc);
3266 
3267 	for (i = rc = 0; i < nfilters && !rc; i++) {
3268 		if (sc->filters[i].locked)
3269 			rc = set_filter(sc, i, &sc->filters[i]);
3270 	}
3271 
3272 	return (rc);
3273 }
3274 
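/*
 * Program one filter into the hardware.  The filter index is translated to
 * its TCAM location (the filter region sits just below the routing region at
 * the top of the MC5) and the filter is written using an atomic bypass work
 * request carrying a CPL_PASS_OPEN_REQ followed by two CPL_SET_TCB_FIELD
 * updates.  A filter that passes packets without RSS gets one extra TCB
 * update to steer them to the selected queue set.
 */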
3275 static int
3276 set_filter(struct adapter *sc, int id, const struct filter_info *f)
3277 {
3278 	int len;
3279 	struct mbuf *m;
3280 	struct ulp_txpkt *txpkt;
3281 	struct work_request_hdr *wr;
3282 	struct cpl_pass_open_req *oreq;
3283 	struct cpl_set_tcb_field *sreq;
3284 
3285 	len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3286 	KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3287 
3288 	id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3289 	      sc->params.mc5.nfilters;
3290 
3291 	m = m_gethdr(M_WAITOK, MT_DATA);
3292 	m->m_len = m->m_pkthdr.len = len;
3293 	bzero(mtod(m, char *), len);
3294 
3295 	wr = mtod(m, struct work_request_hdr *);
3296 	wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3297 
3298 	oreq = (struct cpl_pass_open_req *)(wr + 1);
3299 	txpkt = (struct ulp_txpkt *)oreq;
3300 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3301 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3302 	OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3303 	oreq->local_port = htons(f->dport);
3304 	oreq->peer_port = htons(f->sport);
3305 	oreq->local_ip = htonl(f->dip);
3306 	oreq->peer_ip = htonl(f->sip);
3307 	oreq->peer_netmask = htonl(f->sip_mask);
3308 	oreq->opt0h = 0;
3309 	oreq->opt0l = htonl(F_NO_OFFLOAD);
3310 	oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3311 			 V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3312 			 V_VLAN_PRI(f->vlan_prio >> 1) |
3313 			 V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3314 			 V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3315 			 V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3316 
3317 	sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3318 	set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3319 			  (f->report_filter_id << 15) | (1 << 23) |
3320 			  ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3321 	set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3322 	t3_mgmt_tx(sc, m);
3323 
3324 	if (f->pass && !f->rss) {
3325 		len = sizeof(*sreq);
3326 		m = m_gethdr(M_WAITOK, MT_DATA);
3327 		m->m_len = m->m_pkthdr.len = len;
3328 		bzero(mtod(m, char *), len);
3329 		sreq = mtod(m, struct cpl_set_tcb_field *);
3330 		sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3331 		mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3332 				 (u64)sc->rrss_map[f->qset] << 19);
3333 		t3_mgmt_tx(sc, m);
3334 	}
3335 	return (0);
3336 }
3337 
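/*
 * Populate a CPL_SET_TCB_FIELD request; no reply is requested.
 */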
3338 static inline void
3339 mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3340     unsigned int word, u64 mask, u64 val)
3341 {
3342 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3343 	req->reply = V_NO_REPLY(1);
3344 	req->cpu_idx = 0;
3345 	req->word = htons(word);
3346 	req->mask = htobe64(mask);
3347 	req->val = htobe64(val);
3348 }
3349 
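/*
 * Like mk_set_tcb_field() but the request is wrapped in a ULP_TXPKT command
 * so that it can be carried inside a larger work request (see set_filter()).
 */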
3350 static inline void
3351 set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3352     unsigned int word, u64 mask, u64 val)
3353 {
3354 	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3355 
3356 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3357 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3358 	mk_set_tcb_field(req, tid, word, mask, val);
3359 }
3360 
3361 void
3362 t3_iterate(void (*func)(struct adapter *, void *), void *arg)
3363 {
3364 	struct adapter *sc;
3365 
3366 	mtx_lock(&t3_list_lock);
3367 	SLIST_FOREACH(sc, &t3_list, link) {
3368 		/*
3369 		 * func should not make any assumptions about what state sc is
3370 		 * in - the only guarantee is that sc->sc_lock is a valid lock.
3371 		 */
3372 		func(sc, arg);
3373 	}
3374 	mtx_unlock(&t3_list_lock);
3375 }
3376 
3377 #ifdef TCP_OFFLOAD
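/*
 * Enable or disable TOE on a port.  Enabling activates the TOM (and,
 * opportunistically, the iWARP) upper layer driver the first time it is
 * needed; disabling merely clears the port's bit in the offload map.
 */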
3378 static int
3379 toe_capability(struct port_info *pi, int enable)
3380 {
3381 	int rc;
3382 	struct adapter *sc = pi->adapter;
3383 
3384 	ADAPTER_LOCK_ASSERT_OWNED(sc);
3385 
3386 	if (!is_offload(sc))
3387 		return (ENODEV);
3388 
3389 	if (enable) {
3390 		if (!(sc->flags & FULL_INIT_DONE)) {
3391 			log(LOG_WARNING,
3392 			    "You must enable a cxgb interface first\n");
3393 			return (EAGAIN);
3394 		}
3395 
3396 		if (isset(&sc->offload_map, pi->port_id))
3397 			return (0);
3398 
3399 		if (!(sc->flags & TOM_INIT_DONE)) {
3400 			rc = t3_activate_uld(sc, ULD_TOM);
3401 			if (rc == EAGAIN) {
3402 				log(LOG_WARNING,
3403 				    "You must kldload t3_tom.ko before trying "
3404 				    "to enable TOE on a cxgb interface.\n");
3405 			}
3406 			if (rc != 0)
3407 				return (rc);
3408 			KASSERT(sc->tom_softc != NULL,
3409 			    ("%s: TOM activated but softc NULL", __func__));
3410 			KASSERT(sc->flags & TOM_INIT_DONE,
3411 			    ("%s: TOM activated but flag not set", __func__));
3412 		}
3413 
3414 		setbit(&sc->offload_map, pi->port_id);
3415 
3416 		/*
3417 		 * XXX: Temporary code to allow iWARP to be enabled when TOE is
3418 		 * enabled on any port.  Need to figure out how to enable,
3419 		 * disable, load, and unload iWARP cleanly.
3420 		 */
3421 		if (!isset(&sc->offload_map, MAX_NPORTS) &&
3422 		    t3_activate_uld(sc, ULD_IWARP) == 0)
3423 			setbit(&sc->offload_map, MAX_NPORTS);
3424 	} else {
3425 		if (!isset(&sc->offload_map, pi->port_id))
3426 			return (0);
3427 
3428 		KASSERT(sc->flags & TOM_INIT_DONE,
3429 		    ("%s: TOM never initialized?", __func__));
3430 		clrbit(&sc->offload_map, pi->port_id);
3431 	}
3432 
3433 	return (0);
3434 }
3435 
3436 /*
3437  * Add an upper layer driver to the global list.
3438  */
3439 int
3440 t3_register_uld(struct uld_info *ui)
3441 {
3442 	int rc = 0;
3443 	struct uld_info *u;
3444 
3445 	mtx_lock(&t3_uld_list_lock);
3446 	SLIST_FOREACH(u, &t3_uld_list, link) {
3447 		if (u->uld_id == ui->uld_id) {
3448 			rc = EEXIST;
3449 			goto done;
3450 		}
3451 	}
3452 
3453 	SLIST_INSERT_HEAD(&t3_uld_list, ui, link);
3454 	ui->refcount = 0;
3455 done:
3456 	mtx_unlock(&t3_uld_list_lock);
3457 	return (rc);
3458 }
3459 
3460 int
3461 t3_unregister_uld(struct uld_info *ui)
3462 {
3463 	int rc = EINVAL;
3464 	struct uld_info *u;
3465 
3466 	mtx_lock(&t3_uld_list_lock);
3467 
3468 	SLIST_FOREACH(u, &t3_uld_list, link) {
3469 		if (u == ui) {
3470 			if (ui->refcount > 0) {
3471 				rc = EBUSY;
3472 				goto done;
3473 			}
3474 
3475 			SLIST_REMOVE(&t3_uld_list, ui, uld_info, link);
3476 			rc = 0;
3477 			goto done;
3478 		}
3479 	}
3480 done:
3481 	mtx_unlock(&t3_uld_list_lock);
3482 	return (rc);
3483 }
3484 
3485 int
3486 t3_activate_uld(struct adapter *sc, int id)
3487 {
3488 	int rc = EAGAIN;
3489 	struct uld_info *ui;
3490 
3491 	mtx_lock(&t3_uld_list_lock);
3492 
3493 	SLIST_FOREACH(ui, &t3_uld_list, link) {
3494 		if (ui->uld_id == id) {
3495 			rc = ui->activate(sc);
3496 			if (rc == 0)
3497 				ui->refcount++;
3498 			goto done;
3499 		}
3500 	}
3501 done:
3502 	mtx_unlock(&t3_uld_list_lock);
3503 
3504 	return (rc);
3505 }
3506 
3507 int
3508 t3_deactivate_uld(struct adapter *sc, int id)
3509 {
3510 	int rc = EINVAL;
3511 	struct uld_info *ui;
3512 
3513 	mtx_lock(&t3_uld_list_lock);
3514 
3515 	SLIST_FOREACH(ui, &t3_uld_list, link) {
3516 		if (ui->uld_id == id) {
3517 			rc = ui->deactivate(sc);
3518 			if (rc == 0)
3519 				ui->refcount--;
3520 			goto done;
3521 		}
3522 	}
3523 done:
3524 	mtx_unlock(&t3_uld_list_lock);
3525 
3526 	return (rc);
3527 }
3528 
3529 static int
3530 cpl_not_handled(struct sge_qset *qs __unused, struct rsp_desc *r __unused,
3531     struct mbuf *m)
3532 {
3533 	m_freem(m);
3534 	return (EDOOFUS);
3535 }
3536 
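/*
 * Install a handler for a CPL opcode (h == NULL restores the default
 * "not handled" handler).  The pointer is published with a release store so
 * that the rx path always sees a consistent value.
 */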
3537 int
3538 t3_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h)
3539 {
3540 	uintptr_t *loc, new;
3541 
3542 	if (opcode >= NUM_CPL_HANDLERS)
3543 		return (EINVAL);
3544 
3545 	new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled;
3546 	loc = (uintptr_t *) &sc->cpl_handler[opcode];
3547 	atomic_store_rel_ptr(loc, new);
3548 
3549 	return (0);
3550 }
3551 #endif
3552 
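/*
 * Module load/unload handler.  Sets up the global adapter (and ULD) lists on
 * load and refuses to unload while any adapter or ULD is still registered.
 */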
3553 static int
3554 cxgbc_mod_event(module_t mod, int cmd, void *arg)
3555 {
3556 	int rc = 0;
3557 
3558 	switch (cmd) {
3559 	case MOD_LOAD:
3560 		mtx_init(&t3_list_lock, "T3 adapters", 0, MTX_DEF);
3561 		SLIST_INIT(&t3_list);
3562 #ifdef TCP_OFFLOAD
3563 		mtx_init(&t3_uld_list_lock, "T3 ULDs", 0, MTX_DEF);
3564 		SLIST_INIT(&t3_uld_list);
3565 #endif
3566 		break;
3567 
3568 	case MOD_UNLOAD:
3569 #ifdef TCP_OFFLOAD
3570 		mtx_lock(&t3_uld_list_lock);
3571 		if (!SLIST_EMPTY(&t3_uld_list)) {
3572 			rc = EBUSY;
3573 			mtx_unlock(&t3_uld_list_lock);
3574 			break;
3575 		}
3576 		mtx_unlock(&t3_uld_list_lock);
3577 		mtx_destroy(&t3_uld_list_lock);
3578 #endif
3579 		mtx_lock(&t3_list_lock);
3580 		if (!SLIST_EMPTY(&t3_list)) {
3581 			rc = EBUSY;
3582 			mtx_unlock(&t3_list_lock);
3583 			break;
3584 		}
3585 		mtx_unlock(&t3_list_lock);
3586 		mtx_destroy(&t3_list_lock);
3587 		break;
3588 	}
3589 
3590 	return (rc);
3591 }
3592 
3593 #ifdef NETDUMP
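/*
 * Netdump glue: report receive ring geometry, re-initialize the free-list
 * zones and disable LRO when a dump starts, and provide polled transmit and
 * receive paths.
 */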
3594 static void
3595 cxgb_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize)
3596 {
3597 	struct port_info *pi;
3598 	adapter_t *adap;
3599 
3600 	pi = if_getsoftc(ifp);
3601 	adap = pi->adapter;
3602 	ADAPTER_LOCK(adap);
3603 	*nrxr = adap->nqsets;
3604 	*ncl = adap->sge.qs[0].fl[1].size;
3605 	*clsize = adap->sge.qs[0].fl[1].buf_size;
3606 	ADAPTER_UNLOCK(adap);
3607 }
3608 
3609 static void
3610 cxgb_netdump_event(struct ifnet *ifp, enum netdump_ev event)
3611 {
3612 	struct port_info *pi;
3613 	struct sge_qset *qs;
3614 	int i;
3615 
3616 	pi = if_getsoftc(ifp);
3617 	if (event == NETDUMP_START)
3618 		for (i = 0; i < pi->adapter->nqsets; i++) {
3619 			qs = &pi->adapter->sge.qs[i];
3620 
3621 			/* Need to reinit after netdump_mbuf_dump(). */
3622 			qs->fl[0].zone = zone_pack;
3623 			qs->fl[1].zone = zone_clust;
3624 			qs->lro.enabled = 0;
3625 		}
3626 }
3627 
3628 static int
3629 cxgb_netdump_transmit(struct ifnet *ifp, struct mbuf *m)
3630 {
3631 	struct port_info *pi;
3632 	struct sge_qset *qs;
3633 
3634 	pi = if_getsoftc(ifp);
3635 	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
3636 	    IFF_DRV_RUNNING)
3637 		return (ENOENT);
3638 
3639 	qs = &pi->adapter->sge.qs[pi->first_qset];
3640 	return (cxgb_netdump_encap(qs, &m));
3641 }
3642 
3643 static int
3644 cxgb_netdump_poll(struct ifnet *ifp, int count)
3645 {
3646 	struct port_info *pi;
3647 	adapter_t *adap;
3648 	int i;
3649 
3650 	pi = if_getsoftc(ifp);
3651 	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
3652 		return (ENOENT);
3653 
3654 	adap = pi->adapter;
3655 	for (i = 0; i < adap->nqsets; i++)
3656 		(void)cxgb_netdump_poll_rx(adap, &adap->sge.qs[i]);
3657 	(void)cxgb_netdump_poll_tx(&adap->sge.qs[pi->first_qset]);
3658 	return (0);
3659 }
3660 #endif /* NETDUMP */
3661