xref: /freebsd/sys/dev/cxgb/cxgb_main.c (revision 13ec1e3155c7e9bf037b12af186351b7fa9b9450)
1 /**************************************************************************
2 SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 
4 Copyright (c) 2007-2009, Chelsio Inc.
5 All rights reserved.
6 
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
9 
10  1. Redistributions of source code must retain the above copyright notice,
11     this list of conditions and the following disclaimer.
12 
13  2. Neither the name of the Chelsio Corporation nor the names of its
14     contributors may be used to endorse or promote products derived from
15     this software without specific prior written permission.
16 
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.
28 
29 ***************************************************************************/
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include "opt_inet.h"
35 
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/bus.h>
40 #include <sys/module.h>
41 #include <sys/pciio.h>
42 #include <sys/conf.h>
43 #include <machine/bus.h>
44 #include <machine/resource.h>
45 #include <sys/ktr.h>
46 #include <sys/rman.h>
47 #include <sys/ioccom.h>
48 #include <sys/mbuf.h>
49 #include <sys/linker.h>
50 #include <sys/firmware.h>
51 #include <sys/socket.h>
52 #include <sys/sockio.h>
53 #include <sys/smp.h>
54 #include <sys/sysctl.h>
55 #include <sys/syslog.h>
56 #include <sys/queue.h>
57 #include <sys/taskqueue.h>
58 #include <sys/proc.h>
59 
60 #include <net/bpf.h>
61 #include <net/debugnet.h>
62 #include <net/ethernet.h>
63 #include <net/if.h>
64 #include <net/if_var.h>
65 #include <net/if_arp.h>
66 #include <net/if_dl.h>
67 #include <net/if_media.h>
68 #include <net/if_types.h>
69 #include <net/if_vlan_var.h>
70 
71 #include <netinet/in_systm.h>
72 #include <netinet/in.h>
73 #include <netinet/if_ether.h>
74 #include <netinet/ip.h>
75 #include <netinet/ip.h>
76 #include <netinet/tcp.h>
77 #include <netinet/udp.h>
78 
79 #include <dev/pci/pcireg.h>
80 #include <dev/pci/pcivar.h>
81 #include <dev/pci/pci_private.h>
82 
83 #include <cxgb_include.h>
84 
85 #ifdef PRIV_SUPPORTED
86 #include <sys/priv.h>
87 #endif
88 
89 static int cxgb_setup_interrupts(adapter_t *);
90 static void cxgb_teardown_interrupts(adapter_t *);
91 static void cxgb_init(void *);
92 static int cxgb_init_locked(struct port_info *);
93 static int cxgb_uninit_locked(struct port_info *);
94 static int cxgb_uninit_synchronized(struct port_info *);
95 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
96 static int cxgb_media_change(struct ifnet *);
97 static int cxgb_ifm_type(int);
98 static void cxgb_build_medialist(struct port_info *);
99 static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
100 static uint64_t cxgb_get_counter(struct ifnet *, ift_counter);
101 static int setup_sge_qsets(adapter_t *);
102 static void cxgb_async_intr(void *);
103 static void cxgb_tick_handler(void *, int);
104 static void cxgb_tick(void *);
105 static void link_check_callout(void *);
106 static void check_link_status(void *, int);
107 static void setup_rss(adapter_t *sc);
108 static int alloc_filters(struct adapter *);
109 static int setup_hw_filters(struct adapter *);
110 static int set_filter(struct adapter *, int, const struct filter_info *);
111 static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
112     unsigned int, u64, u64);
113 static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
114     unsigned int, u64, u64);
115 #ifdef TCP_OFFLOAD
116 static int cpl_not_handled(struct sge_qset *, struct rsp_desc *, struct mbuf *);
117 #endif
118 
119 /* Attachment glue for the PCI controller end of the device.  Each port of
120  * the device is attached separately, as defined later.
121  */
122 static int cxgb_controller_probe(device_t);
123 static int cxgb_controller_attach(device_t);
124 static int cxgb_controller_detach(device_t);
125 static void cxgb_free(struct adapter *);
126 static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
127     unsigned int end);
128 static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
129 static int cxgb_get_regs_len(void);
130 static void touch_bars(device_t dev);
131 static void cxgb_update_mac_settings(struct port_info *p);
132 #ifdef TCP_OFFLOAD
133 static int toe_capability(struct port_info *, int);
134 #endif
135 
136 /* Table for probing the cards.  The desc field isn't actually used */
137 struct cxgb_ident {
138 	uint16_t	vendor;
139 	uint16_t	device;
140 	int		index;
141 	char		*desc;
142 } cxgb_identifiers[] = {
143 	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
144 	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
145 	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
146 	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
147 	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
148 	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
149 	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
150 	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
151 	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
152 	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
153 	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
154 	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
155 	{PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
156 	{PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
157 	{0, 0, 0, NULL}
158 };
159 
160 static device_method_t cxgb_controller_methods[] = {
161 	DEVMETHOD(device_probe,		cxgb_controller_probe),
162 	DEVMETHOD(device_attach,	cxgb_controller_attach),
163 	DEVMETHOD(device_detach,	cxgb_controller_detach),
164 
165 	DEVMETHOD_END
166 };
167 
168 static driver_t cxgb_controller_driver = {
169 	"cxgbc",
170 	cxgb_controller_methods,
171 	sizeof(struct adapter)
172 };
173 
174 static int cxgbc_mod_event(module_t, int, void *);
175 static devclass_t	cxgb_controller_devclass;
176 DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass,
177     cxgbc_mod_event, 0);
178 MODULE_PNP_INFO("U16:vendor;U16:device", pci, cxgbc, cxgb_identifiers,
179     nitems(cxgb_identifiers) - 1);
180 MODULE_VERSION(cxgbc, 1);
181 MODULE_DEPEND(cxgbc, firmware, 1, 1, 1);
182 
183 /*
184  * Attachment glue for the ports.  Attachment is done directly to the
185  * controller device.
186  */
187 static int cxgb_port_probe(device_t);
188 static int cxgb_port_attach(device_t);
189 static int cxgb_port_detach(device_t);
190 
191 static device_method_t cxgb_port_methods[] = {
192 	DEVMETHOD(device_probe,		cxgb_port_probe),
193 	DEVMETHOD(device_attach,	cxgb_port_attach),
194 	DEVMETHOD(device_detach,	cxgb_port_detach),
195 	{ 0, 0 }
196 };
197 
198 static driver_t cxgb_port_driver = {
199 	"cxgb",
200 	cxgb_port_methods,
201 	0
202 };
203 
204 static d_ioctl_t cxgb_extension_ioctl;
205 static d_open_t cxgb_extension_open;
206 static d_close_t cxgb_extension_close;
207 
208 static struct cdevsw cxgb_cdevsw = {
209        .d_version =    D_VERSION,
210        .d_flags =      0,
211        .d_open =       cxgb_extension_open,
212        .d_close =      cxgb_extension_close,
213        .d_ioctl =      cxgb_extension_ioctl,
214        .d_name =       "cxgb",
215 };
216 
217 static devclass_t	cxgb_port_devclass;
218 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
219 MODULE_VERSION(cxgb, 1);
220 
221 DEBUGNET_DEFINE(cxgb);
222 
223 static struct mtx t3_list_lock;
224 static SLIST_HEAD(, adapter) t3_list;
225 #ifdef TCP_OFFLOAD
226 static struct mtx t3_uld_list_lock;
227 static SLIST_HEAD(, uld_info) t3_uld_list;
228 #endif
229 
230 /*
231  * The driver uses the best interrupt scheme available on a platform in the
232  * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
233  * of these schemes the driver may consider as follows:
234  *
235  * msi = 2: choose from among all three options
236  * msi = 1 : only consider MSI and pin interrupts
237  * msi = 0: force pin interrupts
238  */
239 static int msi_allowed = 2;
240 
241 SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
242     "CXGB driver parameters");
243 SYSCTL_INT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
244     "MSI-X, MSI, INTx selector");
245 
246 /*
247  * The driver uses an auto-queue algorithm by default.
248  * To disable it and force a single queue-set per port, use multiq = 0
249  */
250 static int multiq = 1;
251 SYSCTL_INT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
252     "use min(ncpus/ports, 8) queue-sets per port");
253 
254 /*
255  * By default the driver will not update the firmware unless
256  * it was compiled against a newer version
257  *
258  */
259 static int force_fw_update = 0;
260 SYSCTL_INT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
261     "update firmware even if up to date");
262 
263 int cxgb_use_16k_clusters = -1;
264 SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
265     &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue ");
266 
267 static int nfilters = -1;
268 SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
269     &nfilters, 0, "max number of entries in the filter table");
270 
/* Upper and lower bounds for the ring and buffer counts named below. */
enum {
	/* Maximums. */
	MAX_TXQ_ENTRIES		= 16384,
	MAX_CTRL_TXQ_ENTRIES	= 1024,
	MAX_RSPQ_ENTRIES	= 16384,
	MAX_RX_BUFFERS		= 16384,
	MAX_RX_JUMBO_BUFFERS	= 16384,
	/* Minimums. */
	MIN_TXQ_ENTRIES		= 4,
	MIN_CTRL_TXQ_ENTRIES	= 4,
	MIN_RSPQ_ENTRIES	= 32,
	MIN_FL_ENTRIES		= 32,
	MIN_FL_JUMBO_ENTRIES	= 32
};
283 
284 struct filter_info {
285 	u32 sip;
286 	u32 sip_mask;
287 	u32 dip;
288 	u16 sport;
289 	u16 dport;
290 	u32 vlan:12;
291 	u32 vlan_prio:3;
292 	u32 mac_hit:1;
293 	u32 mac_idx:4;
294 	u32 mac_vld:1;
295 	u32 pkt_type:2;
296 	u32 report_filter_id:1;
297 	u32 pass:1;
298 	u32 rss:1;
299 	u32 qset:3;
300 	u32 locked:1;
301 	u32 valid:1;
302 };
303 
304 enum { FILTER_NO_VLAN_PRI = 7 };
305 
306 #define EEPROM_MAGIC 0x38E2F10C
307 
308 #define PORT_MASK ((1 << MAX_NPORTS) - 1)
309 
310 
311 static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
312 
313 
314 static __inline char
315 t3rev2char(struct adapter *adapter)
316 {
317 	char rev = 'z';
318 
319 	switch(adapter->params.rev) {
320 	case T3_REV_A:
321 		rev = 'a';
322 		break;
323 	case T3_REV_B:
324 	case T3_REV_B2:
325 		rev = 'b';
326 		break;
327 	case T3_REV_C:
328 		rev = 'c';
329 		break;
330 	}
331 	return rev;
332 }
333 
334 static struct cxgb_ident *
335 cxgb_get_ident(device_t dev)
336 {
337 	struct cxgb_ident *id;
338 
339 	for (id = cxgb_identifiers; id->desc != NULL; id++) {
340 		if ((id->vendor == pci_get_vendor(dev)) &&
341 		    (id->device == pci_get_device(dev))) {
342 			return (id);
343 		}
344 	}
345 	return (NULL);
346 }
347 
348 static const struct adapter_info *
349 cxgb_get_adapter_info(device_t dev)
350 {
351 	struct cxgb_ident *id;
352 	const struct adapter_info *ai;
353 
354 	id = cxgb_get_ident(dev);
355 	if (id == NULL)
356 		return (NULL);
357 
358 	ai = t3_get_adapter_info(id->index);
359 
360 	return (ai);
361 }
362 
363 static int
364 cxgb_controller_probe(device_t dev)
365 {
366 	const struct adapter_info *ai;
367 	char *ports, buf[80];
368 	int nports;
369 
370 	ai = cxgb_get_adapter_info(dev);
371 	if (ai == NULL)
372 		return (ENXIO);
373 
374 	nports = ai->nports0 + ai->nports1;
375 	if (nports == 1)
376 		ports = "port";
377 	else
378 		ports = "ports";
379 
380 	snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
381 	device_set_desc_copy(dev, buf);
382 	return (BUS_PROBE_DEFAULT);
383 }
384 
385 #define FW_FNAME "cxgb_t3fw"
386 #define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
387 #define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
388 
389 static int
390 upgrade_fw(adapter_t *sc)
391 {
392 	const struct firmware *fw;
393 	int status;
394 	u32 vers;
395 
396 	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
397 		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
398 		return (ENOENT);
399 	} else
400 		device_printf(sc->dev, "installing firmware on card\n");
401 	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
402 
403 	if (status != 0) {
404 		device_printf(sc->dev, "failed to install firmware: %d\n",
405 		    status);
406 	} else {
407 		t3_get_fw_version(sc, &vers);
408 		snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
409 		    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
410 		    G_FW_VERSION_MICRO(vers));
411 	}
412 
413 	firmware_put(fw, FIRMWARE_UNLOAD);
414 
415 	return (status);
416 }
417 
418 /*
419  * The cxgb_controller_attach function is responsible for the initial
420  * bringup of the device.  Its responsibilities include:
421  *
422  *  1. Determine if the device supports MSI or MSI-X.
423  *  2. Allocate bus resources so that we can access the Base Address Register
424  *  3. Create and initialize mutexes for the controller and its control
425  *     logic such as SGE and MDIO.
426  *  4. Call hardware specific setup routine for the adapter as a whole.
427  *  5. Allocate the BAR for doing MSI-X.
428  *  6. Setup the line interrupt iff MSI-X is not supported.
429  *  7. Create the driver's taskq.
430  *  8. Start one task queue service thread.
431  *  9. Check if the firmware and SRAM are up-to-date.  They will be
432  *     auto-updated later (before FULL_INIT_DONE), if required.
433  * 10. Create a child device for each MAC (port)
434  * 11. Initialize T3 private state.
435  * 12. Trigger the LED
436  * 13. Setup offload iff supported.
437  * 14. Reset/restart the tick callout.
438  * 15. Attach sysctls
439  *
440  * NOTE: Any modification or deviation from this list MUST be reflected in
441  * the above comment.  Failure to do so will result in problems on various
442  * error conditions including link flapping.
443  */
444 static int
445 cxgb_controller_attach(device_t dev)
446 {
447 	device_t child;
448 	const struct adapter_info *ai;
449 	struct adapter *sc;
450 	int i, error = 0;
451 	uint32_t vers;
452 	int port_qsets = 1;
453 	int msi_needed, reg;
454 	char buf[80];
455 
456 	sc = device_get_softc(dev);
457 	sc->dev = dev;
458 	sc->msi_count = 0;
459 	ai = cxgb_get_adapter_info(dev);
460 
461 	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
462 	    device_get_unit(dev));
463 	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
464 
465 	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
466 	    device_get_unit(dev));
467 	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
468 	    device_get_unit(dev));
469 	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
470 	    device_get_unit(dev));
471 
472 	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
473 	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
474 	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
475 
476 	mtx_lock(&t3_list_lock);
477 	SLIST_INSERT_HEAD(&t3_list, sc, link);
478 	mtx_unlock(&t3_list_lock);
479 
480 	/* find the PCIe link width and set max read request to 4KB*/
481 	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
482 		uint16_t lnk;
483 
484 		lnk = pci_read_config(dev, reg + PCIER_LINK_STA, 2);
485 		sc->link_width = (lnk & PCIEM_LINK_STA_WIDTH) >> 4;
486 		if (sc->link_width < 8 &&
487 		    (ai->caps & SUPPORTED_10000baseT_Full)) {
488 			device_printf(sc->dev,
489 			    "PCIe x%d Link, expect reduced performance\n",
490 			    sc->link_width);
491 		}
492 
493 		pci_set_max_read_req(dev, 4096);
494 	}
495 
496 	touch_bars(dev);
497 	pci_enable_busmaster(dev);
498 	/*
499 	 * Allocate the registers and make them available to the driver.
500 	 * The registers that we care about for NIC mode are in BAR 0
501 	 */
502 	sc->regs_rid = PCIR_BAR(0);
503 	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
504 	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
505 		device_printf(dev, "Cannot allocate BAR region 0\n");
506 		error = ENXIO;
507 		goto out;
508 	}
509 
510 	sc->bt = rman_get_bustag(sc->regs_res);
511 	sc->bh = rman_get_bushandle(sc->regs_res);
512 	sc->mmio_len = rman_get_size(sc->regs_res);
513 
514 	for (i = 0; i < MAX_NPORTS; i++)
515 		sc->port[i].adapter = sc;
516 
517 	if (t3_prep_adapter(sc, ai, 1) < 0) {
518 		printf("prep adapter failed\n");
519 		error = ENODEV;
520 		goto out;
521 	}
522 
523 	sc->udbs_rid = PCIR_BAR(2);
524 	sc->udbs_res = NULL;
525 	if (is_offload(sc) &&
526 	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
527 		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
528 		device_printf(dev, "Cannot allocate BAR region 1\n");
529 		error = ENXIO;
530 		goto out;
531 	}
532 
533         /* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
534 	 * enough messages for the queue sets.  If that fails, try falling
535 	 * back to MSI.  If that fails, then try falling back to the legacy
536 	 * interrupt pin model.
537 	 */
538 	sc->msix_regs_rid = 0x20;
539 	if ((msi_allowed >= 2) &&
540 	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
541 	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
542 
543 		if (multiq)
544 			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
545 		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
546 
547 		if (pci_msix_count(dev) == 0 ||
548 		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
549 		    sc->msi_count != msi_needed) {
550 			device_printf(dev, "alloc msix failed - "
551 				      "msi_count=%d, msi_needed=%d, err=%d; "
552 				      "will try MSI\n", sc->msi_count,
553 				      msi_needed, error);
554 			sc->msi_count = 0;
555 			port_qsets = 1;
556 			pci_release_msi(dev);
557 			bus_release_resource(dev, SYS_RES_MEMORY,
558 			    sc->msix_regs_rid, sc->msix_regs_res);
559 			sc->msix_regs_res = NULL;
560 		} else {
561 			sc->flags |= USING_MSIX;
562 			sc->cxgb_intr = cxgb_async_intr;
563 			device_printf(dev,
564 				      "using MSI-X interrupts (%u vectors)\n",
565 				      sc->msi_count);
566 		}
567 	}
568 
569 	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
570 		sc->msi_count = 1;
571 		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
572 			device_printf(dev, "alloc msi failed - "
573 				      "err=%d; will try INTx\n", error);
574 			sc->msi_count = 0;
575 			port_qsets = 1;
576 			pci_release_msi(dev);
577 		} else {
578 			sc->flags |= USING_MSI;
579 			sc->cxgb_intr = t3_intr_msi;
580 			device_printf(dev, "using MSI interrupts\n");
581 		}
582 	}
583 	if (sc->msi_count == 0) {
584 		device_printf(dev, "using line interrupts\n");
585 		sc->cxgb_intr = t3b_intr;
586 	}
587 
588 	/* Create a private taskqueue thread for handling driver events */
589 	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
590 	    taskqueue_thread_enqueue, &sc->tq);
591 	if (sc->tq == NULL) {
592 		device_printf(dev, "failed to allocate controller task queue\n");
593 		goto out;
594 	}
595 
596 	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
597 	    device_get_nameunit(dev));
598 	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
599 
600 
601 	/* Create a periodic callout for checking adapter status */
602 	callout_init(&sc->cxgb_tick_ch, 1);
603 
604 	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
605 		/*
606 		 * Warn user that a firmware update will be attempted in init.
607 		 */
608 		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
609 		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
610 		sc->flags &= ~FW_UPTODATE;
611 	} else {
612 		sc->flags |= FW_UPTODATE;
613 	}
614 
615 	if (t3_check_tpsram_version(sc) < 0) {
616 		/*
617 		 * Warn user that a firmware update will be attempted in init.
618 		 */
619 		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
620 		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
621 		sc->flags &= ~TPS_UPTODATE;
622 	} else {
623 		sc->flags |= TPS_UPTODATE;
624 	}
625 
626 	/*
627 	 * Create a child device for each MAC.  The ethernet attachment
628 	 * will be done in these children.
629 	 */
630 	for (i = 0; i < (sc)->params.nports; i++) {
631 		struct port_info *pi;
632 
633 		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
634 			device_printf(dev, "failed to add child port\n");
635 			error = EINVAL;
636 			goto out;
637 		}
638 		pi = &sc->port[i];
639 		pi->adapter = sc;
640 		pi->nqsets = port_qsets;
641 		pi->first_qset = i*port_qsets;
642 		pi->port_id = i;
643 		pi->tx_chan = i >= ai->nports0;
644 		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
645 		sc->rxpkt_map[pi->txpkt_intf] = i;
646 		sc->port[i].tx_chan = i >= ai->nports0;
647 		sc->portdev[i] = child;
648 		device_set_softc(child, pi);
649 	}
650 	if ((error = bus_generic_attach(dev)) != 0)
651 		goto out;
652 
653 	/* initialize sge private state */
654 	t3_sge_init_adapter(sc);
655 
656 	t3_led_ready(sc);
657 
658 	error = t3_get_fw_version(sc, &vers);
659 	if (error)
660 		goto out;
661 
662 	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
663 	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
664 	    G_FW_VERSION_MICRO(vers));
665 
666 	snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
667 		 ai->desc, is_offload(sc) ? "R" : "",
668 		 sc->params.vpd.ec, sc->params.vpd.sn);
669 	device_set_desc_copy(dev, buf);
670 
671 	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
672 		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
673 		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
674 
675 	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
676 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
677 	t3_add_attach_sysctls(sc);
678 
679 #ifdef TCP_OFFLOAD
680 	for (i = 0; i < NUM_CPL_HANDLERS; i++)
681 		sc->cpl_handler[i] = cpl_not_handled;
682 #endif
683 
684 	t3_intr_clear(sc);
685 	error = cxgb_setup_interrupts(sc);
686 out:
687 	if (error)
688 		cxgb_free(sc);
689 
690 	return (error);
691 }
692 
693 /*
694  * The cxgb_controller_detach routine is called with the device is
695  * unloaded from the system.
696  */
697 
698 static int
699 cxgb_controller_detach(device_t dev)
700 {
701 	struct adapter *sc;
702 
703 	sc = device_get_softc(dev);
704 
705 	cxgb_free(sc);
706 
707 	return (0);
708 }
709 
710 /*
711  * The cxgb_free() is called by the cxgb_controller_detach() routine
712  * to tear down the structures that were built up in
713  * cxgb_controller_attach(), and should be the final piece of work
714  * done when fully unloading the driver.
715  *
716  *
717  *  1. Shutting down the threads started by the cxgb_controller_attach()
718  *     routine.
719  *  2. Stopping the lower level device and all callouts (cxgb_down_locked()).
720  *  3. Detaching all of the port devices created during the
721  *     cxgb_controller_attach() routine.
722  *  4. Removing the device children created via cxgb_controller_attach().
723  *  5. Releasing PCI resources associated with the device.
724  *  6. Turning off the offload support, iff it was turned on.
725  *  7. Destroying the mutexes created in cxgb_controller_attach().
726  *
727  */
728 static void
729 cxgb_free(struct adapter *sc)
730 {
731 	int i, nqsets = 0;
732 
733 	ADAPTER_LOCK(sc);
734 	sc->flags |= CXGB_SHUTDOWN;
735 	ADAPTER_UNLOCK(sc);
736 
737 	/*
738 	 * Make sure all child devices are gone.
739 	 */
740 	bus_generic_detach(sc->dev);
741 	for (i = 0; i < (sc)->params.nports; i++) {
742 		if (sc->portdev[i] &&
743 		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
744 			device_printf(sc->dev, "failed to delete child port\n");
745 		nqsets += sc->port[i].nqsets;
746 	}
747 
748 	/*
749 	 * At this point, it is as if cxgb_port_detach has run on all ports, and
750 	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
751 	 * all open devices have been closed.
752 	 */
753 	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
754 					   __func__, sc->open_device_map));
755 	for (i = 0; i < sc->params.nports; i++) {
756 		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
757 						  __func__, i));
758 	}
759 
760 	/*
761 	 * Finish off the adapter's callouts.
762 	 */
763 	callout_drain(&sc->cxgb_tick_ch);
764 	callout_drain(&sc->sge_timer_ch);
765 
766 	/*
767 	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
768 	 * sysctls are cleaned up by the kernel linker.
769 	 */
770 	if (sc->flags & FULL_INIT_DONE) {
771  		t3_free_sge_resources(sc, nqsets);
772  		sc->flags &= ~FULL_INIT_DONE;
773  	}
774 
775 	/*
776 	 * Release all interrupt resources.
777 	 */
778 	cxgb_teardown_interrupts(sc);
779 	if (sc->flags & (USING_MSI | USING_MSIX)) {
780 		device_printf(sc->dev, "releasing msi message(s)\n");
781 		pci_release_msi(sc->dev);
782 	} else {
783 		device_printf(sc->dev, "no msi message to release\n");
784 	}
785 
786 	if (sc->msix_regs_res != NULL) {
787 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
788 		    sc->msix_regs_res);
789 	}
790 
791 	/*
792 	 * Free the adapter's taskqueue.
793 	 */
794 	if (sc->tq != NULL) {
795 		taskqueue_free(sc->tq);
796 		sc->tq = NULL;
797 	}
798 
799 	free(sc->filters, M_DEVBUF);
800 	t3_sge_free(sc);
801 
802 	if (sc->udbs_res != NULL)
803 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
804 		    sc->udbs_res);
805 
806 	if (sc->regs_res != NULL)
807 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
808 		    sc->regs_res);
809 
810 	MTX_DESTROY(&sc->mdio_lock);
811 	MTX_DESTROY(&sc->sge.reg_lock);
812 	MTX_DESTROY(&sc->elmer_lock);
813 	mtx_lock(&t3_list_lock);
814 	SLIST_REMOVE(&t3_list, sc, adapter, link);
815 	mtx_unlock(&t3_list_lock);
816 	ADAPTER_LOCK_DEINIT(sc);
817 }
818 
819 /**
820  *	setup_sge_qsets - configure SGE Tx/Rx/response queues
821  *	@sc: the controller softc
822  *
823  *	Determines how many sets of SGE queues to use and initializes them.
824  *	We support multiple queue sets per port if we have MSI-X, otherwise
825  *	just one queue set per port.
826  */
827 static int
828 setup_sge_qsets(adapter_t *sc)
829 {
830 	int i, j, err, irq_idx = 0, qset_idx = 0;
831 	u_int ntxq = SGE_TXQ_PER_SET;
832 
833 	if ((err = t3_sge_alloc(sc)) != 0) {
834 		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
835 		return (err);
836 	}
837 
838 	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
839 		irq_idx = -1;
840 
841 	for (i = 0; i < (sc)->params.nports; i++) {
842 		struct port_info *pi = &sc->port[i];
843 
844 		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
845 			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
846 			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
847 			    &sc->params.sge.qset[qset_idx], ntxq, pi);
848 			if (err) {
849 				t3_free_sge_resources(sc, qset_idx);
850 				device_printf(sc->dev,
851 				    "t3_sge_alloc_qset failed with %d\n", err);
852 				return (err);
853 			}
854 		}
855 	}
856 
857 	sc->nqsets = qset_idx;
858 
859 	return (0);
860 }
861 
862 static void
863 cxgb_teardown_interrupts(adapter_t *sc)
864 {
865 	int i;
866 
867 	for (i = 0; i < SGE_QSETS; i++) {
868 		if (sc->msix_intr_tag[i] == NULL) {
869 
870 			/* Should have been setup fully or not at all */
871 			KASSERT(sc->msix_irq_res[i] == NULL &&
872 				sc->msix_irq_rid[i] == 0,
873 				("%s: half-done interrupt (%d).", __func__, i));
874 
875 			continue;
876 		}
877 
878 		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
879 				  sc->msix_intr_tag[i]);
880 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
881 				     sc->msix_irq_res[i]);
882 
883 		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
884 		sc->msix_irq_rid[i] = 0;
885 	}
886 
887 	if (sc->intr_tag) {
888 		KASSERT(sc->irq_res != NULL,
889 			("%s: half-done interrupt.", __func__));
890 
891 		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
892 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
893 				     sc->irq_res);
894 
895 		sc->irq_res = sc->intr_tag = NULL;
896 		sc->irq_rid = 0;
897 	}
898 }
899 
900 static int
901 cxgb_setup_interrupts(adapter_t *sc)
902 {
903 	struct resource *res;
904 	void *tag;
905 	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
906 
907 	sc->irq_rid = intr_flag ? 1 : 0;
908 	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
909 					     RF_SHAREABLE | RF_ACTIVE);
910 	if (sc->irq_res == NULL) {
911 		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
912 			      intr_flag, sc->irq_rid);
913 		err = EINVAL;
914 		sc->irq_rid = 0;
915 	} else {
916 		err = bus_setup_intr(sc->dev, sc->irq_res,
917 		    INTR_MPSAFE | INTR_TYPE_NET, NULL,
918 		    sc->cxgb_intr, sc, &sc->intr_tag);
919 
920 		if (err) {
921 			device_printf(sc->dev,
922 				      "Cannot set up interrupt (%x, %u, %d)\n",
923 				      intr_flag, sc->irq_rid, err);
924 			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
925 					     sc->irq_res);
926 			sc->irq_res = sc->intr_tag = NULL;
927 			sc->irq_rid = 0;
928 		}
929 	}
930 
931 	/* That's all for INTx or MSI */
932 	if (!(intr_flag & USING_MSIX) || err)
933 		return (err);
934 
935 	bus_describe_intr(sc->dev, sc->irq_res, sc->intr_tag, "err");
936 	for (i = 0; i < sc->msi_count - 1; i++) {
937 		rid = i + 2;
938 		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
939 					     RF_SHAREABLE | RF_ACTIVE);
940 		if (res == NULL) {
941 			device_printf(sc->dev, "Cannot allocate interrupt "
942 				      "for message %d\n", rid);
943 			err = EINVAL;
944 			break;
945 		}
946 
947 		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
948 				     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
949 		if (err) {
950 			device_printf(sc->dev, "Cannot set up interrupt "
951 				      "for message %d (%d)\n", rid, err);
952 			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
953 			break;
954 		}
955 
956 		sc->msix_irq_rid[i] = rid;
957 		sc->msix_irq_res[i] = res;
958 		sc->msix_intr_tag[i] = tag;
959 		bus_describe_intr(sc->dev, res, tag, "qs%d", i);
960 	}
961 
962 	if (err)
963 		cxgb_teardown_interrupts(sc);
964 
965 	return (err);
966 }
967 
968 
969 static int
970 cxgb_port_probe(device_t dev)
971 {
972 	struct port_info *p;
973 	char buf[80];
974 	const char *desc;
975 
976 	p = device_get_softc(dev);
977 	desc = p->phy.desc;
978 	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
979 	device_set_desc_copy(dev, buf);
980 	return (0);
981 }
982 
983 
984 static int
985 cxgb_makedev(struct port_info *pi)
986 {
987 
988 	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
989 	    UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));
990 
991 	if (pi->port_cdev == NULL)
992 		return (ENOMEM);
993 
994 	pi->port_cdev->si_drv1 = (void *)pi;
995 
996 	return (0);
997 }
998 
/*
 * Interface capabilities advertised by each port (if_capabilities).  The
 * same set is enabled by default (if_capenable).
 */
#define CXGB_CAP							\
	(IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM |	\
	 IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO |	\
	 IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6)
#define CXGB_CAP_ENABLE	CXGB_CAP
1003 
1004 static int
1005 cxgb_port_attach(device_t dev)
1006 {
1007 	struct port_info *p;
1008 	struct ifnet *ifp;
1009 	int err;
1010 	struct adapter *sc;
1011 
1012 	p = device_get_softc(dev);
1013 	sc = p->adapter;
1014 	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
1015 	    device_get_unit(device_get_parent(dev)), p->port_id);
1016 	PORT_LOCK_INIT(p, p->lockbuf);
1017 
1018 	callout_init(&p->link_check_ch, 1);
1019 	TASK_INIT(&p->link_check_task, 0, check_link_status, p);
1020 
1021 	/* Allocate an ifnet object and set it up */
1022 	ifp = p->ifp = if_alloc(IFT_ETHER);
1023 	if (ifp == NULL) {
1024 		device_printf(dev, "Cannot allocate ifnet\n");
1025 		return (ENOMEM);
1026 	}
1027 
1028 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1029 	ifp->if_init = cxgb_init;
1030 	ifp->if_softc = p;
1031 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1032 	ifp->if_ioctl = cxgb_ioctl;
1033 	ifp->if_transmit = cxgb_transmit;
1034 	ifp->if_qflush = cxgb_qflush;
1035 	ifp->if_get_counter = cxgb_get_counter;
1036 
1037 	ifp->if_capabilities = CXGB_CAP;
1038 #ifdef TCP_OFFLOAD
1039 	if (is_offload(sc))
1040 		ifp->if_capabilities |= IFCAP_TOE4;
1041 #endif
1042 	ifp->if_capenable = CXGB_CAP_ENABLE;
1043 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
1044 	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6;
1045 	ifp->if_hw_tsomax = IP_MAXPACKET;
1046 	ifp->if_hw_tsomaxsegcount = 36;
1047 	ifp->if_hw_tsomaxsegsize = 65536;
1048 
1049 	/*
1050 	 * Disable TSO on 4-port - it isn't supported by the firmware.
1051 	 */
1052 	if (sc->params.nports > 2) {
1053 		ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1054 		ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1055 		ifp->if_hwassist &= ~CSUM_TSO;
1056 	}
1057 
1058 	ether_ifattach(ifp, p->hw_addr);
1059 
1060 	/* Attach driver debugnet methods. */
1061 	DEBUGNET_SET(ifp, cxgb);
1062 
1063 #ifdef DEFAULT_JUMBO
1064 	if (sc->params.nports <= 2)
1065 		ifp->if_mtu = ETHERMTU_JUMBO;
1066 #endif
1067 	if ((err = cxgb_makedev(p)) != 0) {
1068 		printf("makedev failed %d\n", err);
1069 		return (err);
1070 	}
1071 
1072 	/* Create a list of media supported by this port */
1073 	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1074 	    cxgb_media_status);
1075 	cxgb_build_medialist(p);
1076 
1077 	t3_sge_init_port(p);
1078 
1079 	return (err);
1080 }
1081 
1082 /*
1083  * cxgb_port_detach() is called via the device_detach methods when
1084  * cxgb_free() calls the bus_generic_detach.  It is responsible for
1085  * removing the device from the view of the kernel, i.e. from all
1086  * interfaces lists etc.  This routine is only called when the driver is
1087  * being unloaded, not when the link goes down.
1088  */
1089 static int
1090 cxgb_port_detach(device_t dev)
1091 {
1092 	struct port_info *p;
1093 	struct adapter *sc;
1094 	int i;
1095 
1096 	p = device_get_softc(dev);
1097 	sc = p->adapter;
1098 
1099 	/* Tell cxgb_ioctl and if_init that the port is going away */
1100 	ADAPTER_LOCK(sc);
1101 	SET_DOOMED(p);
1102 	wakeup(&sc->flags);
1103 	while (IS_BUSY(sc))
1104 		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1105 	SET_BUSY(sc);
1106 	ADAPTER_UNLOCK(sc);
1107 
1108 	if (p->port_cdev != NULL)
1109 		destroy_dev(p->port_cdev);
1110 
1111 	cxgb_uninit_synchronized(p);
1112 	ether_ifdetach(p->ifp);
1113 
1114 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1115 		struct sge_qset *qs = &sc->sge.qs[i];
1116 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1117 
1118 		callout_drain(&txq->txq_watchdog);
1119 		callout_drain(&txq->txq_timer);
1120 	}
1121 
1122 	PORT_LOCK_DEINIT(p);
1123 	if_free(p->ifp);
1124 	p->ifp = NULL;
1125 
1126 	ADAPTER_LOCK(sc);
1127 	CLR_BUSY(sc);
1128 	wakeup_one(&sc->flags);
1129 	ADAPTER_UNLOCK(sc);
1130 	return (0);
1131 }
1132 
1133 void
1134 t3_fatal_err(struct adapter *sc)
1135 {
1136 	u_int fw_status[4];
1137 
1138 	if (sc->flags & FULL_INIT_DONE) {
1139 		t3_sge_stop(sc);
1140 		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1141 		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1142 		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1143 		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1144 		t3_intr_disable(sc);
1145 	}
1146 	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1147 	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1148 		device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1149 		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1150 }
1151 
1152 int
1153 t3_os_find_pci_capability(adapter_t *sc, int cap)
1154 {
1155 	device_t dev;
1156 	struct pci_devinfo *dinfo;
1157 	pcicfgregs *cfg;
1158 	uint32_t status;
1159 	uint8_t ptr;
1160 
1161 	dev = sc->dev;
1162 	dinfo = device_get_ivars(dev);
1163 	cfg = &dinfo->cfg;
1164 
1165 	status = pci_read_config(dev, PCIR_STATUS, 2);
1166 	if (!(status & PCIM_STATUS_CAPPRESENT))
1167 		return (0);
1168 
1169 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1170 	case 0:
1171 	case 1:
1172 		ptr = PCIR_CAP_PTR;
1173 		break;
1174 	case 2:
1175 		ptr = PCIR_CAP_PTR_2;
1176 		break;
1177 	default:
1178 		return (0);
1179 		break;
1180 	}
1181 	ptr = pci_read_config(dev, ptr, 1);
1182 
1183 	while (ptr != 0) {
1184 		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1185 			return (ptr);
1186 		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1187 	}
1188 
1189 	return (0);
1190 }
1191 
1192 int
1193 t3_os_pci_save_state(struct adapter *sc)
1194 {
1195 	device_t dev;
1196 	struct pci_devinfo *dinfo;
1197 
1198 	dev = sc->dev;
1199 	dinfo = device_get_ivars(dev);
1200 
1201 	pci_cfg_save(dev, dinfo, 0);
1202 	return (0);
1203 }
1204 
1205 int
1206 t3_os_pci_restore_state(struct adapter *sc)
1207 {
1208 	device_t dev;
1209 	struct pci_devinfo *dinfo;
1210 
1211 	dev = sc->dev;
1212 	dinfo = device_get_ivars(dev);
1213 
1214 	pci_cfg_restore(dev, dinfo);
1215 	return (0);
1216 }
1217 
1218 /**
1219  *	t3_os_link_changed - handle link status changes
1220  *	@sc: the adapter associated with the link change
1221  *	@port_id: the port index whose link status has changed
1222  *	@link_status: the new status of the link
1223  *	@speed: the new speed setting
1224  *	@duplex: the new duplex setting
1225  *	@fc: the new flow-control setting
1226  *
1227  *	This is the OS-dependent handler for link status changes.  The OS
1228  *	neutral handler takes care of most of the processing for these events,
1229  *	then calls this handler for any OS-specific processing.
1230  */
1231 void
1232 t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1233      int duplex, int fc, int mac_was_reset)
1234 {
1235 	struct port_info *pi = &adapter->port[port_id];
1236 	struct ifnet *ifp = pi->ifp;
1237 
1238 	/* no race with detach, so ifp should always be good */
1239 	KASSERT(ifp, ("%s: if detached.", __func__));
1240 
1241 	/* Reapply mac settings if they were lost due to a reset */
1242 	if (mac_was_reset) {
1243 		PORT_LOCK(pi);
1244 		cxgb_update_mac_settings(pi);
1245 		PORT_UNLOCK(pi);
1246 	}
1247 
1248 	if (link_status) {
1249 		ifp->if_baudrate = IF_Mbps(speed);
1250 		if_link_state_change(ifp, LINK_STATE_UP);
1251 	} else
1252 		if_link_state_change(ifp, LINK_STATE_DOWN);
1253 }
1254 
1255 /**
1256  *	t3_os_phymod_changed - handle PHY module changes
1257  *	@phy: the PHY reporting the module change
1258  *	@mod_type: new module type
1259  *
1260  *	This is the OS-dependent handler for PHY module changes.  It is
1261  *	invoked when a PHY module is removed or inserted for any OS-specific
1262  *	processing.
1263  */
1264 void t3_os_phymod_changed(struct adapter *adap, int port_id)
1265 {
1266 	static const char *mod_str[] = {
1267 		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1268 	};
1269 	struct port_info *pi = &adap->port[port_id];
1270 	int mod = pi->phy.modtype;
1271 
1272 	if (mod != pi->media.ifm_cur->ifm_data)
1273 		cxgb_build_medialist(pi);
1274 
1275 	if (mod == phy_modtype_none)
1276 		if_printf(pi->ifp, "PHY module unplugged\n");
1277 	else {
1278 		KASSERT(mod < ARRAY_SIZE(mod_str),
1279 			("invalid PHY module type %d", mod));
1280 		if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1281 	}
1282 }
1283 
1284 void
1285 t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1286 {
1287 
1288 	/*
1289 	 * The ifnet might not be allocated before this gets called,
1290 	 * as this is called early on in attach by t3_prep_adapter
1291 	 * save the address off in the port structure
1292 	 */
1293 	if (cxgb_debug)
1294 		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1295 	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1296 }
1297 
1298 /*
1299  * Programs the XGMAC based on the settings in the ifnet.  These settings
1300  * include MTU, MAC address, mcast addresses, etc.
1301  */
1302 static void
1303 cxgb_update_mac_settings(struct port_info *p)
1304 {
1305 	struct ifnet *ifp = p->ifp;
1306 	struct t3_rx_mode rm;
1307 	struct cmac *mac = &p->mac;
1308 	int mtu, hwtagging;
1309 
1310 	PORT_LOCK_ASSERT_OWNED(p);
1311 
1312 	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1313 
1314 	mtu = ifp->if_mtu;
1315 	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1316 		mtu += ETHER_VLAN_ENCAP_LEN;
1317 
1318 	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1319 
1320 	t3_mac_set_mtu(mac, mtu);
1321 	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1322 	t3_mac_set_address(mac, 0, p->hw_addr);
1323 	t3_init_rx_mode(&rm, p);
1324 	t3_mac_set_rx_mode(mac, &rm);
1325 }
1326 
1327 
1328 static int
1329 await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1330 			      unsigned long n)
1331 {
1332 	int attempts = 5;
1333 
1334 	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1335 		if (!--attempts)
1336 			return (ETIMEDOUT);
1337 		t3_os_sleep(10);
1338 	}
1339 	return 0;
1340 }
1341 
1342 static int
1343 init_tp_parity(struct adapter *adap)
1344 {
1345 	int i;
1346 	struct mbuf *m;
1347 	struct cpl_set_tcb_field *greq;
1348 	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1349 
1350 	t3_tp_set_offload_mode(adap, 1);
1351 
1352 	for (i = 0; i < 16; i++) {
1353 		struct cpl_smt_write_req *req;
1354 
1355 		m = m_gethdr(M_WAITOK, MT_DATA);
1356 		req = mtod(m, struct cpl_smt_write_req *);
1357 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1358 		memset(req, 0, sizeof(*req));
1359 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1360 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1361 		req->iff = i;
1362 		t3_mgmt_tx(adap, m);
1363 	}
1364 
1365 	for (i = 0; i < 2048; i++) {
1366 		struct cpl_l2t_write_req *req;
1367 
1368 		m = m_gethdr(M_WAITOK, MT_DATA);
1369 		req = mtod(m, struct cpl_l2t_write_req *);
1370 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1371 		memset(req, 0, sizeof(*req));
1372 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1373 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1374 		req->params = htonl(V_L2T_W_IDX(i));
1375 		t3_mgmt_tx(adap, m);
1376 	}
1377 
1378 	for (i = 0; i < 2048; i++) {
1379 		struct cpl_rte_write_req *req;
1380 
1381 		m = m_gethdr(M_WAITOK, MT_DATA);
1382 		req = mtod(m, struct cpl_rte_write_req *);
1383 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1384 		memset(req, 0, sizeof(*req));
1385 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1386 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1387 		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1388 		t3_mgmt_tx(adap, m);
1389 	}
1390 
1391 	m = m_gethdr(M_WAITOK, MT_DATA);
1392 	greq = mtod(m, struct cpl_set_tcb_field *);
1393 	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1394 	memset(greq, 0, sizeof(*greq));
1395 	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1396 	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1397 	greq->mask = htobe64(1);
1398 	t3_mgmt_tx(adap, m);
1399 
1400 	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1401 	t3_tp_set_offload_mode(adap, 0);
1402 	return (i);
1403 }
1404 
1405 /**
1406  *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1407  *	@adap: the adapter
1408  *
1409  *	Sets up RSS to distribute packets to multiple receive queues.  We
1410  *	configure the RSS CPU lookup table to distribute to the number of HW
1411  *	receive queues, and the response queue lookup table to narrow that
1412  *	down to the response queues actually configured for each port.
1413  *	We always configure the RSS mapping for two ports since the mapping
1414  *	table has plenty of entries.
1415  */
1416 static void
1417 setup_rss(adapter_t *adap)
1418 {
1419 	int i;
1420 	u_int nq[2];
1421 	uint8_t cpus[SGE_QSETS + 1];
1422 	uint16_t rspq_map[RSS_TABLE_SIZE];
1423 
1424 	for (i = 0; i < SGE_QSETS; ++i)
1425 		cpus[i] = i;
1426 	cpus[SGE_QSETS] = 0xff;
1427 
1428 	nq[0] = nq[1] = 0;
1429 	for_each_port(adap, i) {
1430 		const struct port_info *pi = adap2pinfo(adap, i);
1431 
1432 		nq[pi->tx_chan] += pi->nqsets;
1433 	}
1434 	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1435 		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1436 		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1437 	}
1438 
1439 	/* Calculate the reverse RSS map table */
1440 	for (i = 0; i < SGE_QSETS; ++i)
1441 		adap->rrss_map[i] = 0xff;
1442 	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1443 		if (adap->rrss_map[rspq_map[i]] == 0xff)
1444 			adap->rrss_map[rspq_map[i]] = i;
1445 
1446 	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1447 		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1448 	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1449 	              cpus, rspq_map);
1450 
1451 }
1452 static void
1453 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1454 			      int hi, int port)
1455 {
1456 	struct mbuf *m;
1457 	struct mngt_pktsched_wr *req;
1458 
1459 	m = m_gethdr(M_NOWAIT, MT_DATA);
1460 	if (m) {
1461 		req = mtod(m, struct mngt_pktsched_wr *);
1462 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1463 		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1464 		req->sched = sched;
1465 		req->idx = qidx;
1466 		req->min = lo;
1467 		req->max = hi;
1468 		req->binding = port;
1469 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1470 		t3_mgmt_tx(adap, m);
1471 	}
1472 }
1473 
1474 static void
1475 bind_qsets(adapter_t *sc)
1476 {
1477 	int i, j;
1478 
1479 	for (i = 0; i < (sc)->params.nports; ++i) {
1480 		const struct port_info *pi = adap2pinfo(sc, i);
1481 
1482 		for (j = 0; j < pi->nqsets; ++j) {
1483 			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1484 					  -1, pi->tx_chan);
1485 
1486 		}
1487 	}
1488 }
1489 
1490 static void
1491 update_tpeeprom(struct adapter *adap)
1492 {
1493 	const struct firmware *tpeeprom;
1494 
1495 	uint32_t version;
1496 	unsigned int major, minor;
1497 	int ret, len;
1498 	char rev, name[32];
1499 
1500 	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1501 
1502 	major = G_TP_VERSION_MAJOR(version);
1503 	minor = G_TP_VERSION_MINOR(version);
1504 	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1505 		return;
1506 
1507 	rev = t3rev2char(adap);
1508 	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1509 
1510 	tpeeprom = firmware_get(name);
1511 	if (tpeeprom == NULL) {
1512 		device_printf(adap->dev,
1513 			      "could not load TP EEPROM: unable to load %s\n",
1514 			      name);
1515 		return;
1516 	}
1517 
1518 	len = tpeeprom->datasize - 4;
1519 
1520 	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1521 	if (ret)
1522 		goto release_tpeeprom;
1523 
1524 	if (len != TP_SRAM_LEN) {
1525 		device_printf(adap->dev,
1526 			      "%s length is wrong len=%d expected=%d\n", name,
1527 			      len, TP_SRAM_LEN);
1528 		return;
1529 	}
1530 
1531 	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1532 	    TP_SRAM_OFFSET);
1533 
1534 	if (!ret) {
1535 		device_printf(adap->dev,
1536 			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1537 			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1538 	} else
1539 		device_printf(adap->dev,
1540 			      "Protocol SRAM image update in EEPROM failed\n");
1541 
1542 release_tpeeprom:
1543 	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1544 
1545 	return;
1546 }
1547 
1548 static int
1549 update_tpsram(struct adapter *adap)
1550 {
1551 	const struct firmware *tpsram;
1552 	int ret;
1553 	char rev, name[32];
1554 
1555 	rev = t3rev2char(adap);
1556 	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1557 
1558 	update_tpeeprom(adap);
1559 
1560 	tpsram = firmware_get(name);
1561 	if (tpsram == NULL){
1562 		device_printf(adap->dev, "could not load TP SRAM\n");
1563 		return (EINVAL);
1564 	} else
1565 		device_printf(adap->dev, "updating TP SRAM\n");
1566 
1567 	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1568 	if (ret)
1569 		goto release_tpsram;
1570 
1571 	ret = t3_set_proto_sram(adap, tpsram->data);
1572 	if (ret)
1573 		device_printf(adap->dev, "loading protocol SRAM failed\n");
1574 
1575 release_tpsram:
1576 	firmware_put(tpsram, FIRMWARE_UNLOAD);
1577 
1578 	return ret;
1579 }
1580 
1581 /**
1582  *	cxgb_up - enable the adapter
1583  *	@adap: adapter being enabled
1584  *
1585  *	Called when the first port is enabled, this function performs the
1586  *	actions necessary to make an adapter operational, such as completing
1587  *	the initialization of HW modules, and enabling interrupts.
1588  */
1589 static int
1590 cxgb_up(struct adapter *sc)
1591 {
1592 	int err = 0;
1593 	unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;
1594 
1595 	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1596 					   __func__, sc->open_device_map));
1597 
1598 	if ((sc->flags & FULL_INIT_DONE) == 0) {
1599 
1600 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1601 
1602 		if ((sc->flags & FW_UPTODATE) == 0)
1603 			if ((err = upgrade_fw(sc)))
1604 				goto out;
1605 
1606 		if ((sc->flags & TPS_UPTODATE) == 0)
1607 			if ((err = update_tpsram(sc)))
1608 				goto out;
1609 
1610 		if (is_offload(sc) && nfilters != 0) {
1611 			sc->params.mc5.nservers = 0;
1612 
1613 			if (nfilters < 0)
1614 				sc->params.mc5.nfilters = mxf;
1615 			else
1616 				sc->params.mc5.nfilters = min(nfilters, mxf);
1617 		}
1618 
1619 		err = t3_init_hw(sc, 0);
1620 		if (err)
1621 			goto out;
1622 
1623 		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1624 		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1625 
1626 		err = setup_sge_qsets(sc);
1627 		if (err)
1628 			goto out;
1629 
1630 		alloc_filters(sc);
1631 		setup_rss(sc);
1632 
1633 		t3_add_configured_sysctls(sc);
1634 		sc->flags |= FULL_INIT_DONE;
1635 	}
1636 
1637 	t3_intr_clear(sc);
1638 	t3_sge_start(sc);
1639 	t3_intr_enable(sc);
1640 
1641 	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1642 	    is_offload(sc) && init_tp_parity(sc) == 0)
1643 		sc->flags |= TP_PARITY_INIT;
1644 
1645 	if (sc->flags & TP_PARITY_INIT) {
1646 		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1647 		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1648 	}
1649 
1650 	if (!(sc->flags & QUEUES_BOUND)) {
1651 		bind_qsets(sc);
1652 		setup_hw_filters(sc);
1653 		sc->flags |= QUEUES_BOUND;
1654 	}
1655 
1656 	t3_sge_reset_adapter(sc);
1657 out:
1658 	return (err);
1659 }
1660 
/*
 * Counterpart of cxgb_up(), called when the last open device is closed.
 * It deliberately does NOT undo everything cxgb_up() did: the resources
 * acquired under FULL_INIT_DONE stay in place until controller_detach.
 * All that happens here is stopping the SGE and disabling interrupts.
 */
static void
cxgb_down(struct adapter *sc)
{

	t3_sge_stop(sc);
	t3_intr_disable(sc);
}
1672 
1673 /*
1674  * if_init for cxgb ports.
1675  */
1676 static void
1677 cxgb_init(void *arg)
1678 {
1679 	struct port_info *p = arg;
1680 	struct adapter *sc = p->adapter;
1681 
1682 	ADAPTER_LOCK(sc);
1683 	cxgb_init_locked(p); /* releases adapter lock */
1684 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1685 }
1686 
1687 static int
1688 cxgb_init_locked(struct port_info *p)
1689 {
1690 	struct adapter *sc = p->adapter;
1691 	struct ifnet *ifp = p->ifp;
1692 	struct cmac *mac = &p->mac;
1693 	int i, rc = 0, may_sleep = 0, gave_up_lock = 0;
1694 
1695 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1696 
1697 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1698 		gave_up_lock = 1;
1699 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1700 			rc = EINTR;
1701 			goto done;
1702 		}
1703 	}
1704 	if (IS_DOOMED(p)) {
1705 		rc = ENXIO;
1706 		goto done;
1707 	}
1708 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1709 
1710 	/*
1711 	 * The code that runs during one-time adapter initialization can sleep
1712 	 * so it's important not to hold any locks across it.
1713 	 */
1714 	may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1715 
1716 	if (may_sleep) {
1717 		SET_BUSY(sc);
1718 		gave_up_lock = 1;
1719 		ADAPTER_UNLOCK(sc);
1720 	}
1721 
1722 	if (sc->open_device_map == 0 && ((rc = cxgb_up(sc)) != 0))
1723 			goto done;
1724 
1725 	PORT_LOCK(p);
1726 	if (isset(&sc->open_device_map, p->port_id) &&
1727 	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1728 		PORT_UNLOCK(p);
1729 		goto done;
1730 	}
1731 	t3_port_intr_enable(sc, p->port_id);
1732 	if (!mac->multiport)
1733 		t3_mac_init(mac);
1734 	cxgb_update_mac_settings(p);
1735 	t3_link_start(&p->phy, mac, &p->link_config);
1736 	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1737 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1738 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1739 	PORT_UNLOCK(p);
1740 
1741 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1742 		struct sge_qset *qs = &sc->sge.qs[i];
1743 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1744 
1745 		callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1746 				 txq->txq_watchdog.c_cpu);
1747 	}
1748 
1749 	/* all ok */
1750 	setbit(&sc->open_device_map, p->port_id);
1751 	callout_reset(&p->link_check_ch,
1752 	    p->phy.caps & SUPPORTED_LINK_IRQ ?  hz * 3 : hz / 4,
1753 	    link_check_callout, p);
1754 
1755 done:
1756 	if (may_sleep) {
1757 		ADAPTER_LOCK(sc);
1758 		KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1759 		CLR_BUSY(sc);
1760 	}
1761 	if (gave_up_lock)
1762 		wakeup_one(&sc->flags);
1763 	ADAPTER_UNLOCK(sc);
1764 	return (rc);
1765 }
1766 
1767 static int
1768 cxgb_uninit_locked(struct port_info *p)
1769 {
1770 	struct adapter *sc = p->adapter;
1771 	int rc;
1772 
1773 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1774 
1775 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1776 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1777 			rc = EINTR;
1778 			goto done;
1779 		}
1780 	}
1781 	if (IS_DOOMED(p)) {
1782 		rc = ENXIO;
1783 		goto done;
1784 	}
1785 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1786 	SET_BUSY(sc);
1787 	ADAPTER_UNLOCK(sc);
1788 
1789 	rc = cxgb_uninit_synchronized(p);
1790 
1791 	ADAPTER_LOCK(sc);
1792 	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1793 	CLR_BUSY(sc);
1794 	wakeup_one(&sc->flags);
1795 done:
1796 	ADAPTER_UNLOCK(sc);
1797 	return (rc);
1798 }
1799 
1800 /*
1801  * Called on "ifconfig down", and from port_detach
1802  */
1803 static int
1804 cxgb_uninit_synchronized(struct port_info *pi)
1805 {
1806 	struct adapter *sc = pi->adapter;
1807 	struct ifnet *ifp = pi->ifp;
1808 
1809 	/*
1810 	 * taskqueue_drain may cause a deadlock if the adapter lock is held.
1811 	 */
1812 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1813 
1814 	/*
1815 	 * Clear this port's bit from the open device map, and then drain all
1816 	 * the tasks that can access/manipulate this port's port_info or ifp.
1817 	 * We disable this port's interrupts here and so the slow/ext
1818 	 * interrupt tasks won't be enqueued.  The tick task will continue to
1819 	 * be enqueued every second but the runs after this drain will not see
1820 	 * this port in the open device map.
1821 	 *
1822 	 * A well behaved task must take open_device_map into account and ignore
1823 	 * ports that are not open.
1824 	 */
1825 	clrbit(&sc->open_device_map, pi->port_id);
1826 	t3_port_intr_disable(sc, pi->port_id);
1827 	taskqueue_drain(sc->tq, &sc->slow_intr_task);
1828 	taskqueue_drain(sc->tq, &sc->tick_task);
1829 
1830 	callout_drain(&pi->link_check_ch);
1831 	taskqueue_drain(sc->tq, &pi->link_check_task);
1832 
1833 	PORT_LOCK(pi);
1834 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1835 
1836 	/* disable pause frames */
1837 	t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1838 
1839 	/* Reset RX FIFO HWM */
1840 	t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1841 			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1842 
1843 	DELAY(100 * 1000);
1844 
1845 	/* Wait for TXFIFO empty */
1846 	t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1847 			F_TXFIFO_EMPTY, 1, 20, 5);
1848 
1849 	DELAY(100 * 1000);
1850 	t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1851 
1852 	pi->phy.ops->power_down(&pi->phy, 1);
1853 
1854 	PORT_UNLOCK(pi);
1855 
1856 	pi->link_config.link_ok = 0;
1857 	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1858 
1859 	if (sc->open_device_map == 0)
1860 		cxgb_down(pi->adapter);
1861 
1862 	return (0);
1863 }
1864 
1865 /*
1866  * Mark lro enabled or disabled in all qsets for this port
1867  */
1868 static int
1869 cxgb_set_lro(struct port_info *p, int enabled)
1870 {
1871 	int i;
1872 	struct adapter *adp = p->adapter;
1873 	struct sge_qset *q;
1874 
1875 	for (i = 0; i < p->nqsets; i++) {
1876 		q = &adp->sge.qs[p->first_qset + i];
1877 		q->lro.enabled = (enabled != 0);
1878 	}
1879 	return (0);
1880 }
1881 
1882 static int
1883 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1884 {
1885 	struct port_info *p = ifp->if_softc;
1886 	struct adapter *sc = p->adapter;
1887 	struct ifreq *ifr = (struct ifreq *)data;
1888 	int flags, error = 0, mtu;
1889 	uint32_t mask;
1890 
1891 	switch (command) {
1892 	case SIOCSIFMTU:
1893 		ADAPTER_LOCK(sc);
1894 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1895 		if (error) {
1896 fail:
1897 			ADAPTER_UNLOCK(sc);
1898 			return (error);
1899 		}
1900 
1901 		mtu = ifr->ifr_mtu;
1902 		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
1903 			error = EINVAL;
1904 		} else {
1905 			ifp->if_mtu = mtu;
1906 			PORT_LOCK(p);
1907 			cxgb_update_mac_settings(p);
1908 			PORT_UNLOCK(p);
1909 		}
1910 		ADAPTER_UNLOCK(sc);
1911 		break;
1912 	case SIOCSIFFLAGS:
1913 		ADAPTER_LOCK(sc);
1914 		if (IS_DOOMED(p)) {
1915 			error = ENXIO;
1916 			goto fail;
1917 		}
1918 		if (ifp->if_flags & IFF_UP) {
1919 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1920 				flags = p->if_flags;
1921 				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
1922 				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
1923 					if (IS_BUSY(sc)) {
1924 						error = EBUSY;
1925 						goto fail;
1926 					}
1927 					PORT_LOCK(p);
1928 					cxgb_update_mac_settings(p);
1929 					PORT_UNLOCK(p);
1930 				}
1931 				ADAPTER_UNLOCK(sc);
1932 			} else
1933 				error = cxgb_init_locked(p);
1934 			p->if_flags = ifp->if_flags;
1935 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1936 			error = cxgb_uninit_locked(p);
1937 		else
1938 			ADAPTER_UNLOCK(sc);
1939 
1940 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1941 		break;
1942 	case SIOCADDMULTI:
1943 	case SIOCDELMULTI:
1944 		ADAPTER_LOCK(sc);
1945 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1946 		if (error)
1947 			goto fail;
1948 
1949 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1950 			PORT_LOCK(p);
1951 			cxgb_update_mac_settings(p);
1952 			PORT_UNLOCK(p);
1953 		}
1954 		ADAPTER_UNLOCK(sc);
1955 
1956 		break;
1957 	case SIOCSIFCAP:
1958 		ADAPTER_LOCK(sc);
1959 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1960 		if (error)
1961 			goto fail;
1962 
1963 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1964 		if (mask & IFCAP_TXCSUM) {
1965 			ifp->if_capenable ^= IFCAP_TXCSUM;
1966 			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
1967 
1968 			if (IFCAP_TSO4 & ifp->if_capenable &&
1969 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1970 				mask &= ~IFCAP_TSO4;
1971 				ifp->if_capenable &= ~IFCAP_TSO4;
1972 				if_printf(ifp,
1973 				    "tso4 disabled due to -txcsum.\n");
1974 			}
1975 		}
1976 		if (mask & IFCAP_TXCSUM_IPV6) {
1977 			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1978 			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
1979 
1980 			if (IFCAP_TSO6 & ifp->if_capenable &&
1981 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1982 				mask &= ~IFCAP_TSO6;
1983 				ifp->if_capenable &= ~IFCAP_TSO6;
1984 				if_printf(ifp,
1985 				    "tso6 disabled due to -txcsum6.\n");
1986 			}
1987 		}
1988 		if (mask & IFCAP_RXCSUM)
1989 			ifp->if_capenable ^= IFCAP_RXCSUM;
1990 		if (mask & IFCAP_RXCSUM_IPV6)
1991 			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1992 
1993 		/*
1994 		 * Note that we leave CSUM_TSO alone (it is always set).  The
1995 		 * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
1996 		 * sending a TSO request our way, so it's sufficient to toggle
1997 		 * IFCAP_TSOx only.
1998 		 */
1999 		if (mask & IFCAP_TSO4) {
2000 			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
2001 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2002 				if_printf(ifp, "enable txcsum first.\n");
2003 				error = EAGAIN;
2004 				goto fail;
2005 			}
2006 			ifp->if_capenable ^= IFCAP_TSO4;
2007 		}
2008 		if (mask & IFCAP_TSO6) {
2009 			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
2010 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2011 				if_printf(ifp, "enable txcsum6 first.\n");
2012 				error = EAGAIN;
2013 				goto fail;
2014 			}
2015 			ifp->if_capenable ^= IFCAP_TSO6;
2016 		}
2017 		if (mask & IFCAP_LRO) {
2018 			ifp->if_capenable ^= IFCAP_LRO;
2019 
2020 			/* Safe to do this even if cxgb_up not called yet */
2021 			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2022 		}
2023 #ifdef TCP_OFFLOAD
2024 		if (mask & IFCAP_TOE4) {
2025 			int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE4;
2026 
2027 			error = toe_capability(p, enable);
2028 			if (error == 0)
2029 				ifp->if_capenable ^= mask;
2030 		}
2031 #endif
2032 		if (mask & IFCAP_VLAN_HWTAGGING) {
2033 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2034 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2035 				PORT_LOCK(p);
2036 				cxgb_update_mac_settings(p);
2037 				PORT_UNLOCK(p);
2038 			}
2039 		}
2040 		if (mask & IFCAP_VLAN_MTU) {
2041 			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2042 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2043 				PORT_LOCK(p);
2044 				cxgb_update_mac_settings(p);
2045 				PORT_UNLOCK(p);
2046 			}
2047 		}
2048 		if (mask & IFCAP_VLAN_HWTSO)
2049 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2050 		if (mask & IFCAP_VLAN_HWCSUM)
2051 			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2052 
2053 #ifdef VLAN_CAPABILITIES
2054 		VLAN_CAPABILITIES(ifp);
2055 #endif
2056 		ADAPTER_UNLOCK(sc);
2057 		break;
2058 	case SIOCSIFMEDIA:
2059 	case SIOCGIFMEDIA:
2060 		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2061 		break;
2062 	default:
2063 		error = ether_ioctl(ifp, command, data);
2064 	}
2065 
2066 	return (error);
2067 }
2068 
/*
 * ifmedia change callback.  Changing the media is not supported, so every
 * request is refused with EOPNOTSUPP; 'ifp' is not examined.
 */
static int
cxgb_media_change(struct ifnet *ifp)
{

	return (EOPNOTSUPP);
}
2074 
2075 /*
2076  * Translates phy->modtype to the correct Ethernet media subtype.
2077  */
2078 static int
2079 cxgb_ifm_type(int mod)
2080 {
2081 	switch (mod) {
2082 	case phy_modtype_sr:
2083 		return (IFM_10G_SR);
2084 	case phy_modtype_lr:
2085 		return (IFM_10G_LR);
2086 	case phy_modtype_lrm:
2087 		return (IFM_10G_LRM);
2088 	case phy_modtype_twinax:
2089 		return (IFM_10G_TWINAX);
2090 	case phy_modtype_twinax_long:
2091 		return (IFM_10G_TWINAX_LONG);
2092 	case phy_modtype_none:
2093 		return (IFM_NONE);
2094 	case phy_modtype_unknown:
2095 		return (IFM_UNKNOWN);
2096 	}
2097 
2098 	KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2099 	return (IFM_UNKNOWN);
2100 }
2101 
2102 /*
2103  * Rebuilds the ifmedia list for this port, and sets the current media.
2104  */
2105 static void
2106 cxgb_build_medialist(struct port_info *p)
2107 {
2108 	struct cphy *phy = &p->phy;
2109 	struct ifmedia *media = &p->media;
2110 	int mod = phy->modtype;
2111 	int m = IFM_ETHER | IFM_FDX;
2112 
2113 	PORT_LOCK(p);
2114 
2115 	ifmedia_removeall(media);
2116 	if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2117 		/* Copper (RJ45) */
2118 
2119 		if (phy->caps & SUPPORTED_10000baseT_Full)
2120 			ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2121 
2122 		if (phy->caps & SUPPORTED_1000baseT_Full)
2123 			ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2124 
2125 		if (phy->caps & SUPPORTED_100baseT_Full)
2126 			ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2127 
2128 		if (phy->caps & SUPPORTED_10baseT_Full)
2129 			ifmedia_add(media, m | IFM_10_T, mod, NULL);
2130 
2131 		ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2132 		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2133 
2134 	} else if (phy->caps & SUPPORTED_TP) {
2135 		/* Copper (CX4) */
2136 
2137 		KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2138 			("%s: unexpected cap 0x%x", __func__, phy->caps));
2139 
2140 		ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2141 		ifmedia_set(media, m | IFM_10G_CX4);
2142 
2143 	} else if (phy->caps & SUPPORTED_FIBRE &&
2144 		   phy->caps & SUPPORTED_10000baseT_Full) {
2145 		/* 10G optical (but includes SFP+ twinax) */
2146 
2147 		m |= cxgb_ifm_type(mod);
2148 		if (IFM_SUBTYPE(m) == IFM_NONE)
2149 			m &= ~IFM_FDX;
2150 
2151 		ifmedia_add(media, m, mod, NULL);
2152 		ifmedia_set(media, m);
2153 
2154 	} else if (phy->caps & SUPPORTED_FIBRE &&
2155 		   phy->caps & SUPPORTED_1000baseT_Full) {
2156 		/* 1G optical */
2157 
2158 		/* XXX: Lie and claim to be SX, could actually be any 1G-X */
2159 		ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2160 		ifmedia_set(media, m | IFM_1000_SX);
2161 
2162 	} else {
2163 		KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2164 			    phy->caps));
2165 	}
2166 
2167 	PORT_UNLOCK(p);
2168 }
2169 
2170 static void
2171 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2172 {
2173 	struct port_info *p = ifp->if_softc;
2174 	struct ifmedia_entry *cur = p->media.ifm_cur;
2175 	int speed = p->link_config.speed;
2176 
2177 	if (cur->ifm_data != p->phy.modtype) {
2178 		cxgb_build_medialist(p);
2179 		cur = p->media.ifm_cur;
2180 	}
2181 
2182 	ifmr->ifm_status = IFM_AVALID;
2183 	if (!p->link_config.link_ok)
2184 		return;
2185 
2186 	ifmr->ifm_status |= IFM_ACTIVE;
2187 
2188 	/*
2189 	 * active and current will differ iff current media is autoselect.  That
2190 	 * can happen only for copper RJ45.
2191 	 */
2192 	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2193 		return;
2194 	KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2195 		("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2196 
2197 	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2198 	if (speed == SPEED_10000)
2199 		ifmr->ifm_active |= IFM_10G_T;
2200 	else if (speed == SPEED_1000)
2201 		ifmr->ifm_active |= IFM_1000_T;
2202 	else if (speed == SPEED_100)
2203 		ifmr->ifm_active |= IFM_100_TX;
2204 	else if (speed == SPEED_10)
2205 		ifmr->ifm_active |= IFM_10_T;
2206 	else
2207 		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2208 			    speed));
2209 }
2210 
2211 static uint64_t
2212 cxgb_get_counter(struct ifnet *ifp, ift_counter c)
2213 {
2214 	struct port_info *pi = ifp->if_softc;
2215 	struct adapter *sc = pi->adapter;
2216 	struct cmac *mac = &pi->mac;
2217 	struct mac_stats *mstats = &mac->stats;
2218 
2219 	cxgb_refresh_stats(pi);
2220 
2221 	switch (c) {
2222 	case IFCOUNTER_IPACKETS:
2223 		return (mstats->rx_frames);
2224 
2225 	case IFCOUNTER_IERRORS:
2226 		return (mstats->rx_jabber + mstats->rx_data_errs +
2227 		    mstats->rx_sequence_errs + mstats->rx_runt +
2228 		    mstats->rx_too_long + mstats->rx_mac_internal_errs +
2229 		    mstats->rx_short + mstats->rx_fcs_errs);
2230 
2231 	case IFCOUNTER_OPACKETS:
2232 		return (mstats->tx_frames);
2233 
2234 	case IFCOUNTER_OERRORS:
2235 		return (mstats->tx_excess_collisions + mstats->tx_underrun +
2236 		    mstats->tx_len_errs + mstats->tx_mac_internal_errs +
2237 		    mstats->tx_excess_deferral + mstats->tx_fcs_errs);
2238 
2239 	case IFCOUNTER_COLLISIONS:
2240 		return (mstats->tx_total_collisions);
2241 
2242 	case IFCOUNTER_IBYTES:
2243 		return (mstats->rx_octets);
2244 
2245 	case IFCOUNTER_OBYTES:
2246 		return (mstats->tx_octets);
2247 
2248 	case IFCOUNTER_IMCASTS:
2249 		return (mstats->rx_mcast_frames);
2250 
2251 	case IFCOUNTER_OMCASTS:
2252 		return (mstats->tx_mcast_frames);
2253 
2254 	case IFCOUNTER_IQDROPS:
2255 		return (mstats->rx_cong_drops);
2256 
2257 	case IFCOUNTER_OQDROPS: {
2258 		int i;
2259 		uint64_t drops;
2260 
2261 		drops = 0;
2262 		if (sc->flags & FULL_INIT_DONE) {
2263 			for (i = pi->first_qset; i < pi->first_qset + pi->nqsets; i++)
2264 				drops += sc->sge.qs[i].txq[TXQ_ETH].txq_mr->br_drops;
2265 		}
2266 
2267 		return (drops);
2268 
2269 	}
2270 
2271 	default:
2272 		return (if_get_counter_default(ifp, c));
2273 	}
2274 }
2275 
2276 static void
2277 cxgb_async_intr(void *data)
2278 {
2279 	adapter_t *sc = data;
2280 
2281 	t3_write_reg(sc, A_PL_INT_ENABLE0, 0);
2282 	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
2283 	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2284 }
2285 
2286 static void
2287 link_check_callout(void *arg)
2288 {
2289 	struct port_info *pi = arg;
2290 	struct adapter *sc = pi->adapter;
2291 
2292 	if (!isset(&sc->open_device_map, pi->port_id))
2293 		return;
2294 
2295 	taskqueue_enqueue(sc->tq, &pi->link_check_task);
2296 }
2297 
2298 static void
2299 check_link_status(void *arg, int pending)
2300 {
2301 	struct port_info *pi = arg;
2302 	struct adapter *sc = pi->adapter;
2303 
2304 	if (!isset(&sc->open_device_map, pi->port_id))
2305 		return;
2306 
2307 	t3_link_changed(sc, pi->port_id);
2308 
2309 	if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ) ||
2310 	    pi->link_config.link_ok == 0)
2311 		callout_reset(&pi->link_check_ch, hz, link_check_callout, pi);
2312 }
2313 
2314 void
2315 t3_os_link_intr(struct port_info *pi)
2316 {
2317 	/*
2318 	 * Schedule a link check in the near future.  If the link is flapping
2319 	 * rapidly we'll keep resetting the callout and delaying the check until
2320 	 * things stabilize a bit.
2321 	 */
2322 	callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
2323 }
2324 
2325 static void
2326 check_t3b2_mac(struct adapter *sc)
2327 {
2328 	int i;
2329 
2330 	if (sc->flags & CXGB_SHUTDOWN)
2331 		return;
2332 
2333 	for_each_port(sc, i) {
2334 		struct port_info *p = &sc->port[i];
2335 		int status;
2336 #ifdef INVARIANTS
2337 		struct ifnet *ifp = p->ifp;
2338 #endif
2339 
2340 		if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2341 		    !p->link_config.link_ok)
2342 			continue;
2343 
2344 		KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2345 			("%s: state mismatch (drv_flags %x, device_map %x)",
2346 			 __func__, ifp->if_drv_flags, sc->open_device_map));
2347 
2348 		PORT_LOCK(p);
2349 		status = t3b2_mac_watchdog_task(&p->mac);
2350 		if (status == 1)
2351 			p->mac.stats.num_toggled++;
2352 		else if (status == 2) {
2353 			struct cmac *mac = &p->mac;
2354 
2355 			cxgb_update_mac_settings(p);
2356 			t3_link_start(&p->phy, mac, &p->link_config);
2357 			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2358 			t3_port_intr_enable(sc, p->port_id);
2359 			p->mac.stats.num_resets++;
2360 		}
2361 		PORT_UNLOCK(p);
2362 	}
2363 }
2364 
2365 static void
2366 cxgb_tick(void *arg)
2367 {
2368 	adapter_t *sc = (adapter_t *)arg;
2369 
2370 	if (sc->flags & CXGB_SHUTDOWN)
2371 		return;
2372 
2373 	taskqueue_enqueue(sc->tq, &sc->tick_task);
2374 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2375 }
2376 
2377 void
2378 cxgb_refresh_stats(struct port_info *pi)
2379 {
2380 	struct timeval tv;
2381 	const struct timeval interval = {0, 250000};    /* 250ms */
2382 
2383 	getmicrotime(&tv);
2384 	timevalsub(&tv, &interval);
2385 	if (timevalcmp(&tv, &pi->last_refreshed, <))
2386 		return;
2387 
2388 	PORT_LOCK(pi);
2389 	t3_mac_update_stats(&pi->mac);
2390 	PORT_UNLOCK(pi);
2391 	getmicrotime(&pi->last_refreshed);
2392 }
2393 
2394 static void
2395 cxgb_tick_handler(void *arg, int count)
2396 {
2397 	adapter_t *sc = (adapter_t *)arg;
2398 	const struct adapter_params *p = &sc->params;
2399 	int i;
2400 	uint32_t cause, reset;
2401 
2402 	if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2403 		return;
2404 
2405 	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2406 		check_t3b2_mac(sc);
2407 
2408 	cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2409 	if (cause) {
2410 		struct sge_qset *qs = &sc->sge.qs[0];
2411 		uint32_t mask, v;
2412 
2413 		v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2414 
2415 		mask = 1;
2416 		for (i = 0; i < SGE_QSETS; i++) {
2417 			if (v & mask)
2418 				qs[i].rspq.starved++;
2419 			mask <<= 1;
2420 		}
2421 
2422 		mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2423 
2424 		for (i = 0; i < SGE_QSETS * 2; i++) {
2425 			if (v & mask) {
2426 				qs[i / 2].fl[i % 2].empty++;
2427 			}
2428 			mask <<= 1;
2429 		}
2430 
2431 		/* clear */
2432 		t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2433 		t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2434 	}
2435 
2436 	for (i = 0; i < sc->params.nports; i++) {
2437 		struct port_info *pi = &sc->port[i];
2438 		struct cmac *mac = &pi->mac;
2439 
2440 		if (!isset(&sc->open_device_map, pi->port_id))
2441 			continue;
2442 
2443 		cxgb_refresh_stats(pi);
2444 
2445 		if (mac->multiport)
2446 			continue;
2447 
2448 		/* Count rx fifo overflows, once per second */
2449 		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2450 		reset = 0;
2451 		if (cause & F_RXFIFO_OVERFLOW) {
2452 			mac->stats.rx_fifo_ovfl++;
2453 			reset |= F_RXFIFO_OVERFLOW;
2454 		}
2455 		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2456 	}
2457 }
2458 
/*
 * Placeholder: currently a no-op on every platform.
 *
 * The body below is compiled out unconditionally (note the "&& 0").  If it
 * were enabled it would apply to non-LP64 builds only, and would read each of
 * PCI_BASE_ADDRESS_1/3/5 from PCI config space and write the same value back.
 * NOTE(review): the disabled code refers to 'pdev', which is not a parameter
 * of this function, so it would need porting before it could be enabled.
 */
static void
touch_bars(device_t dev)
{
	/*
	 * Don't enable yet
	 */
#if !defined(__LP64__) && 0
	u32 v;

	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
#endif
}
2476 
2477 static int
2478 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2479 {
2480 	uint8_t *buf;
2481 	int err = 0;
2482 	u32 aligned_offset, aligned_len, *p;
2483 	struct adapter *adapter = pi->adapter;
2484 
2485 
2486 	aligned_offset = offset & ~3;
2487 	aligned_len = (len + (offset & 3) + 3) & ~3;
2488 
2489 	if (aligned_offset != offset || aligned_len != len) {
2490 		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2491 		if (!buf)
2492 			return (ENOMEM);
2493 		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2494 		if (!err && aligned_len > 4)
2495 			err = t3_seeprom_read(adapter,
2496 					      aligned_offset + aligned_len - 4,
2497 					      (u32 *)&buf[aligned_len - 4]);
2498 		if (err)
2499 			goto out;
2500 		memcpy(buf + (offset & 3), data, len);
2501 	} else
2502 		buf = (uint8_t *)(uintptr_t)data;
2503 
2504 	err = t3_seeprom_wp(adapter, 0);
2505 	if (err)
2506 		goto out;
2507 
2508 	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2509 		err = t3_seeprom_write(adapter, aligned_offset, *p);
2510 		aligned_offset += 4;
2511 	}
2512 
2513 	if (!err)
2514 		err = t3_seeprom_wp(adapter, 1);
2515 out:
2516 	if (buf != data)
2517 		free(buf, M_DEVBUF);
2518 	return err;
2519 }
2520 
2521 
/*
 * Range check used for optional parameters: returns nonzero if val is
 * negative (callers in this file skip negative values) or lies within
 * [lo, hi], and 0 otherwise.
 */
static int
in_range(int val, int lo, int hi)
{
	if (val < 0)
		return (1);

	return (lo <= val && val <= hi);
}
2527 
/*
 * Open handler for the extension character device.  There is nothing to set
 * up, so the open always succeeds.
 */
static int
cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
{
	const int rc = 0;

	return (rc);
}
2533 
/*
 * Close handler for the extension character device.  There is nothing to
 * tear down, so the close always succeeds.
 */
static int
cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	const int rc = 0;

	return (rc);
}
2539 
2540 static int
2541 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2542     int fflag, struct thread *td)
2543 {
2544 	int mmd, error = 0;
2545 	struct port_info *pi = dev->si_drv1;
2546 	adapter_t *sc = pi->adapter;
2547 
2548 #ifdef PRIV_SUPPORTED
2549 	if (priv_check(td, PRIV_DRIVER)) {
2550 		if (cxgb_debug)
2551 			printf("user does not have access to privileged ioctls\n");
2552 		return (EPERM);
2553 	}
2554 #else
2555 	if (suser(td)) {
2556 		if (cxgb_debug)
2557 			printf("user does not have access to privileged ioctls\n");
2558 		return (EPERM);
2559 	}
2560 #endif
2561 
2562 	switch (cmd) {
2563 	case CHELSIO_GET_MIIREG: {
2564 		uint32_t val;
2565 		struct cphy *phy = &pi->phy;
2566 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2567 
2568 		if (!phy->mdio_read)
2569 			return (EOPNOTSUPP);
2570 		if (is_10G(sc)) {
2571 			mmd = mid->phy_id >> 8;
2572 			if (!mmd)
2573 				mmd = MDIO_DEV_PCS;
2574 			else if (mmd > MDIO_DEV_VEND2)
2575 				return (EINVAL);
2576 
2577 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2578 					     mid->reg_num, &val);
2579 		} else
2580 		        error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2581 					     mid->reg_num & 0x1f, &val);
2582 		if (error == 0)
2583 			mid->val_out = val;
2584 		break;
2585 	}
2586 	case CHELSIO_SET_MIIREG: {
2587 		struct cphy *phy = &pi->phy;
2588 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2589 
2590 		if (!phy->mdio_write)
2591 			return (EOPNOTSUPP);
2592 		if (is_10G(sc)) {
2593 			mmd = mid->phy_id >> 8;
2594 			if (!mmd)
2595 				mmd = MDIO_DEV_PCS;
2596 			else if (mmd > MDIO_DEV_VEND2)
2597 				return (EINVAL);
2598 
2599 			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2600 					      mmd, mid->reg_num, mid->val_in);
2601 		} else
2602 			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2603 					      mid->reg_num & 0x1f,
2604 					      mid->val_in);
2605 		break;
2606 	}
2607 	case CHELSIO_SETREG: {
2608 		struct ch_reg *edata = (struct ch_reg *)data;
2609 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2610 			return (EFAULT);
2611 		t3_write_reg(sc, edata->addr, edata->val);
2612 		break;
2613 	}
2614 	case CHELSIO_GETREG: {
2615 		struct ch_reg *edata = (struct ch_reg *)data;
2616 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2617 			return (EFAULT);
2618 		edata->val = t3_read_reg(sc, edata->addr);
2619 		break;
2620 	}
2621 	case CHELSIO_GET_SGE_CONTEXT: {
2622 		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2623 		mtx_lock_spin(&sc->sge.reg_lock);
2624 		switch (ecntxt->cntxt_type) {
2625 		case CNTXT_TYPE_EGRESS:
2626 			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2627 			    ecntxt->data);
2628 			break;
2629 		case CNTXT_TYPE_FL:
2630 			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2631 			    ecntxt->data);
2632 			break;
2633 		case CNTXT_TYPE_RSP:
2634 			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2635 			    ecntxt->data);
2636 			break;
2637 		case CNTXT_TYPE_CQ:
2638 			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2639 			    ecntxt->data);
2640 			break;
2641 		default:
2642 			error = EINVAL;
2643 			break;
2644 		}
2645 		mtx_unlock_spin(&sc->sge.reg_lock);
2646 		break;
2647 	}
2648 	case CHELSIO_GET_SGE_DESC: {
2649 		struct ch_desc *edesc = (struct ch_desc *)data;
2650 		int ret;
2651 		if (edesc->queue_num >= SGE_QSETS * 6)
2652 			return (EINVAL);
2653 		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2654 		    edesc->queue_num % 6, edesc->idx, edesc->data);
2655 		if (ret < 0)
2656 			return (EINVAL);
2657 		edesc->size = ret;
2658 		break;
2659 	}
2660 	case CHELSIO_GET_QSET_PARAMS: {
2661 		struct qset_params *q;
2662 		struct ch_qset_params *t = (struct ch_qset_params *)data;
2663 		int q1 = pi->first_qset;
2664 		int nqsets = pi->nqsets;
2665 		int i;
2666 
2667 		if (t->qset_idx >= nqsets)
2668 			return EINVAL;
2669 
2670 		i = q1 + t->qset_idx;
2671 		q = &sc->params.sge.qset[i];
2672 		t->rspq_size   = q->rspq_size;
2673 		t->txq_size[0] = q->txq_size[0];
2674 		t->txq_size[1] = q->txq_size[1];
2675 		t->txq_size[2] = q->txq_size[2];
2676 		t->fl_size[0]  = q->fl_size;
2677 		t->fl_size[1]  = q->jumbo_size;
2678 		t->polling     = q->polling;
2679 		t->lro         = q->lro;
2680 		t->intr_lat    = q->coalesce_usecs;
2681 		t->cong_thres  = q->cong_thres;
2682 		t->qnum        = i;
2683 
2684 		if ((sc->flags & FULL_INIT_DONE) == 0)
2685 			t->vector = 0;
2686 		else if (sc->flags & USING_MSIX)
2687 			t->vector = rman_get_start(sc->msix_irq_res[i]);
2688 		else
2689 			t->vector = rman_get_start(sc->irq_res);
2690 
2691 		break;
2692 	}
2693 	case CHELSIO_GET_QSET_NUM: {
2694 		struct ch_reg *edata = (struct ch_reg *)data;
2695 		edata->val = pi->nqsets;
2696 		break;
2697 	}
2698 	case CHELSIO_LOAD_FW: {
2699 		uint8_t *fw_data;
2700 		uint32_t vers;
2701 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2702 
2703 		/*
2704 		 * You're allowed to load a firmware only before FULL_INIT_DONE
2705 		 *
2706 		 * FW_UPTODATE is also set so the rest of the initialization
2707 		 * will not overwrite what was loaded here.  This gives you the
2708 		 * flexibility to load any firmware (and maybe shoot yourself in
2709 		 * the foot).
2710 		 */
2711 
2712 		ADAPTER_LOCK(sc);
2713 		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2714 			ADAPTER_UNLOCK(sc);
2715 			return (EBUSY);
2716 		}
2717 
2718 		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2719 		if (!fw_data)
2720 			error = ENOMEM;
2721 		else
2722 			error = copyin(t->buf, fw_data, t->len);
2723 
2724 		if (!error)
2725 			error = -t3_load_fw(sc, fw_data, t->len);
2726 
2727 		if (t3_get_fw_version(sc, &vers) == 0) {
2728 			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2729 			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2730 			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2731 		}
2732 
2733 		if (!error)
2734 			sc->flags |= FW_UPTODATE;
2735 
2736 		free(fw_data, M_DEVBUF);
2737 		ADAPTER_UNLOCK(sc);
2738 		break;
2739 	}
2740 	case CHELSIO_LOAD_BOOT: {
2741 		uint8_t *boot_data;
2742 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2743 
2744 		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2745 		if (!boot_data)
2746 			return ENOMEM;
2747 
2748 		error = copyin(t->buf, boot_data, t->len);
2749 		if (!error)
2750 			error = -t3_load_boot(sc, boot_data, t->len);
2751 
2752 		free(boot_data, M_DEVBUF);
2753 		break;
2754 	}
2755 	case CHELSIO_GET_PM: {
2756 		struct ch_pm *m = (struct ch_pm *)data;
2757 		struct tp_params *p = &sc->params.tp;
2758 
2759 		if (!is_offload(sc))
2760 			return (EOPNOTSUPP);
2761 
2762 		m->tx_pg_sz = p->tx_pg_size;
2763 		m->tx_num_pg = p->tx_num_pgs;
2764 		m->rx_pg_sz  = p->rx_pg_size;
2765 		m->rx_num_pg = p->rx_num_pgs;
2766 		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2767 
2768 		break;
2769 	}
2770 	case CHELSIO_SET_PM: {
2771 		struct ch_pm *m = (struct ch_pm *)data;
2772 		struct tp_params *p = &sc->params.tp;
2773 
2774 		if (!is_offload(sc))
2775 			return (EOPNOTSUPP);
2776 		if (sc->flags & FULL_INIT_DONE)
2777 			return (EBUSY);
2778 
2779 		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2780 		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2781 			return (EINVAL);	/* not power of 2 */
2782 		if (!(m->rx_pg_sz & 0x14000))
2783 			return (EINVAL);	/* not 16KB or 64KB */
2784 		if (!(m->tx_pg_sz & 0x1554000))
2785 			return (EINVAL);
2786 		if (m->tx_num_pg == -1)
2787 			m->tx_num_pg = p->tx_num_pgs;
2788 		if (m->rx_num_pg == -1)
2789 			m->rx_num_pg = p->rx_num_pgs;
2790 		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2791 			return (EINVAL);
2792 		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2793 		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2794 			return (EINVAL);
2795 
2796 		p->rx_pg_size = m->rx_pg_sz;
2797 		p->tx_pg_size = m->tx_pg_sz;
2798 		p->rx_num_pgs = m->rx_num_pg;
2799 		p->tx_num_pgs = m->tx_num_pg;
2800 		break;
2801 	}
2802 	case CHELSIO_SETMTUTAB: {
2803 		struct ch_mtus *m = (struct ch_mtus *)data;
2804 		int i;
2805 
2806 		if (!is_offload(sc))
2807 			return (EOPNOTSUPP);
2808 		if (offload_running(sc))
2809 			return (EBUSY);
2810 		if (m->nmtus != NMTUS)
2811 			return (EINVAL);
2812 		if (m->mtus[0] < 81)         /* accommodate SACK */
2813 			return (EINVAL);
2814 
2815 		/*
2816 		 * MTUs must be in ascending order
2817 		 */
2818 		for (i = 1; i < NMTUS; ++i)
2819 			if (m->mtus[i] < m->mtus[i - 1])
2820 				return (EINVAL);
2821 
2822 		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2823 		break;
2824 	}
2825 	case CHELSIO_GETMTUTAB: {
2826 		struct ch_mtus *m = (struct ch_mtus *)data;
2827 
2828 		if (!is_offload(sc))
2829 			return (EOPNOTSUPP);
2830 
2831 		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2832 		m->nmtus = NMTUS;
2833 		break;
2834 	}
2835 	case CHELSIO_GET_MEM: {
2836 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2837 		struct mc7 *mem;
2838 		uint8_t *useraddr;
2839 		u64 buf[32];
2840 
2841 		/*
2842 		 * Use these to avoid modifying len/addr in the return
2843 		 * struct
2844 		 */
2845 		uint32_t len = t->len, addr = t->addr;
2846 
2847 		if (!is_offload(sc))
2848 			return (EOPNOTSUPP);
2849 		if (!(sc->flags & FULL_INIT_DONE))
2850 			return (EIO);         /* need the memory controllers */
2851 		if ((addr & 0x7) || (len & 0x7))
2852 			return (EINVAL);
2853 		if (t->mem_id == MEM_CM)
2854 			mem = &sc->cm;
2855 		else if (t->mem_id == MEM_PMRX)
2856 			mem = &sc->pmrx;
2857 		else if (t->mem_id == MEM_PMTX)
2858 			mem = &sc->pmtx;
2859 		else
2860 			return (EINVAL);
2861 
2862 		/*
2863 		 * Version scheme:
2864 		 * bits 0..9: chip version
2865 		 * bits 10..15: chip revision
2866 		 */
2867 		t->version = 3 | (sc->params.rev << 10);
2868 
2869 		/*
2870 		 * Read 256 bytes at a time as len can be large and we don't
2871 		 * want to use huge intermediate buffers.
2872 		 */
2873 		useraddr = (uint8_t *)t->buf;
2874 		while (len) {
2875 			unsigned int chunk = min(len, sizeof(buf));
2876 
2877 			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2878 			if (error)
2879 				return (-error);
2880 			if (copyout(buf, useraddr, chunk))
2881 				return (EFAULT);
2882 			useraddr += chunk;
2883 			addr += chunk;
2884 			len -= chunk;
2885 		}
2886 		break;
2887 	}
2888 	case CHELSIO_READ_TCAM_WORD: {
2889 		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2890 
2891 		if (!is_offload(sc))
2892 			return (EOPNOTSUPP);
2893 		if (!(sc->flags & FULL_INIT_DONE))
2894 			return (EIO);         /* need MC5 */
2895 		return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf);
2896 		break;
2897 	}
2898 	case CHELSIO_SET_TRACE_FILTER: {
2899 		struct ch_trace *t = (struct ch_trace *)data;
2900 		const struct trace_params *tp;
2901 
2902 		tp = (const struct trace_params *)&t->sip;
2903 		if (t->config_tx)
2904 			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2905 					       t->trace_tx);
2906 		if (t->config_rx)
2907 			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2908 					       t->trace_rx);
2909 		break;
2910 	}
2911 	case CHELSIO_SET_PKTSCHED: {
2912 		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2913 		if (sc->open_device_map == 0)
2914 			return (EAGAIN);
2915 		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2916 		    p->binding);
2917 		break;
2918 	}
2919 	case CHELSIO_IFCONF_GETREGS: {
2920 		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2921 		int reglen = cxgb_get_regs_len();
2922 		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2923 		if (buf == NULL) {
2924 			return (ENOMEM);
2925 		}
2926 		if (regs->len > reglen)
2927 			regs->len = reglen;
2928 		else if (regs->len < reglen)
2929 			error = ENOBUFS;
2930 
2931 		if (!error) {
2932 			cxgb_get_regs(sc, regs, buf);
2933 			error = copyout(buf, regs->data, reglen);
2934 		}
2935 		free(buf, M_DEVBUF);
2936 
2937 		break;
2938 	}
2939 	case CHELSIO_SET_HW_SCHED: {
2940 		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2941 		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2942 
2943 		if ((sc->flags & FULL_INIT_DONE) == 0)
2944 			return (EAGAIN);       /* need TP to be initialized */
2945 		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2946 		    !in_range(t->channel, 0, 1) ||
2947 		    !in_range(t->kbps, 0, 10000000) ||
2948 		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2949 		    !in_range(t->flow_ipg, 0,
2950 			      dack_ticks_to_usec(sc, 0x7ff)))
2951 			return (EINVAL);
2952 
2953 		if (t->kbps >= 0) {
2954 			error = t3_config_sched(sc, t->kbps, t->sched);
2955 			if (error < 0)
2956 				return (-error);
2957 		}
2958 		if (t->class_ipg >= 0)
2959 			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2960 		if (t->flow_ipg >= 0) {
2961 			t->flow_ipg *= 1000;     /* us -> ns */
2962 			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2963 		}
2964 		if (t->mode >= 0) {
2965 			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2966 
2967 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2968 					 bit, t->mode ? bit : 0);
2969 		}
2970 		if (t->channel >= 0)
2971 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2972 					 1 << t->sched, t->channel << t->sched);
2973 		break;
2974 	}
2975 	case CHELSIO_GET_EEPROM: {
2976 		int i;
2977 		struct ch_eeprom *e = (struct ch_eeprom *)data;
2978 		uint8_t *buf;
2979 
2980 		if (e->offset & 3 || e->offset >= EEPROMSIZE ||
2981 		    e->len > EEPROMSIZE || e->offset + e->len > EEPROMSIZE) {
2982 			return (EINVAL);
2983 		}
2984 
2985 		buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
2986 		if (buf == NULL) {
2987 			return (ENOMEM);
2988 		}
2989 		e->magic = EEPROM_MAGIC;
2990 		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
2991 			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
2992 
2993 		if (!error)
2994 			error = copyout(buf + e->offset, e->data, e->len);
2995 
2996 		free(buf, M_DEVBUF);
2997 		break;
2998 	}
2999 	case CHELSIO_CLEAR_STATS: {
3000 		if (!(sc->flags & FULL_INIT_DONE))
3001 			return EAGAIN;
3002 
3003 		PORT_LOCK(pi);
3004 		t3_mac_update_stats(&pi->mac);
3005 		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
3006 		PORT_UNLOCK(pi);
3007 		break;
3008 	}
3009 	case CHELSIO_GET_UP_LA: {
3010 		struct ch_up_la *la = (struct ch_up_la *)data;
3011 		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3012 		if (buf == NULL) {
3013 			return (ENOMEM);
3014 		}
3015 		if (la->bufsize < LA_BUFSIZE)
3016 			error = ENOBUFS;
3017 
3018 		if (!error)
3019 			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3020 					      &la->bufsize, buf);
3021 		if (!error)
3022 			error = copyout(buf, la->data, la->bufsize);
3023 
3024 		free(buf, M_DEVBUF);
3025 		break;
3026 	}
3027 	case CHELSIO_GET_UP_IOQS: {
3028 		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3029 		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3030 		uint32_t *v;
3031 
3032 		if (buf == NULL) {
3033 			return (ENOMEM);
3034 		}
3035 		if (ioqs->bufsize < IOQS_BUFSIZE)
3036 			error = ENOBUFS;
3037 
3038 		if (!error)
3039 			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3040 
3041 		if (!error) {
3042 			v = (uint32_t *)buf;
3043 
3044 			ioqs->ioq_rx_enable = *v++;
3045 			ioqs->ioq_tx_enable = *v++;
3046 			ioqs->ioq_rx_status = *v++;
3047 			ioqs->ioq_tx_status = *v++;
3048 
3049 			error = copyout(v, ioqs->data, ioqs->bufsize);
3050 		}
3051 
3052 		free(buf, M_DEVBUF);
3053 		break;
3054 	}
3055 	case CHELSIO_SET_FILTER: {
3056 		struct ch_filter *f = (struct ch_filter *)data;
3057 		struct filter_info *p;
3058 		unsigned int nfilters = sc->params.mc5.nfilters;
3059 
3060 		if (!is_offload(sc))
3061 			return (EOPNOTSUPP);	/* No TCAM */
3062 		if (!(sc->flags & FULL_INIT_DONE))
3063 			return (EAGAIN);	/* mc5 not setup yet */
3064 		if (nfilters == 0)
3065 			return (EBUSY);		/* TOE will use TCAM */
3066 
3067 		/* sanity checks */
3068 		if (f->filter_id >= nfilters ||
3069 		    (f->val.dip && f->mask.dip != 0xffffffff) ||
3070 		    (f->val.sport && f->mask.sport != 0xffff) ||
3071 		    (f->val.dport && f->mask.dport != 0xffff) ||
3072 		    (f->val.vlan && f->mask.vlan != 0xfff) ||
3073 		    (f->val.vlan_prio &&
3074 			f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3075 		    (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3076 		    f->qset >= SGE_QSETS ||
3077 		    sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3078 			return (EINVAL);
3079 
3080 		/* Was allocated with M_WAITOK */
3081 		KASSERT(sc->filters, ("filter table NULL\n"));
3082 
3083 		p = &sc->filters[f->filter_id];
3084 		if (p->locked)
3085 			return (EPERM);
3086 
3087 		bzero(p, sizeof(*p));
3088 		p->sip = f->val.sip;
3089 		p->sip_mask = f->mask.sip;
3090 		p->dip = f->val.dip;
3091 		p->sport = f->val.sport;
3092 		p->dport = f->val.dport;
3093 		p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3094 		p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3095 		    FILTER_NO_VLAN_PRI;
3096 		p->mac_hit = f->mac_hit;
3097 		p->mac_vld = f->mac_addr_idx != 0xffff;
3098 		p->mac_idx = f->mac_addr_idx;
3099 		p->pkt_type = f->proto;
3100 		p->report_filter_id = f->want_filter_id;
3101 		p->pass = f->pass;
3102 		p->rss = f->rss;
3103 		p->qset = f->qset;
3104 
3105 		error = set_filter(sc, f->filter_id, p);
3106 		if (error == 0)
3107 			p->valid = 1;
3108 		break;
3109 	}
3110 	case CHELSIO_DEL_FILTER: {
3111 		struct ch_filter *f = (struct ch_filter *)data;
3112 		struct filter_info *p;
3113 		unsigned int nfilters = sc->params.mc5.nfilters;
3114 
3115 		if (!is_offload(sc))
3116 			return (EOPNOTSUPP);
3117 		if (!(sc->flags & FULL_INIT_DONE))
3118 			return (EAGAIN);
3119 		if (nfilters == 0 || sc->filters == NULL)
3120 			return (EINVAL);
3121 		if (f->filter_id >= nfilters)
3122 		       return (EINVAL);
3123 
3124 		p = &sc->filters[f->filter_id];
3125 		if (p->locked)
3126 			return (EPERM);
3127 		if (!p->valid)
3128 			return (EFAULT); /* Read "Bad address" as "Bad index" */
3129 
3130 		bzero(p, sizeof(*p));
3131 		p->sip = p->sip_mask = 0xffffffff;
3132 		p->vlan = 0xfff;
3133 		p->vlan_prio = FILTER_NO_VLAN_PRI;
3134 		p->pkt_type = 1;
3135 		error = set_filter(sc, f->filter_id, p);
3136 		break;
3137 	}
3138 	case CHELSIO_GET_FILTER: {
3139 		struct ch_filter *f = (struct ch_filter *)data;
3140 		struct filter_info *p;
3141 		unsigned int i, nfilters = sc->params.mc5.nfilters;
3142 
3143 		if (!is_offload(sc))
3144 			return (EOPNOTSUPP);
3145 		if (!(sc->flags & FULL_INIT_DONE))
3146 			return (EAGAIN);
3147 		if (nfilters == 0 || sc->filters == NULL)
3148 			return (EINVAL);
3149 
3150 		i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3151 		for (; i < nfilters; i++) {
3152 			p = &sc->filters[i];
3153 			if (!p->valid)
3154 				continue;
3155 
3156 			bzero(f, sizeof(*f));
3157 
3158 			f->filter_id = i;
3159 			f->val.sip = p->sip;
3160 			f->mask.sip = p->sip_mask;
3161 			f->val.dip = p->dip;
3162 			f->mask.dip = p->dip ? 0xffffffff : 0;
3163 			f->val.sport = p->sport;
3164 			f->mask.sport = p->sport ? 0xffff : 0;
3165 			f->val.dport = p->dport;
3166 			f->mask.dport = p->dport ? 0xffff : 0;
3167 			f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3168 			f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3169 			f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3170 			    0 : p->vlan_prio;
3171 			f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3172 			    0 : FILTER_NO_VLAN_PRI;
3173 			f->mac_hit = p->mac_hit;
3174 			f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3175 			f->proto = p->pkt_type;
3176 			f->want_filter_id = p->report_filter_id;
3177 			f->pass = p->pass;
3178 			f->rss = p->rss;
3179 			f->qset = p->qset;
3180 
3181 			break;
3182 		}
3183 
3184 		if (i == nfilters)
3185 			f->filter_id = 0xffffffff;
3186 		break;
3187 	}
3188 	default:
3189 		return (EOPNOTSUPP);
3190 		break;
3191 	}
3192 
3193 	return (error);
3194 }
3195 
/*
 * Copy the registers in the inclusive address range [start, end] into buf.
 * Registers are read 32 bits at a time with t3_read_reg(); each value is
 * stored at the byte offset in buf that equals the address it was read from.
 */
static __inline void
reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
    unsigned int end)
{
	uint32_t *dst;
	unsigned int addr;

	dst = (uint32_t *)(buf + start);
	for (addr = start; addr <= end; addr += sizeof(uint32_t)) {
		*dst = t3_read_reg(ap, addr);
		dst++;
	}
}
3205 
/* Size, in bytes, of the buffer that holds a register dump. */
#define T3_REGMAP_SIZE (3 * 1024)

/*
 * Return the number of bytes a register dump occupies (T3_REGMAP_SIZE).
 */
static int
cxgb_get_regs_len(void)
{

	return (T3_REGMAP_SIZE);
}
3212 
3213 static void
3214 cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3215 {
3216 
3217 	/*
3218 	 * Version scheme:
3219 	 * bits 0..9: chip version
3220 	 * bits 10..15: chip revision
3221 	 * bit 31: set for PCIe cards
3222 	 */
3223 	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3224 
3225 	/*
3226 	 * We skip the MAC statistics registers because they are clear-on-read.
3227 	 * Also reading multi-register stats would need to synchronize with the
3228 	 * periodic mac stats accumulation.  Hard to justify the complexity.
3229 	 */
3230 	memset(buf, 0, cxgb_get_regs_len());
3231 	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3232 	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3233 	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3234 	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3235 	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3236 	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3237 		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3238 	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3239 		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3240 }
3241 
3242 static int
3243 alloc_filters(struct adapter *sc)
3244 {
3245 	struct filter_info *p;
3246 	unsigned int nfilters = sc->params.mc5.nfilters;
3247 
3248 	if (nfilters == 0)
3249 		return (0);
3250 
3251 	p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3252 	sc->filters = p;
3253 
3254 	p = &sc->filters[nfilters - 1];
3255 	p->vlan = 0xfff;
3256 	p->vlan_prio = FILTER_NO_VLAN_PRI;
3257 	p->pass = p->rss = p->valid = p->locked = 1;
3258 
3259 	return (0);
3260 }
3261 
3262 static int
3263 setup_hw_filters(struct adapter *sc)
3264 {
3265 	int i, rc;
3266 	unsigned int nfilters = sc->params.mc5.nfilters;
3267 
3268 	if (!sc->filters)
3269 		return (0);
3270 
3271 	t3_enable_filters(sc);
3272 
3273 	for (i = rc = 0; i < nfilters && !rc; i++) {
3274 		if (sc->filters[i].locked)
3275 			rc = set_filter(sc, i, &sc->filters[i]);
3276 	}
3277 
3278 	return (rc);
3279 }
3280 
3281 static int
3282 set_filter(struct adapter *sc, int id, const struct filter_info *f)
3283 {
3284 	int len;
3285 	struct mbuf *m;
3286 	struct ulp_txpkt *txpkt;
3287 	struct work_request_hdr *wr;
3288 	struct cpl_pass_open_req *oreq;
3289 	struct cpl_set_tcb_field *sreq;
3290 
3291 	len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3292 	KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3293 
3294 	id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3295 	      sc->params.mc5.nfilters;
3296 
3297 	m = m_gethdr(M_WAITOK, MT_DATA);
3298 	m->m_len = m->m_pkthdr.len = len;
3299 	bzero(mtod(m, char *), len);
3300 
3301 	wr = mtod(m, struct work_request_hdr *);
3302 	wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3303 
3304 	oreq = (struct cpl_pass_open_req *)(wr + 1);
3305 	txpkt = (struct ulp_txpkt *)oreq;
3306 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3307 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3308 	OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3309 	oreq->local_port = htons(f->dport);
3310 	oreq->peer_port = htons(f->sport);
3311 	oreq->local_ip = htonl(f->dip);
3312 	oreq->peer_ip = htonl(f->sip);
3313 	oreq->peer_netmask = htonl(f->sip_mask);
3314 	oreq->opt0h = 0;
3315 	oreq->opt0l = htonl(F_NO_OFFLOAD);
3316 	oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3317 			 V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3318 			 V_VLAN_PRI(f->vlan_prio >> 1) |
3319 			 V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3320 			 V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3321 			 V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3322 
3323 	sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3324 	set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3325 			  (f->report_filter_id << 15) | (1 << 23) |
3326 			  ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3327 	set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3328 	t3_mgmt_tx(sc, m);
3329 
3330 	if (f->pass && !f->rss) {
3331 		len = sizeof(*sreq);
3332 		m = m_gethdr(M_WAITOK, MT_DATA);
3333 		m->m_len = m->m_pkthdr.len = len;
3334 		bzero(mtod(m, char *), len);
3335 		sreq = mtod(m, struct cpl_set_tcb_field *);
3336 		sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3337 		mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3338 				 (u64)sc->rrss_map[f->qset] << 19);
3339 		t3_mgmt_tx(sc, m);
3340 	}
3341 	return 0;
3342 }
3343 
3344 static inline void
3345 mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3346     unsigned int word, u64 mask, u64 val)
3347 {
3348 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3349 	req->reply = V_NO_REPLY(1);
3350 	req->cpu_idx = 0;
3351 	req->word = htons(word);
3352 	req->mask = htobe64(mask);
3353 	req->val = htobe64(val);
3354 }
3355 
3356 static inline void
3357 set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3358     unsigned int word, u64 mask, u64 val)
3359 {
3360 	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3361 
3362 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3363 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3364 	mk_set_tcb_field(req, tid, word, mask, val);
3365 }
3366 
3367 void
3368 t3_iterate(void (*func)(struct adapter *, void *), void *arg)
3369 {
3370 	struct adapter *sc;
3371 
3372 	mtx_lock(&t3_list_lock);
3373 	SLIST_FOREACH(sc, &t3_list, link) {
3374 		/*
3375 		 * func should not make any assumptions about what state sc is
3376 		 * in - the only guarantee is that sc->sc_lock is a valid lock.
3377 		 */
3378 		func(sc, arg);
3379 	}
3380 	mtx_unlock(&t3_list_lock);
3381 }
3382 
3383 #ifdef TCP_OFFLOAD
3384 static int
3385 toe_capability(struct port_info *pi, int enable)
3386 {
3387 	int rc;
3388 	struct adapter *sc = pi->adapter;
3389 
3390 	ADAPTER_LOCK_ASSERT_OWNED(sc);
3391 
3392 	if (!is_offload(sc))
3393 		return (ENODEV);
3394 
3395 	if (enable) {
3396 		if (!(sc->flags & FULL_INIT_DONE)) {
3397 			log(LOG_WARNING,
3398 			    "You must enable a cxgb interface first\n");
3399 			return (EAGAIN);
3400 		}
3401 
3402 		if (isset(&sc->offload_map, pi->port_id))
3403 			return (0);
3404 
3405 		if (!(sc->flags & TOM_INIT_DONE)) {
3406 			rc = t3_activate_uld(sc, ULD_TOM);
3407 			if (rc == EAGAIN) {
3408 				log(LOG_WARNING,
3409 				    "You must kldload t3_tom.ko before trying "
3410 				    "to enable TOE on a cxgb interface.\n");
3411 			}
3412 			if (rc != 0)
3413 				return (rc);
3414 			KASSERT(sc->tom_softc != NULL,
3415 			    ("%s: TOM activated but softc NULL", __func__));
3416 			KASSERT(sc->flags & TOM_INIT_DONE,
3417 			    ("%s: TOM activated but flag not set", __func__));
3418 		}
3419 
3420 		setbit(&sc->offload_map, pi->port_id);
3421 
3422 		/*
3423 		 * XXX: Temporary code to allow iWARP to be enabled when TOE is
3424 		 * enabled on any port.  Need to figure out how to enable,
3425 		 * disable, load, and unload iWARP cleanly.
3426 		 */
3427 		if (!isset(&sc->offload_map, MAX_NPORTS) &&
3428 		    t3_activate_uld(sc, ULD_IWARP) == 0)
3429 			setbit(&sc->offload_map, MAX_NPORTS);
3430 	} else {
3431 		if (!isset(&sc->offload_map, pi->port_id))
3432 			return (0);
3433 
3434 		KASSERT(sc->flags & TOM_INIT_DONE,
3435 		    ("%s: TOM never initialized?", __func__));
3436 		clrbit(&sc->offload_map, pi->port_id);
3437 	}
3438 
3439 	return (0);
3440 }
3441 
3442 /*
3443  * Add an upper layer driver to the global list.
3444  */
3445 int
3446 t3_register_uld(struct uld_info *ui)
3447 {
3448 	int rc = 0;
3449 	struct uld_info *u;
3450 
3451 	mtx_lock(&t3_uld_list_lock);
3452 	SLIST_FOREACH(u, &t3_uld_list, link) {
3453 	    if (u->uld_id == ui->uld_id) {
3454 		    rc = EEXIST;
3455 		    goto done;
3456 	    }
3457 	}
3458 
3459 	SLIST_INSERT_HEAD(&t3_uld_list, ui, link);
3460 	ui->refcount = 0;
3461 done:
3462 	mtx_unlock(&t3_uld_list_lock);
3463 	return (rc);
3464 }
3465 
3466 int
3467 t3_unregister_uld(struct uld_info *ui)
3468 {
3469 	int rc = EINVAL;
3470 	struct uld_info *u;
3471 
3472 	mtx_lock(&t3_uld_list_lock);
3473 
3474 	SLIST_FOREACH(u, &t3_uld_list, link) {
3475 	    if (u == ui) {
3476 		    if (ui->refcount > 0) {
3477 			    rc = EBUSY;
3478 			    goto done;
3479 		    }
3480 
3481 		    SLIST_REMOVE(&t3_uld_list, ui, uld_info, link);
3482 		    rc = 0;
3483 		    goto done;
3484 	    }
3485 	}
3486 done:
3487 	mtx_unlock(&t3_uld_list_lock);
3488 	return (rc);
3489 }
3490 
3491 int
3492 t3_activate_uld(struct adapter *sc, int id)
3493 {
3494 	int rc = EAGAIN;
3495 	struct uld_info *ui;
3496 
3497 	mtx_lock(&t3_uld_list_lock);
3498 
3499 	SLIST_FOREACH(ui, &t3_uld_list, link) {
3500 		if (ui->uld_id == id) {
3501 			rc = ui->activate(sc);
3502 			if (rc == 0)
3503 				ui->refcount++;
3504 			goto done;
3505 		}
3506 	}
3507 done:
3508 	mtx_unlock(&t3_uld_list_lock);
3509 
3510 	return (rc);
3511 }
3512 
3513 int
3514 t3_deactivate_uld(struct adapter *sc, int id)
3515 {
3516 	int rc = EINVAL;
3517 	struct uld_info *ui;
3518 
3519 	mtx_lock(&t3_uld_list_lock);
3520 
3521 	SLIST_FOREACH(ui, &t3_uld_list, link) {
3522 		if (ui->uld_id == id) {
3523 			rc = ui->deactivate(sc);
3524 			if (rc == 0)
3525 				ui->refcount--;
3526 			goto done;
3527 		}
3528 	}
3529 done:
3530 	mtx_unlock(&t3_uld_list_lock);
3531 
3532 	return (rc);
3533 }
3534 
3535 static int
3536 cpl_not_handled(struct sge_qset *qs __unused, struct rsp_desc *r __unused,
3537     struct mbuf *m)
3538 {
3539 	m_freem(m);
3540 	return (EDOOFUS);
3541 }
3542 
3543 int
3544 t3_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h)
3545 {
3546 	uintptr_t *loc, new;
3547 
3548 	if (opcode >= NUM_CPL_HANDLERS)
3549 		return (EINVAL);
3550 
3551 	new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled;
3552 	loc = (uintptr_t *) &sc->cpl_handler[opcode];
3553 	atomic_store_rel_ptr(loc, new);
3554 
3555 	return (0);
3556 }
3557 #endif
3558 
3559 static int
3560 cxgbc_mod_event(module_t mod, int cmd, void *arg)
3561 {
3562 	int rc = 0;
3563 
3564 	switch (cmd) {
3565 	case MOD_LOAD:
3566 		mtx_init(&t3_list_lock, "T3 adapters", 0, MTX_DEF);
3567 		SLIST_INIT(&t3_list);
3568 #ifdef TCP_OFFLOAD
3569 		mtx_init(&t3_uld_list_lock, "T3 ULDs", 0, MTX_DEF);
3570 		SLIST_INIT(&t3_uld_list);
3571 #endif
3572 		break;
3573 
3574 	case MOD_UNLOAD:
3575 #ifdef TCP_OFFLOAD
3576 		mtx_lock(&t3_uld_list_lock);
3577 		if (!SLIST_EMPTY(&t3_uld_list)) {
3578 			rc = EBUSY;
3579 			mtx_unlock(&t3_uld_list_lock);
3580 			break;
3581 		}
3582 		mtx_unlock(&t3_uld_list_lock);
3583 		mtx_destroy(&t3_uld_list_lock);
3584 #endif
3585 		mtx_lock(&t3_list_lock);
3586 		if (!SLIST_EMPTY(&t3_list)) {
3587 			rc = EBUSY;
3588 			mtx_unlock(&t3_list_lock);
3589 			break;
3590 		}
3591 		mtx_unlock(&t3_list_lock);
3592 		mtx_destroy(&t3_list_lock);
3593 		break;
3594 	}
3595 
3596 	return (rc);
3597 }
3598 
3599 #ifdef DEBUGNET
3600 static void
3601 cxgb_debugnet_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize)
3602 {
3603 	struct port_info *pi;
3604 	adapter_t *adap;
3605 
3606 	pi = if_getsoftc(ifp);
3607 	adap = pi->adapter;
3608 	ADAPTER_LOCK(adap);
3609 	*nrxr = adap->nqsets;
3610 	*ncl = adap->sge.qs[0].fl[1].size;
3611 	*clsize = adap->sge.qs[0].fl[1].buf_size;
3612 	ADAPTER_UNLOCK(adap);
3613 }
3614 
3615 static void
3616 cxgb_debugnet_event(struct ifnet *ifp, enum debugnet_ev event)
3617 {
3618 	struct port_info *pi;
3619 	struct sge_qset *qs;
3620 	int i;
3621 
3622 	pi = if_getsoftc(ifp);
3623 	if (event == DEBUGNET_START)
3624 		for (i = 0; i < pi->adapter->nqsets; i++) {
3625 			qs = &pi->adapter->sge.qs[i];
3626 
3627 			/* Need to reinit after debugnet_mbuf_start(). */
3628 			qs->fl[0].zone = zone_pack;
3629 			qs->fl[1].zone = zone_clust;
3630 			qs->lro.enabled = 0;
3631 		}
3632 }
3633 
3634 static int
3635 cxgb_debugnet_transmit(struct ifnet *ifp, struct mbuf *m)
3636 {
3637 	struct port_info *pi;
3638 	struct sge_qset *qs;
3639 
3640 	pi = if_getsoftc(ifp);
3641 	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
3642 	    IFF_DRV_RUNNING)
3643 		return (ENOENT);
3644 
3645 	qs = &pi->adapter->sge.qs[pi->first_qset];
3646 	return (cxgb_debugnet_encap(qs, &m));
3647 }
3648 
3649 static int
3650 cxgb_debugnet_poll(struct ifnet *ifp, int count)
3651 {
3652 	struct port_info *pi;
3653 	adapter_t *adap;
3654 	int i;
3655 
3656 	pi = if_getsoftc(ifp);
3657 	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
3658 		return (ENOENT);
3659 
3660 	adap = pi->adapter;
3661 	for (i = 0; i < adap->nqsets; i++)
3662 		(void)cxgb_debugnet_poll_rx(adap, &adap->sge.qs[i]);
3663 	(void)cxgb_debugnet_poll_tx(&adap->sge.qs[pi->first_qset]);
3664 	return (0);
3665 }
3666 #endif /* DEBUGNET */
3667