xref: /freebsd/sys/dev/cxgb/cxgb_main.c (revision d0b2dbfa0ecf2bbc9709efc5e20baf8e4b44bbbf)
1 /**************************************************************************
2 SPDX-License-Identifier: BSD-2-Clause
3 
4 Copyright (c) 2007-2009, Chelsio Inc.
5 All rights reserved.
6 
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
9 
10  1. Redistributions of source code must retain the above copyright notice,
11     this list of conditions and the following disclaimer.
12 
13  2. Neither the name of the Chelsio Corporation nor the names of its
14     contributors may be used to endorse or promote products derived from
15     this software without specific prior written permission.
16 
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.
28 
29 ***************************************************************************/
30 
31 #include <sys/cdefs.h>
32 #include "opt_inet.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/bus.h>
38 #include <sys/module.h>
39 #include <sys/pciio.h>
40 #include <sys/conf.h>
41 #include <machine/bus.h>
42 #include <machine/resource.h>
43 #include <sys/ktr.h>
44 #include <sys/rman.h>
45 #include <sys/ioccom.h>
46 #include <sys/mbuf.h>
47 #include <sys/linker.h>
48 #include <sys/firmware.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/smp.h>
52 #include <sys/sysctl.h>
53 #include <sys/syslog.h>
54 #include <sys/queue.h>
55 #include <sys/taskqueue.h>
56 #include <sys/proc.h>
57 
58 #include <net/bpf.h>
59 #include <net/debugnet.h>
60 #include <net/ethernet.h>
61 #include <net/if.h>
62 #include <net/if_var.h>
63 #include <net/if_arp.h>
64 #include <net/if_dl.h>
65 #include <net/if_media.h>
66 #include <net/if_types.h>
67 #include <net/if_vlan_var.h>
68 
69 #include <netinet/in_systm.h>
70 #include <netinet/in.h>
71 #include <netinet/if_ether.h>
72 #include <netinet/ip.h>
74 #include <netinet/tcp.h>
75 #include <netinet/udp.h>
76 
77 #include <dev/pci/pcireg.h>
78 #include <dev/pci/pcivar.h>
79 #include <dev/pci/pci_private.h>
80 
81 #include <cxgb_include.h>
82 
83 #ifdef PRIV_SUPPORTED
84 #include <sys/priv.h>
85 #endif
86 
87 static int cxgb_setup_interrupts(adapter_t *);
88 static void cxgb_teardown_interrupts(adapter_t *);
89 static void cxgb_init(void *);
90 static int cxgb_init_locked(struct port_info *);
91 static int cxgb_uninit_locked(struct port_info *);
92 static int cxgb_uninit_synchronized(struct port_info *);
93 static int cxgb_ioctl(if_t, unsigned long, caddr_t);
94 static int cxgb_media_change(if_t);
95 static int cxgb_ifm_type(int);
96 static void cxgb_build_medialist(struct port_info *);
97 static void cxgb_media_status(if_t, struct ifmediareq *);
98 static uint64_t cxgb_get_counter(if_t, ift_counter);
99 static int setup_sge_qsets(adapter_t *);
100 static void cxgb_async_intr(void *);
101 static void cxgb_tick_handler(void *, int);
102 static void cxgb_tick(void *);
103 static void link_check_callout(void *);
104 static void check_link_status(void *, int);
105 static void setup_rss(adapter_t *sc);
106 static int alloc_filters(struct adapter *);
107 static int setup_hw_filters(struct adapter *);
108 static int set_filter(struct adapter *, int, const struct filter_info *);
109 static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
110     unsigned int, u64, u64);
111 static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
112     unsigned int, u64, u64);
113 #ifdef TCP_OFFLOAD
114 static int cpl_not_handled(struct sge_qset *, struct rsp_desc *, struct mbuf *);
115 #endif
116 
117 /* Attachment glue for the PCI controller end of the device.  Each port of
118  * the device is attached separately, as defined later.
119  */
120 static int cxgb_controller_probe(device_t);
121 static int cxgb_controller_attach(device_t);
122 static int cxgb_controller_detach(device_t);
123 static void cxgb_free(struct adapter *);
124 static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
125     unsigned int end);
126 static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
127 static int cxgb_get_regs_len(void);
128 static void touch_bars(device_t dev);
129 static void cxgb_update_mac_settings(struct port_info *p);
130 #ifdef TCP_OFFLOAD
131 static int toe_capability(struct port_info *, int);
132 #endif
133 
134 /* Table for probing the cards.  The desc field isn't actually used */
135 struct cxgb_ident {
136 	uint16_t	vendor;
137 	uint16_t	device;
138 	int		index;
139 	char		*desc;
140 } cxgb_identifiers[] = {
141 	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
142 	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
143 	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
144 	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
145 	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
146 	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
147 	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
148 	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
149 	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
150 	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
151 	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
152 	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
153 	{PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
154 	{PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
155 	{0, 0, 0, NULL}
156 };
157 
158 static device_method_t cxgb_controller_methods[] = {
159 	DEVMETHOD(device_probe,		cxgb_controller_probe),
160 	DEVMETHOD(device_attach,	cxgb_controller_attach),
161 	DEVMETHOD(device_detach,	cxgb_controller_detach),
162 
163 	DEVMETHOD_END
164 };
165 
166 static driver_t cxgb_controller_driver = {
167 	"cxgbc",
168 	cxgb_controller_methods,
169 	sizeof(struct adapter)
170 };
171 
172 static int cxgbc_mod_event(module_t, int, void *);
173 
174 DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgbc_mod_event, NULL);
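/* The PNP table skips cxgb_identifiers' all-zero terminator. */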
175 MODULE_PNP_INFO("U16:vendor;U16:device", pci, cxgbc, cxgb_identifiers,
176     nitems(cxgb_identifiers) - 1);
177 MODULE_VERSION(cxgbc, 1);
178 MODULE_DEPEND(cxgbc, firmware, 1, 1, 1);
179 
180 /*
181  * Attachment glue for the ports.  Attachment is done directly to the
182  * controller device.
183  */
184 static int cxgb_port_probe(device_t);
185 static int cxgb_port_attach(device_t);
186 static int cxgb_port_detach(device_t);
187 
188 static device_method_t cxgb_port_methods[] = {
189 	DEVMETHOD(device_probe,		cxgb_port_probe),
190 	DEVMETHOD(device_attach,	cxgb_port_attach),
191 	DEVMETHOD(device_detach,	cxgb_port_detach),
192 	{ 0, 0 }
193 };
194 
195 static driver_t cxgb_port_driver = {
196 	"cxgb",
197 	cxgb_port_methods,
198 	0
199 };
200 
201 static d_ioctl_t cxgb_extension_ioctl;
202 static d_open_t cxgb_extension_open;
203 static d_close_t cxgb_extension_close;
204 
205 static struct cdevsw cxgb_cdevsw = {
206        .d_version =    D_VERSION,
207        .d_flags =      0,
208        .d_open =       cxgb_extension_open,
209        .d_close =      cxgb_extension_close,
210        .d_ioctl =      cxgb_extension_ioctl,
211        .d_name =       "cxgb",
212 };
213 
214 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, 0, 0);
215 MODULE_VERSION(cxgb, 1);
216 
217 DEBUGNET_DEFINE(cxgb);
218 
219 static struct mtx t3_list_lock;
220 static SLIST_HEAD(, adapter) t3_list;
221 #ifdef TCP_OFFLOAD
222 static struct mtx t3_uld_list_lock;
223 static SLIST_HEAD(, uld_info) t3_uld_list;
224 #endif
225 
226 /*
227  * The driver uses the best interrupt scheme available on a platform in the
228  * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
229  * of these schemes the driver may consider as follows:
230  *
231  * msi = 2: choose from among all three options
232  * msi = 1: only consider MSI and pin interrupts
233  * msi = 0: force pin interrupts
234  */
235 static int msi_allowed = 2;
236 
237 SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
238     "CXGB driver parameters");
239 SYSCTL_INT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
240     "MSI-X, MSI, INTx selector");
241 
242 /*
243  * The driver uses an auto-queue algorithm by default.
244  * To disable it and force a single queue-set per port, use multiq = 0
245  */
246 static int multiq = 1;
247 SYSCTL_INT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
248     "use min(ncpus/ports, 8) queue-sets per port");
249 
250 /*
251  * By default the driver will not update the firmware unless
252  * it was compiled against a newer version
253  *
254  */
255 static int force_fw_update = 0;
256 SYSCTL_INT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
257     "update firmware even if up to date");
258 
259 int cxgb_use_16k_clusters = -1;
260 SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
261     &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");
262 
263 static int nfilters = -1;
264 SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
265     &nfilters, 0, "max number of entries in the filter table");
266 
267 enum {
268 	MAX_TXQ_ENTRIES      = 16384,
269 	MAX_CTRL_TXQ_ENTRIES = 1024,
270 	MAX_RSPQ_ENTRIES     = 16384,
271 	MAX_RX_BUFFERS       = 16384,
272 	MAX_RX_JUMBO_BUFFERS = 16384,
273 	MIN_TXQ_ENTRIES      = 4,
274 	MIN_CTRL_TXQ_ENTRIES = 4,
275 	MIN_RSPQ_ENTRIES     = 32,
276 	MIN_FL_ENTRIES       = 32,
277 	MIN_FL_JUMBO_ENTRIES = 32
278 };
279 
280 struct filter_info {
281 	u32 sip;
282 	u32 sip_mask;
283 	u32 dip;
284 	u16 sport;
285 	u16 dport;
286 	u32 vlan:12;
287 	u32 vlan_prio:3;
288 	u32 mac_hit:1;
289 	u32 mac_idx:4;
290 	u32 mac_vld:1;
291 	u32 pkt_type:2;
292 	u32 report_filter_id:1;
293 	u32 pass:1;
294 	u32 rss:1;
295 	u32 qset:3;
296 	u32 locked:1;
297 	u32 valid:1;
298 };
299 
300 enum { FILTER_NO_VLAN_PRI = 7 };
301 
302 #define EEPROM_MAGIC 0x38E2F10C
303 
304 #define PORT_MASK ((1 << MAX_NPORTS) - 1)
305 
306 
307 static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
308 
309 
310 static __inline char
311 t3rev2char(struct adapter *adapter)
312 {
313 	char rev = 'z';
314 
315 	switch(adapter->params.rev) {
316 	case T3_REV_A:
317 		rev = 'a';
318 		break;
319 	case T3_REV_B:
320 	case T3_REV_B2:
321 		rev = 'b';
322 		break;
323 	case T3_REV_C:
324 		rev = 'c';
325 		break;
326 	}
327 	return rev;
328 }
329 
330 static struct cxgb_ident *
331 cxgb_get_ident(device_t dev)
332 {
333 	struct cxgb_ident *id;
334 
335 	for (id = cxgb_identifiers; id->desc != NULL; id++) {
336 		if ((id->vendor == pci_get_vendor(dev)) &&
337 		    (id->device == pci_get_device(dev))) {
338 			return (id);
339 		}
340 	}
341 	return (NULL);
342 }
343 
344 static const struct adapter_info *
345 cxgb_get_adapter_info(device_t dev)
346 {
347 	struct cxgb_ident *id;
348 	const struct adapter_info *ai;
349 
350 	id = cxgb_get_ident(dev);
351 	if (id == NULL)
352 		return (NULL);
353 
354 	ai = t3_get_adapter_info(id->index);
355 
356 	return (ai);
357 }
358 
359 static int
360 cxgb_controller_probe(device_t dev)
361 {
362 	const struct adapter_info *ai;
363 	char *ports, buf[80];
364 	int nports;
365 
366 	ai = cxgb_get_adapter_info(dev);
367 	if (ai == NULL)
368 		return (ENXIO);
369 
370 	nports = ai->nports0 + ai->nports1;
371 	if (nports == 1)
372 		ports = "port";
373 	else
374 		ports = "ports";
375 
376 	snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
377 	device_set_desc_copy(dev, buf);
378 	return (BUS_PROBE_DEFAULT);
379 }
380 
381 #define FW_FNAME "cxgb_t3fw"
382 #define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
383 #define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
384 
385 static int
386 upgrade_fw(adapter_t *sc)
387 {
388 	const struct firmware *fw;
389 	int status;
390 	u32 vers;
391 
392 	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
393 		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
394 		return (ENOENT);
395 	} else
396 		device_printf(sc->dev, "installing firmware on card\n");
397 	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
398 
399 	if (status != 0) {
400 		device_printf(sc->dev, "failed to install firmware: %d\n",
401 		    status);
402 	} else {
403 		t3_get_fw_version(sc, &vers);
404 		snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
405 		    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
406 		    G_FW_VERSION_MICRO(vers));
407 	}
408 
409 	firmware_put(fw, FIRMWARE_UNLOAD);
410 
411 	return (status);
412 }
413 
414 /*
415  * The cxgb_controller_attach function is responsible for the initial
416  * bringup of the device.  Its responsibilities include:
417  *
418  *  1. Determine if the device supports MSI or MSI-X.
419  *  2. Allocate bus resources so that we can access the Base Address Register
420  *  3. Create and initialize mutexes for the controller and its control
421  *     logic such as SGE and MDIO.
422  *  4. Call hardware specific setup routine for the adapter as a whole.
423  *  5. Allocate the BAR for doing MSI-X.
424  *  6. Set up the line interrupt iff MSI-X is not supported.
425  *  7. Create the driver's taskq.
426  *  8. Start one task queue service thread.
427  *  9. Check if the firmware and SRAM are up-to-date.  They will be
428  *     auto-updated later (before FULL_INIT_DONE), if required.
429  * 10. Create a child device for each MAC (port)
430  * 11. Initialize T3 private state.
431  * 12. Trigger the LED
432  * 13. Set up offload iff supported.
433  * 14. Reset/restart the tick callout.
434  * 15. Attach sysctls
435  *
436  * NOTE: Any modification or deviation from this list MUST be reflected in
437  * the above comment.  Failure to do so will result in problems on various
438  * error conditions including link flapping.
439  */
440 static int
441 cxgb_controller_attach(device_t dev)
442 {
443 	device_t child;
444 	const struct adapter_info *ai;
445 	struct adapter *sc;
446 	int i, error = 0;
447 	uint32_t vers;
448 	int port_qsets = 1;
449 	int msi_needed, reg;
450 	char buf[80];
451 
452 	sc = device_get_softc(dev);
453 	sc->dev = dev;
454 	sc->msi_count = 0;
455 	ai = cxgb_get_adapter_info(dev);
456 
457 	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
458 	    device_get_unit(dev));
459 	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
460 
461 	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
462 	    device_get_unit(dev));
463 	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
464 	    device_get_unit(dev));
465 	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
466 	    device_get_unit(dev));
467 
468 	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
469 	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
470 	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
471 
472 	mtx_lock(&t3_list_lock);
473 	SLIST_INSERT_HEAD(&t3_list, sc, link);
474 	mtx_unlock(&t3_list_lock);
475 
476 	/* find the PCIe link width and set max read request to 4KB */
477 	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
478 		uint16_t lnk;
479 
480 		lnk = pci_read_config(dev, reg + PCIER_LINK_STA, 2);
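		/* The negotiated link width is in bits 9:4 of the link status register. */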
481 		sc->link_width = (lnk & PCIEM_LINK_STA_WIDTH) >> 4;
482 		if (sc->link_width < 8 &&
483 		    (ai->caps & SUPPORTED_10000baseT_Full)) {
484 			device_printf(sc->dev,
485 			    "PCIe x%d Link, expect reduced performance\n",
486 			    sc->link_width);
487 		}
488 
489 		pci_set_max_read_req(dev, 4096);
490 	}
491 
492 	touch_bars(dev);
493 	pci_enable_busmaster(dev);
494 	/*
495 	 * Allocate the registers and make them available to the driver.
496 	 * The registers that we care about for NIC mode are in BAR 0
497 	 */
498 	sc->regs_rid = PCIR_BAR(0);
499 	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
500 	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
501 		device_printf(dev, "Cannot allocate BAR region 0\n");
502 		error = ENXIO;
503 		goto out;
504 	}
505 
506 	sc->bt = rman_get_bustag(sc->regs_res);
507 	sc->bh = rman_get_bushandle(sc->regs_res);
508 	sc->mmio_len = rman_get_size(sc->regs_res);
509 
510 	for (i = 0; i < MAX_NPORTS; i++)
511 		sc->port[i].adapter = sc;
512 
513 	if (t3_prep_adapter(sc, ai, 1) < 0) {
514 		printf("prep adapter failed\n");
515 		error = ENODEV;
516 		goto out;
517 	}
518 
519 	sc->udbs_rid = PCIR_BAR(2);
520 	sc->udbs_res = NULL;
521 	if (is_offload(sc) &&
522 	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
523 		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
524 		device_printf(dev, "Cannot allocate BAR region 1\n");
525 		error = ENXIO;
526 		goto out;
527 	}
528 
529 	/* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
530 	 * enough messages for the queue sets.  If that fails, try falling
531 	 * back to MSI.  If that fails, then try falling back to the legacy
532 	 * interrupt pin model.
533 	 */
534 	sc->msix_regs_rid = 0x20;
535 	if ((msi_allowed >= 2) &&
536 	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
537 	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
538 
539 		if (multiq)
540 			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
541 		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
542 
543 		if (pci_msix_count(dev) == 0 ||
544 		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
545 		    sc->msi_count != msi_needed) {
546 			device_printf(dev, "alloc msix failed - "
547 				      "msi_count=%d, msi_needed=%d, err=%d; "
548 				      "will try MSI\n", sc->msi_count,
549 				      msi_needed, error);
550 			sc->msi_count = 0;
551 			port_qsets = 1;
552 			pci_release_msi(dev);
553 			bus_release_resource(dev, SYS_RES_MEMORY,
554 			    sc->msix_regs_rid, sc->msix_regs_res);
555 			sc->msix_regs_res = NULL;
556 		} else {
557 			sc->flags |= USING_MSIX;
558 			sc->cxgb_intr = cxgb_async_intr;
559 			device_printf(dev,
560 				      "using MSI-X interrupts (%u vectors)\n",
561 				      sc->msi_count);
562 		}
563 	}
564 
565 	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
566 		sc->msi_count = 1;
567 		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
568 			device_printf(dev, "alloc msi failed - "
569 				      "err=%d; will try INTx\n", error);
570 			sc->msi_count = 0;
571 			port_qsets = 1;
572 			pci_release_msi(dev);
573 		} else {
574 			sc->flags |= USING_MSI;
575 			sc->cxgb_intr = t3_intr_msi;
576 			device_printf(dev, "using MSI interrupts\n");
577 		}
578 	}
579 	if (sc->msi_count == 0) {
580 		device_printf(dev, "using line interrupts\n");
581 		sc->cxgb_intr = t3b_intr;
582 	}
583 
584 	/* Create a private taskqueue thread for handling driver events */
585 	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
586 	    taskqueue_thread_enqueue, &sc->tq);
587 	if (sc->tq == NULL) {
588 		device_printf(dev, "failed to allocate controller task queue\n");
589 		goto out;
590 	}
591 
592 	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
593 	    device_get_nameunit(dev));
594 	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
595 
596 
597 	/* Create a periodic callout for checking adapter status */
598 	callout_init(&sc->cxgb_tick_ch, 1);
599 
600 	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
601 		/*
602 		 * Warn user that a firmware update will be attempted in init.
603 		 */
604 		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
605 		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
606 		sc->flags &= ~FW_UPTODATE;
607 	} else {
608 		sc->flags |= FW_UPTODATE;
609 	}
610 
611 	if (t3_check_tpsram_version(sc) < 0) {
612 		/*
613 		 * Warn user that a firmware update will be attempted in init.
614 		 */
615 		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
616 		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
617 		sc->flags &= ~TPS_UPTODATE;
618 	} else {
619 		sc->flags |= TPS_UPTODATE;
620 	}
621 
622 	/*
623 	 * Create a child device for each MAC.  The ethernet attachment
624 	 * will be done in these children.
625 	 */
626 	for (i = 0; i < (sc)->params.nports; i++) {
627 		struct port_info *pi;
628 
629 		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
630 			device_printf(dev, "failed to add child port\n");
631 			error = EINVAL;
632 			goto out;
633 		}
634 		pi = &sc->port[i];
635 		pi->adapter = sc;
636 		pi->nqsets = port_qsets;
637 		pi->first_qset = i*port_qsets;
638 		pi->port_id = i;
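		/*
		 * Ports at index nports0 and above hang off TX channel 1.
		 * Channel 0 ports get even TX packet interfaces (2 * i);
		 * channel 1 ports get odd ones (2 * (i - nports0) + 1).
		 */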
639 		pi->tx_chan = i >= ai->nports0;
640 		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
641 		sc->rxpkt_map[pi->txpkt_intf] = i;
642 		sc->port[i].tx_chan = i >= ai->nports0;
643 		sc->portdev[i] = child;
644 		device_set_softc(child, pi);
645 	}
646 	if ((error = bus_generic_attach(dev)) != 0)
647 		goto out;
648 
649 	/* initialize sge private state */
650 	t3_sge_init_adapter(sc);
651 
652 	t3_led_ready(sc);
653 
654 	error = t3_get_fw_version(sc, &vers);
655 	if (error)
656 		goto out;
657 
658 	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
659 	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
660 	    G_FW_VERSION_MICRO(vers));
661 
662 	snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
663 		 ai->desc, is_offload(sc) ? "R" : "",
664 		 sc->params.vpd.ec, sc->params.vpd.sn);
665 	device_set_desc_copy(dev, buf);
666 
667 	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
668 		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
669 		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
670 
671 	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
672 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
673 	t3_add_attach_sysctls(sc);
674 
675 #ifdef TCP_OFFLOAD
676 	for (i = 0; i < NUM_CPL_HANDLERS; i++)
677 		sc->cpl_handler[i] = cpl_not_handled;
678 #endif
679 
680 	t3_intr_clear(sc);
681 	error = cxgb_setup_interrupts(sc);
682 out:
683 	if (error)
684 		cxgb_free(sc);
685 
686 	return (error);
687 }
688 
689 /*
690  * The cxgb_controller_detach routine is called when the device is
691  * unloaded from the system.
692  */
693 
694 static int
695 cxgb_controller_detach(device_t dev)
696 {
697 	struct adapter *sc;
698 
699 	sc = device_get_softc(dev);
700 
701 	cxgb_free(sc);
702 
703 	return (0);
704 }
705 
706 /*
707  * cxgb_free() is called by the cxgb_controller_detach() routine
708  * to tear down the structures that were built up in
709  * cxgb_controller_attach(), and should be the final piece of work
710  * done when fully unloading the driver.
711  *
712  * Its responsibilities include:
713  *  1. Shutting down the threads started by the cxgb_controller_attach()
714  *     routine.
715  *  2. Stopping the lower level device and all callouts (cxgb_down_locked()).
716  *  3. Detaching all of the port devices created during the
717  *     cxgb_controller_attach() routine.
718  *  4. Removing the device children created via cxgb_controller_attach().
719  *  5. Releasing PCI resources associated with the device.
720  *  6. Turning off the offload support, iff it was turned on.
721  *  7. Destroying the mutexes created in cxgb_controller_attach().
722  *
723  */
724 static void
725 cxgb_free(struct adapter *sc)
726 {
727 	int i, nqsets = 0;
728 
729 	ADAPTER_LOCK(sc);
730 	sc->flags |= CXGB_SHUTDOWN;
731 	ADAPTER_UNLOCK(sc);
732 
733 	/*
734 	 * Make sure all child devices are gone.
735 	 */
736 	bus_generic_detach(sc->dev);
737 	for (i = 0; i < (sc)->params.nports; i++) {
738 		if (sc->portdev[i] &&
739 		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
740 			device_printf(sc->dev, "failed to delete child port\n");
741 		nqsets += sc->port[i].nqsets;
742 	}
743 
744 	/*
745 	 * At this point, it is as if cxgb_port_detach has run on all ports, and
746 	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
747 	 * all open devices have been closed.
748 	 */
749 	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
750 					   __func__, sc->open_device_map));
751 	for (i = 0; i < sc->params.nports; i++) {
752 		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
753 						  __func__, i));
754 	}
755 
756 	/*
757 	 * Finish off the adapter's callouts.
758 	 */
759 	callout_drain(&sc->cxgb_tick_ch);
760 	callout_drain(&sc->sge_timer_ch);
761 
762 	/*
763 	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
764 	 * sysctls are cleaned up by the kernel linker.
765 	 */
766 	if (sc->flags & FULL_INIT_DONE) {
767 		t3_free_sge_resources(sc, nqsets);
768 		sc->flags &= ~FULL_INIT_DONE;
769 	}
770 
771 	/*
772 	 * Release all interrupt resources.
773 	 */
774 	cxgb_teardown_interrupts(sc);
775 	if (sc->flags & (USING_MSI | USING_MSIX)) {
776 		device_printf(sc->dev, "releasing msi message(s)\n");
777 		pci_release_msi(sc->dev);
778 	} else {
779 		device_printf(sc->dev, "no msi message to release\n");
780 	}
781 
782 	if (sc->msix_regs_res != NULL) {
783 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
784 		    sc->msix_regs_res);
785 	}
786 
787 	/*
788 	 * Free the adapter's taskqueue.
789 	 */
790 	if (sc->tq != NULL) {
791 		taskqueue_free(sc->tq);
792 		sc->tq = NULL;
793 	}
794 
795 	free(sc->filters, M_DEVBUF);
796 	t3_sge_free(sc);
797 
798 	if (sc->udbs_res != NULL)
799 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
800 		    sc->udbs_res);
801 
802 	if (sc->regs_res != NULL)
803 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
804 		    sc->regs_res);
805 
806 	MTX_DESTROY(&sc->mdio_lock);
807 	MTX_DESTROY(&sc->sge.reg_lock);
808 	MTX_DESTROY(&sc->elmer_lock);
809 	mtx_lock(&t3_list_lock);
810 	SLIST_REMOVE(&t3_list, sc, adapter, link);
811 	mtx_unlock(&t3_list_lock);
812 	ADAPTER_LOCK_DEINIT(sc);
813 }
814 
815 /**
816  *	setup_sge_qsets - configure SGE Tx/Rx/response queues
817  *	@sc: the controller softc
818  *
819  *	Determines how many sets of SGE queues to use and initializes them.
820  *	We support multiple queue sets per port if we have MSI-X, otherwise
821  *	just one queue set per port.
822  */
823 static int
824 setup_sge_qsets(adapter_t *sc)
825 {
826 	int i, j, err, irq_idx = 0, qset_idx = 0;
827 	u_int ntxq = SGE_TXQ_PER_SET;
828 
829 	if ((err = t3_sge_alloc(sc)) != 0) {
830 		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
831 		return (err);
832 	}
833 
834 	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
835 		irq_idx = -1;
836 
837 	for (i = 0; i < (sc)->params.nports; i++) {
838 		struct port_info *pi = &sc->port[i];
839 
840 		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
841 			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
842 			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
843 			    &sc->params.sge.qset[qset_idx], ntxq, pi);
844 			if (err) {
845 				t3_free_sge_resources(sc, qset_idx);
846 				device_printf(sc->dev,
847 				    "t3_sge_alloc_qset failed with %d\n", err);
848 				return (err);
849 			}
850 		}
851 	}
852 
853 	sc->nqsets = qset_idx;
854 
855 	return (0);
856 }
857 
858 static void
859 cxgb_teardown_interrupts(adapter_t *sc)
860 {
861 	int i;
862 
863 	for (i = 0; i < SGE_QSETS; i++) {
864 		if (sc->msix_intr_tag[i] == NULL) {
865 
866 			/* Should have been setup fully or not at all */
867 			KASSERT(sc->msix_irq_res[i] == NULL &&
868 				sc->msix_irq_rid[i] == 0,
869 				("%s: half-done interrupt (%d).", __func__, i));
870 
871 			continue;
872 		}
873 
874 		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
875 				  sc->msix_intr_tag[i]);
876 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
877 				     sc->msix_irq_res[i]);
878 
879 		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
880 		sc->msix_irq_rid[i] = 0;
881 	}
882 
883 	if (sc->intr_tag) {
884 		KASSERT(sc->irq_res != NULL,
885 			("%s: half-done interrupt.", __func__));
886 
887 		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
888 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
889 				     sc->irq_res);
890 
891 		sc->irq_res = sc->intr_tag = NULL;
892 		sc->irq_rid = 0;
893 	}
894 }
895 
896 static int
897 cxgb_setup_interrupts(adapter_t *sc)
898 {
899 	struct resource *res;
900 	void *tag;
901 	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
902 
903 	sc->irq_rid = intr_flag ? 1 : 0;
904 	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
905 					     RF_SHAREABLE | RF_ACTIVE);
906 	if (sc->irq_res == NULL) {
907 		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
908 			      intr_flag, sc->irq_rid);
909 		err = EINVAL;
910 		sc->irq_rid = 0;
911 	} else {
912 		err = bus_setup_intr(sc->dev, sc->irq_res,
913 		    INTR_MPSAFE | INTR_TYPE_NET, NULL,
914 		    sc->cxgb_intr, sc, &sc->intr_tag);
915 
916 		if (err) {
917 			device_printf(sc->dev,
918 				      "Cannot set up interrupt (%x, %u, %d)\n",
919 				      intr_flag, sc->irq_rid, err);
920 			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
921 					     sc->irq_res);
922 			sc->irq_res = sc->intr_tag = NULL;
923 			sc->irq_rid = 0;
924 		}
925 	}
926 
927 	/* That's all for INTx or MSI */
928 	if (!(intr_flag & USING_MSIX) || err)
929 		return (err);
930 
931 	bus_describe_intr(sc->dev, sc->irq_res, sc->intr_tag, "err");
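	/*
	 * The vector allocated above (rid 1) carries the async/error
	 * interrupt; the per-qset vectors follow at rid 2 and up.
	 */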
932 	for (i = 0; i < sc->msi_count - 1; i++) {
933 		rid = i + 2;
934 		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
935 					     RF_SHAREABLE | RF_ACTIVE);
936 		if (res == NULL) {
937 			device_printf(sc->dev, "Cannot allocate interrupt "
938 				      "for message %d\n", rid);
939 			err = EINVAL;
940 			break;
941 		}
942 
943 		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
944 				     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
945 		if (err) {
946 			device_printf(sc->dev, "Cannot set up interrupt "
947 				      "for message %d (%d)\n", rid, err);
948 			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
949 			break;
950 		}
951 
952 		sc->msix_irq_rid[i] = rid;
953 		sc->msix_irq_res[i] = res;
954 		sc->msix_intr_tag[i] = tag;
955 		bus_describe_intr(sc->dev, res, tag, "qs%d", i);
956 	}
957 
958 	if (err)
959 		cxgb_teardown_interrupts(sc);
960 
961 	return (err);
962 }
963 
964 
965 static int
966 cxgb_port_probe(device_t dev)
967 {
968 	struct port_info *p;
969 	char buf[80];
970 	const char *desc;
971 
972 	p = device_get_softc(dev);
973 	desc = p->phy.desc;
974 	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
975 	device_set_desc_copy(dev, buf);
976 	return (0);
977 }
978 
979 
980 static int
981 cxgb_makedev(struct port_info *pi)
982 {
983 
984 	pi->port_cdev = make_dev(&cxgb_cdevsw, if_getdunit(pi->ifp),
985 	    UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));
986 
987 	if (pi->port_cdev == NULL)
988 		return (ENOMEM);
989 
990 	pi->port_cdev->si_drv1 = (void *)pi;
991 
992 	return (0);
993 }
994 
995 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
996     IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
997     IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6)
998 #define CXGB_CAP_ENABLE CXGB_CAP
999 
1000 static int
1001 cxgb_port_attach(device_t dev)
1002 {
1003 	struct port_info *p;
1004 	if_t ifp;
1005 	int err;
1006 	struct adapter *sc;
1007 
1008 	p = device_get_softc(dev);
1009 	sc = p->adapter;
1010 	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
1011 	    device_get_unit(device_get_parent(dev)), p->port_id);
1012 	PORT_LOCK_INIT(p, p->lockbuf);
1013 
1014 	callout_init(&p->link_check_ch, 1);
1015 	TASK_INIT(&p->link_check_task, 0, check_link_status, p);
1016 
1017 	/* Allocate an ifnet object and set it up */
1018 	ifp = p->ifp = if_alloc(IFT_ETHER);
1019 	if (ifp == NULL) {
1020 		device_printf(dev, "Cannot allocate ifnet\n");
1021 		return (ENOMEM);
1022 	}
1023 
1024 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1025 	if_setinitfn(ifp, cxgb_init);
1026 	if_setsoftc(ifp, p);
1027 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
1028 	if_setioctlfn(ifp, cxgb_ioctl);
1029 	if_settransmitfn(ifp, cxgb_transmit);
1030 	if_setqflushfn(ifp, cxgb_qflush);
1031 	if_setgetcounterfn(ifp, cxgb_get_counter);
1032 
1033 	if_setcapabilities(ifp, CXGB_CAP);
1034 #ifdef TCP_OFFLOAD
1035 	if (is_offload(sc))
1036 		if_setcapabilitiesbit(ifp, IFCAP_TOE4, 0);
1037 #endif
1038 	if_setcapenable(ifp, CXGB_CAP_ENABLE);
1039 	if_sethwassist(ifp, CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
1040 	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
1041 	if_sethwtsomax(ifp, IP_MAXPACKET);
1042 	if_sethwtsomaxsegcount(ifp, 36);
1043 	if_sethwtsomaxsegsize(ifp, 65536);
1044 
1045 	/*
1046 	 * Disable TSO on 4-port - it isn't supported by the firmware.
1047 	 */
1048 	if (sc->params.nports > 2) {
1049 		if_setcapabilitiesbit(ifp, 0, IFCAP_TSO | IFCAP_VLAN_HWTSO);
1050 		if_setcapenablebit(ifp, 0, IFCAP_TSO | IFCAP_VLAN_HWTSO);
1051 		if_sethwassistbits(ifp, 0, CSUM_TSO);
1052 	}
1053 
1054 	ether_ifattach(ifp, p->hw_addr);
1055 
1056 	/* Attach driver debugnet methods. */
1057 	DEBUGNET_SET(ifp, cxgb);
1058 
1059 #ifdef DEFAULT_JUMBO
1060 	if (sc->params.nports <= 2)
1061 		if_setmtu(ifp, ETHERMTU_JUMBO);
1062 #endif
1063 	if ((err = cxgb_makedev(p)) != 0) {
1064 		printf("makedev failed %d\n", err);
1065 		return (err);
1066 	}
1067 
1068 	/* Create a list of media supported by this port */
1069 	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1070 	    cxgb_media_status);
1071 	cxgb_build_medialist(p);
1072 
1073 	t3_sge_init_port(p);
1074 
1075 	return (err);
1076 }
1077 
1078 /*
1079  * cxgb_port_detach() is called via the device_detach method when
1080  * cxgb_free() calls bus_generic_detach().  It is responsible for
1081  * removing the device from the view of the kernel, i.e. from all
1082  * interfaces lists etc.  This routine is only called when the driver is
1083  * being unloaded, not when the link goes down.
1084  */
1085 static int
1086 cxgb_port_detach(device_t dev)
1087 {
1088 	struct port_info *p;
1089 	struct adapter *sc;
1090 	int i;
1091 
1092 	p = device_get_softc(dev);
1093 	sc = p->adapter;
1094 
1095 	/* Tell cxgb_ioctl and if_init that the port is going away */
1096 	ADAPTER_LOCK(sc);
1097 	SET_DOOMED(p);
1098 	wakeup(&sc->flags);
1099 	while (IS_BUSY(sc))
1100 		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1101 	SET_BUSY(sc);
1102 	ADAPTER_UNLOCK(sc);
1103 
1104 	if (p->port_cdev != NULL)
1105 		destroy_dev(p->port_cdev);
1106 
1107 	cxgb_uninit_synchronized(p);
1108 	ether_ifdetach(p->ifp);
1109 
1110 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1111 		struct sge_qset *qs = &sc->sge.qs[i];
1112 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1113 
1114 		callout_drain(&txq->txq_watchdog);
1115 		callout_drain(&txq->txq_timer);
1116 	}
1117 
1118 	PORT_LOCK_DEINIT(p);
1119 	if_free(p->ifp);
1120 	p->ifp = NULL;
1121 
1122 	ADAPTER_LOCK(sc);
1123 	CLR_BUSY(sc);
1124 	wakeup_one(&sc->flags);
1125 	ADAPTER_UNLOCK(sc);
1126 	return (0);
1127 }
1128 
1129 void
1130 t3_fatal_err(struct adapter *sc)
1131 {
1132 	u_int fw_status[4];
1133 
1134 	if (sc->flags & FULL_INIT_DONE) {
1135 		t3_sge_stop(sc);
1136 		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1137 		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1138 		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1139 		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1140 		t3_intr_disable(sc);
1141 	}
1142 	device_printf(sc->dev, "encountered fatal error, operation suspended\n");
1143 	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1144 		device_printf(sc->dev, "FW status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1145 		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1146 }
1147 
1148 int
1149 t3_os_find_pci_capability(adapter_t *sc, int cap)
1150 {
1151 	device_t dev;
1152 	struct pci_devinfo *dinfo;
1153 	pcicfgregs *cfg;
1154 	uint32_t status;
1155 	uint8_t ptr;
1156 
1157 	dev = sc->dev;
1158 	dinfo = device_get_ivars(dev);
1159 	cfg = &dinfo->cfg;
1160 
1161 	status = pci_read_config(dev, PCIR_STATUS, 2);
1162 	if (!(status & PCIM_STATUS_CAPPRESENT))
1163 		return (0);
1164 
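	/*
	 * Type 0 and 1 headers keep the capability pointer at PCIR_CAP_PTR;
	 * CardBus (type 2) headers use PCIR_CAP_PTR_2.
	 */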
1165 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1166 	case 0:
1167 	case 1:
1168 		ptr = PCIR_CAP_PTR;
1169 		break;
1170 	case 2:
1171 		ptr = PCIR_CAP_PTR_2;
1172 		break;
1173 	default:
1174 		return (0);
1175 		break;
1176 	}
1177 	ptr = pci_read_config(dev, ptr, 1);
1178 
1179 	while (ptr != 0) {
1180 		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1181 			return (ptr);
1182 		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1183 	}
1184 
1185 	return (0);
1186 }
1187 
1188 int
1189 t3_os_pci_save_state(struct adapter *sc)
1190 {
1191 	device_t dev;
1192 	struct pci_devinfo *dinfo;
1193 
1194 	dev = sc->dev;
1195 	dinfo = device_get_ivars(dev);
1196 
1197 	pci_cfg_save(dev, dinfo, 0);
1198 	return (0);
1199 }
1200 
1201 int
1202 t3_os_pci_restore_state(struct adapter *sc)
1203 {
1204 	device_t dev;
1205 	struct pci_devinfo *dinfo;
1206 
1207 	dev = sc->dev;
1208 	dinfo = device_get_ivars(dev);
1209 
1210 	pci_cfg_restore(dev, dinfo);
1211 	return (0);
1212 }
1213 
1214 /**
1215  *	t3_os_link_changed - handle link status changes
1216  *	@adapter: the adapter associated with the link change
1217  *	@port_id: the port index whose link status has changed
1218  *	@link_status: the new status of the link
1219  *	@speed: the new speed setting
1220  *	@duplex: the new duplex setting
1221  *	@fc: the new flow-control setting
1222  *
1223  *	This is the OS-dependent handler for link status changes.  The OS
1224  *	neutral handler takes care of most of the processing for these events,
1225  *	then calls this handler for any OS-specific processing.
1226  */
1227 void
1228 t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1229      int duplex, int fc, int mac_was_reset)
1230 {
1231 	struct port_info *pi = &adapter->port[port_id];
1232 	if_t ifp = pi->ifp;
1233 
1234 	/* no race with detach, so ifp should always be good */
1235 	KASSERT(ifp, ("%s: if detached.", __func__));
1236 
1237 	/* Reapply mac settings if they were lost due to a reset */
1238 	if (mac_was_reset) {
1239 		PORT_LOCK(pi);
1240 		cxgb_update_mac_settings(pi);
1241 		PORT_UNLOCK(pi);
1242 	}
1243 
1244 	if (link_status) {
1245 		if_setbaudrate(ifp, IF_Mbps(speed));
1246 		if_link_state_change(ifp, LINK_STATE_UP);
1247 	} else
1248 		if_link_state_change(ifp, LINK_STATE_DOWN);
1249 }
1250 
1251 /**
1252  *	t3_os_phymod_changed - handle PHY module changes
1253  *	@adap: the adapter
1254  *	@port_id: the port whose PHY module changed
1255  *
1256  *	This is the OS-dependent handler for PHY module changes.  It is
1257  *	invoked when a PHY module is removed or inserted for any OS-specific
1258  *	processing.
1259  */
1260 void t3_os_phymod_changed(struct adapter *adap, int port_id)
1261 {
1262 	static const char *mod_str[] = {
1263 		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1264 	};
1265 	struct port_info *pi = &adap->port[port_id];
1266 	int mod = pi->phy.modtype;
1267 
1268 	if (mod != pi->media.ifm_cur->ifm_data)
1269 		cxgb_build_medialist(pi);
1270 
1271 	if (mod == phy_modtype_none)
1272 		if_printf(pi->ifp, "PHY module unplugged\n");
1273 	else {
1274 		KASSERT(mod < ARRAY_SIZE(mod_str),
1275 			("invalid PHY module type %d", mod));
1276 		if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1277 	}
1278 }
1279 
1280 void
1281 t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1282 {
1283 
1284 	/*
1285 	 * The ifnet might not be allocated before this gets called,
1286 	 * as this is called early on in attach by t3_prep_adapter, so
1287 	 * save the address off in the port structure.
1288 	 */
1289 	if (cxgb_debug)
1290 		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1291 	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1292 }
1293 
1294 /*
1295  * Programs the XGMAC based on the settings in the ifnet.  These settings
1296  * include MTU, MAC address, mcast addresses, etc.
1297  */
1298 static void
1299 cxgb_update_mac_settings(struct port_info *p)
1300 {
1301 	if_t ifp = p->ifp;
1302 	struct t3_rx_mode rm;
1303 	struct cmac *mac = &p->mac;
1304 	int mtu, hwtagging;
1305 
1306 	PORT_LOCK_ASSERT_OWNED(p);
1307 
1308 	bcopy(if_getlladdr(ifp), p->hw_addr, ETHER_ADDR_LEN);
1309 
1310 	mtu = if_getmtu(ifp);
1311 	if (if_getcapenable(ifp) & IFCAP_VLAN_MTU)
1312 		mtu += ETHER_VLAN_ENCAP_LEN;
1313 
1314 	hwtagging = (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) != 0;
1315 
1316 	t3_mac_set_mtu(mac, mtu);
1317 	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1318 	t3_mac_set_address(mac, 0, p->hw_addr);
1319 	t3_init_rx_mode(&rm, p);
1320 	t3_mac_set_rx_mode(mac, &rm);
1321 }
1322 
1323 
1324 static int
1325 await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1326 			      unsigned long n)
1327 {
1328 	int attempts = 5;
1329 
1330 	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1331 		if (!--attempts)
1332 			return (ETIMEDOUT);
1333 		t3_os_sleep(10);
1334 	}
1335 	return 0;
1336 	return (0);
1337 
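/*
 * Write every SMT, L2T, and routing table entry (plus one TCB field) once so
 * that the parity state of those TP memories is initialized before use.
 */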
1338 static int
1339 init_tp_parity(struct adapter *adap)
1340 {
1341 	int i;
1342 	struct mbuf *m;
1343 	struct cpl_set_tcb_field *greq;
1344 	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1345 
1346 	t3_tp_set_offload_mode(adap, 1);
1347 
1348 	for (i = 0; i < 16; i++) {
1349 		struct cpl_smt_write_req *req;
1350 
1351 		m = m_gethdr(M_WAITOK, MT_DATA);
1352 		req = mtod(m, struct cpl_smt_write_req *);
1353 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1354 		memset(req, 0, sizeof(*req));
1355 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1356 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1357 		req->iff = i;
1358 		t3_mgmt_tx(adap, m);
1359 	}
1360 
1361 	for (i = 0; i < 2048; i++) {
1362 		struct cpl_l2t_write_req *req;
1363 
1364 		m = m_gethdr(M_WAITOK, MT_DATA);
1365 		req = mtod(m, struct cpl_l2t_write_req *);
1366 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1367 		memset(req, 0, sizeof(*req));
1368 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1369 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1370 		req->params = htonl(V_L2T_W_IDX(i));
1371 		t3_mgmt_tx(adap, m);
1372 	}
1373 
1374 	for (i = 0; i < 2048; i++) {
1375 		struct cpl_rte_write_req *req;
1376 
1377 		m = m_gethdr(M_WAITOK, MT_DATA);
1378 		req = mtod(m, struct cpl_rte_write_req *);
1379 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1380 		memset(req, 0, sizeof(*req));
1381 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1382 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1383 		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1384 		t3_mgmt_tx(adap, m);
1385 	}
1386 
1387 	m = m_gethdr(M_WAITOK, MT_DATA);
1388 	greq = mtod(m, struct cpl_set_tcb_field *);
1389 	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1390 	memset(greq, 0, sizeof(*greq));
1391 	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1392 	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1393 	greq->mask = htobe64(1);
1394 	t3_mgmt_tx(adap, m);
1395 
1396 	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1397 	t3_tp_set_offload_mode(adap, 0);
1398 	return (i);
1399 }
1400 
1401 /**
1402  *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1403  *	@adap: the adapter
1404  *
1405  *	Sets up RSS to distribute packets to multiple receive queues.  We
1406  *	configure the RSS CPU lookup table to distribute to the number of HW
1407  *	receive queues, and the response queue lookup table to narrow that
1408  *	down to the response queues actually configured for each port.
1409  *	We always configure the RSS mapping for two ports since the mapping
1410  *	table has plenty of entries.
1411  */
1412 static void
1413 setup_rss(adapter_t *adap)
1414 {
1415 	int i;
1416 	u_int nq[2];
1417 	uint8_t cpus[SGE_QSETS + 1];
1418 	uint16_t rspq_map[RSS_TABLE_SIZE];
1419 
1420 	for (i = 0; i < SGE_QSETS; ++i)
1421 		cpus[i] = i;
1422 	cpus[SGE_QSETS] = 0xff;
1423 
1424 	nq[0] = nq[1] = 0;
1425 	for_each_port(adap, i) {
1426 		const struct port_info *pi = adap2pinfo(adap, i);
1427 
1428 		nq[pi->tx_chan] += pi->nqsets;
1429 	}
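	/*
	 * The first half of the lookup table steers traffic to channel 0's
	 * qsets, the second half to channel 1's (offset past channel 0's).
	 */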
1430 	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1431 		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1432 		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1433 	}
1434 
1435 	/* Calculate the reverse RSS map table */
1436 	for (i = 0; i < SGE_QSETS; ++i)
1437 		adap->rrss_map[i] = 0xff;
1438 	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1439 		if (adap->rrss_map[rspq_map[i]] == 0xff)
1440 			adap->rrss_map[rspq_map[i]] = i;
1441 
1442 	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1443 		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1444 	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1445 	              cpus, rspq_map);
1446 }
1447 
1448 static void
1449 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1450 			      int hi, int port)
1451 {
1452 	struct mbuf *m;
1453 	struct mngt_pktsched_wr *req;
1454 
1455 	m = m_gethdr(M_NOWAIT, MT_DATA);
1456 	if (m) {
1457 		req = mtod(m, struct mngt_pktsched_wr *);
1458 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1459 		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1460 		req->sched = sched;
1461 		req->idx = qidx;
1462 		req->min = lo;
1463 		req->max = hi;
1464 		req->binding = port;
1465 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1466 		t3_mgmt_tx(adap, m);
1467 	}
1468 }
1469 
1470 static void
1471 bind_qsets(adapter_t *sc)
1472 {
1473 	int i, j;
1474 
1475 	for (i = 0; i < (sc)->params.nports; ++i) {
1476 		const struct port_info *pi = adap2pinfo(sc, i);
1477 
1478 		for (j = 0; j < pi->nqsets; ++j) {
1479 			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1480 					  -1, pi->tx_chan);
1481 
1482 		}
1483 	}
1484 }
1485 
1486 static void
1487 update_tpeeprom(struct adapter *adap)
1488 {
1489 	const struct firmware *tpeeprom;
1490 
1491 	uint32_t version;
1492 	unsigned int major, minor;
1493 	int ret, len;
1494 	char rev, name[32];
1495 
1496 	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1497 
1498 	major = G_TP_VERSION_MAJOR(version);
1499 	minor = G_TP_VERSION_MINOR(version);
1500 	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1501 		return;
1502 
1503 	rev = t3rev2char(adap);
1504 	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1505 
1506 	tpeeprom = firmware_get(name);
1507 	if (tpeeprom == NULL) {
1508 		device_printf(adap->dev,
1509 			      "could not load TP EEPROM: unable to load %s\n",
1510 			      name);
1511 		return;
1512 	}
1513 
1514 	len = tpeeprom->datasize - 4;
1515 
1516 	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1517 	if (ret)
1518 		goto release_tpeeprom;
1519 
1520 	if (len != TP_SRAM_LEN) {
1521 		device_printf(adap->dev,
1522 			      "%s length is wrong len=%d expected=%d\n", name,
1523 			      len, TP_SRAM_LEN);
1524 		goto release_tpeeprom;
1525 	}
1526 
1527 	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1528 	    TP_SRAM_OFFSET);
1529 
1530 	if (!ret) {
1531 		device_printf(adap->dev,
1532 			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1533 			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1534 	} else
1535 		device_printf(adap->dev,
1536 			      "Protocol SRAM image update in EEPROM failed\n");
1537 
1538 release_tpeeprom:
1539 	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1540 
1541 	return;
1542 }
1543 
1544 static int
1545 update_tpsram(struct adapter *adap)
1546 {
1547 	const struct firmware *tpsram;
1548 	int ret;
1549 	char rev, name[32];
1550 
1551 	rev = t3rev2char(adap);
1552 	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1553 
1554 	update_tpeeprom(adap);
1555 
1556 	tpsram = firmware_get(name);
1557 	if (tpsram == NULL) {
1558 		device_printf(adap->dev, "could not load TP SRAM\n");
1559 		return (EINVAL);
1560 	} else
1561 		device_printf(adap->dev, "updating TP SRAM\n");
1562 
1563 	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1564 	if (ret)
1565 		goto release_tpsram;
1566 
1567 	ret = t3_set_proto_sram(adap, tpsram->data);
1568 	if (ret)
1569 		device_printf(adap->dev, "loading protocol SRAM failed\n");
1570 
1571 release_tpsram:
1572 	firmware_put(tpsram, FIRMWARE_UNLOAD);
1573 
1574 	return (ret);
1575 }
1576 
1577 /**
1578  *	cxgb_up - enable the adapter
1579  *	@adap: adapter being enabled
1580  *
1581  *	Called when the first port is enabled, this function performs the
1582  *	actions necessary to make an adapter operational, such as completing
1583  *	the initialization of HW modules, and enabling interrupts.
1584  */
1585 static int
1586 cxgb_up(struct adapter *sc)
1587 {
1588 	int err = 0;
1589 	unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;
1590 
1591 	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1592 					   __func__, sc->open_device_map));
1593 
1594 	if ((sc->flags & FULL_INIT_DONE) == 0) {
1595 
1596 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1597 
1598 		if ((sc->flags & FW_UPTODATE) == 0)
1599 			if ((err = upgrade_fw(sc)))
1600 				goto out;
1601 
1602 		if ((sc->flags & TPS_UPTODATE) == 0)
1603 			if ((err = update_tpsram(sc)))
1604 				goto out;
1605 
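		/*
		 * The filter table is carved out of the MC5 TCAM: keep no
		 * server entries and let filters use everything beyond the
		 * minimum TID reservation.
		 */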
1606 		if (is_offload(sc) && nfilters != 0) {
1607 			sc->params.mc5.nservers = 0;
1608 
1609 			if (nfilters < 0)
1610 				sc->params.mc5.nfilters = mxf;
1611 			else
1612 				sc->params.mc5.nfilters = min(nfilters, mxf);
1613 		}
1614 
1615 		err = t3_init_hw(sc, 0);
1616 		if (err)
1617 			goto out;
1618 
1619 		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1620 		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1621 
1622 		err = setup_sge_qsets(sc);
1623 		if (err)
1624 			goto out;
1625 
1626 		alloc_filters(sc);
1627 		setup_rss(sc);
1628 
1629 		t3_add_configured_sysctls(sc);
1630 		sc->flags |= FULL_INIT_DONE;
1631 	}
1632 
1633 	t3_intr_clear(sc);
1634 	t3_sge_start(sc);
1635 	t3_intr_enable(sc);
1636 
1637 	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1638 	    is_offload(sc) && init_tp_parity(sc) == 0)
1639 		sc->flags |= TP_PARITY_INIT;
1640 
1641 	if (sc->flags & TP_PARITY_INIT) {
1642 		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1643 		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1644 	}
1645 
1646 	if (!(sc->flags & QUEUES_BOUND)) {
1647 		bind_qsets(sc);
1648 		setup_hw_filters(sc);
1649 		sc->flags |= QUEUES_BOUND;
1650 	}
1651 
1652 	t3_sge_reset_adapter(sc);
1653 out:
1654 	return (err);
1655 }
1656 
1657 /*
1658  * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1659  * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1660  * during controller_detach, not here.
1661  */
1662 static void
1663 cxgb_down(struct adapter *sc)
1664 {
1665 	t3_sge_stop(sc);
1666 	t3_intr_disable(sc);
1667 }
1668 
1669 /*
1670  * if_init for cxgb ports.
1671  */
1672 static void
1673 cxgb_init(void *arg)
1674 {
1675 	struct port_info *p = arg;
1676 	struct adapter *sc = p->adapter;
1677 
1678 	ADAPTER_LOCK(sc);
1679 	cxgb_init_locked(p); /* releases adapter lock */
1680 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1681 }
1682 
1683 static int
1684 cxgb_init_locked(struct port_info *p)
1685 {
1686 	struct adapter *sc = p->adapter;
1687 	if_t ifp = p->ifp;
1688 	struct cmac *mac = &p->mac;
1689 	int i, rc = 0, may_sleep = 0, gave_up_lock = 0;
1690 
1691 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1692 
1693 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1694 		gave_up_lock = 1;
1695 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1696 			rc = EINTR;
1697 			goto done;
1698 		}
1699 	}
1700 	if (IS_DOOMED(p)) {
1701 		rc = ENXIO;
1702 		goto done;
1703 	}
1704 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1705 
1706 	/*
1707 	 * The code that runs during one-time adapter initialization can sleep
1708 	 * so it's important not to hold any locks across it.
1709 	 */
1710 	may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1711 
1712 	if (may_sleep) {
1713 		SET_BUSY(sc);
1714 		gave_up_lock = 1;
1715 		ADAPTER_UNLOCK(sc);
1716 	}
1717 
1718 	if (sc->open_device_map == 0 && ((rc = cxgb_up(sc)) != 0))
1719 		goto done;
1720 
1721 	PORT_LOCK(p);
1722 	if (isset(&sc->open_device_map, p->port_id) &&
1723 	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING)) {
1724 		PORT_UNLOCK(p);
1725 		goto done;
1726 	}
1727 	t3_port_intr_enable(sc, p->port_id);
1728 	if (!mac->multiport)
1729 		t3_mac_init(mac);
1730 	cxgb_update_mac_settings(p);
1731 	t3_link_start(&p->phy, mac, &p->link_config);
1732 	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1733 	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
1734 	PORT_UNLOCK(p);
1735 
1736 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1737 		struct sge_qset *qs = &sc->sge.qs[i];
1738 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1739 
1740 		callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1741 				 txq->txq_watchdog.c_cpu);
1742 	}
1743 
1744 	/* all ok */
1745 	setbit(&sc->open_device_map, p->port_id);
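	/*
	 * Poll link state every 250ms, or only every 3s if the PHY can
	 * interrupt on link changes.
	 */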
1746 	callout_reset(&p->link_check_ch,
1747 	    p->phy.caps & SUPPORTED_LINK_IRQ ?  hz * 3 : hz / 4,
1748 	    link_check_callout, p);
1749 
1750 done:
1751 	if (may_sleep) {
1752 		ADAPTER_LOCK(sc);
1753 		KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1754 		CLR_BUSY(sc);
1755 	}
1756 	if (gave_up_lock)
1757 		wakeup_one(&sc->flags);
1758 	ADAPTER_UNLOCK(sc);
1759 	return (rc);
1760 }
1761 
1762 static int
1763 cxgb_uninit_locked(struct port_info *p)
1764 {
1765 	struct adapter *sc = p->adapter;
1766 	int rc;
1767 
1768 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1769 
1770 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1771 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1772 			rc = EINTR;
1773 			goto done;
1774 		}
1775 	}
1776 	if (IS_DOOMED(p)) {
1777 		rc = ENXIO;
1778 		goto done;
1779 	}
1780 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1781 	SET_BUSY(sc);
1782 	ADAPTER_UNLOCK(sc);
1783 
1784 	rc = cxgb_uninit_synchronized(p);
1785 
1786 	ADAPTER_LOCK(sc);
1787 	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1788 	CLR_BUSY(sc);
1789 	wakeup_one(&sc->flags);
1790 done:
1791 	ADAPTER_UNLOCK(sc);
1792 	return (rc);
1793 }
1794 
1795 /*
1796  * Called on "ifconfig down", and from port_detach
1797  */
1798 static int
1799 cxgb_uninit_synchronized(struct port_info *pi)
1800 {
1801 	struct adapter *sc = pi->adapter;
1802 	if_t ifp = pi->ifp;
1803 
1804 	/*
1805 	 * taskqueue_drain may cause a deadlock if the adapter lock is held.
1806 	 */
1807 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1808 
1809 	/*
1810 	 * Clear this port's bit from the open device map, and then drain all
1811 	 * the tasks that can access/manipulate this port's port_info or ifp.
1812 	 * We disable this port's interrupts here and so the slow/ext
1813 	 * interrupt tasks won't be enqueued.  The tick task will continue to
1814 	 * be enqueued every second but the runs after this drain will not see
1815 	 * this port in the open device map.
1816 	 *
1817 	 * A well-behaved task must take open_device_map into account and ignore
1818 	 * ports that are not open.
1819 	 */
1820 	clrbit(&sc->open_device_map, pi->port_id);
1821 	t3_port_intr_disable(sc, pi->port_id);
1822 	taskqueue_drain(sc->tq, &sc->slow_intr_task);
1823 	taskqueue_drain(sc->tq, &sc->tick_task);
1824 
1825 	callout_drain(&pi->link_check_ch);
1826 	taskqueue_drain(sc->tq, &pi->link_check_task);
1827 
1828 	PORT_LOCK(pi);
1829 	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1830 
1831 	/* disable pause frames */
1832 	t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1833 
1834 	/* Reset RX FIFO HWM */
1835 	t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1836 			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1837 
1838 	DELAY(100 * 1000);
1839 
1840 	/* Wait for TXFIFO empty */
1841 	t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1842 			F_TXFIFO_EMPTY, 1, 20, 5);
1843 
1844 	DELAY(100 * 1000);
1845 	t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1846 
1847 	pi->phy.ops->power_down(&pi->phy, 1);
1848 
1849 	PORT_UNLOCK(pi);
1850 
1851 	pi->link_config.link_ok = 0;
1852 	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1853 
1854 	if (sc->open_device_map == 0)
1855 		cxgb_down(pi->adapter);
1856 
1857 	return (0);
1858 }
1859 
1860 /*
1861  * Mark lro enabled or disabled in all qsets for this port
1862  */
1863 static int
1864 cxgb_set_lro(struct port_info *p, int enabled)
1865 {
1866 	int i;
1867 	struct adapter *adp = p->adapter;
1868 	struct sge_qset *q;
1869 
1870 	for (i = 0; i < p->nqsets; i++) {
1871 		q = &adp->sge.qs[p->first_qset + i];
1872 		q->lro.enabled = (enabled != 0);
1873 	}
1874 	return (0);
1875 }
1876 
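/*
 * Interface ioctl handler.  Most commands serialize on the adapter lock and
 * bail out with ENXIO if the port is doomed or EBUSY if the controller is
 * busy.
 */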
1877 static int
1878 cxgb_ioctl(if_t ifp, unsigned long command, caddr_t data)
1879 {
1880 	struct port_info *p = if_getsoftc(ifp);
1881 	struct adapter *sc = p->adapter;
1882 	struct ifreq *ifr = (struct ifreq *)data;
1883 	int flags, error = 0, mtu;
1884 	uint32_t mask;
1885 
1886 	switch (command) {
1887 	case SIOCSIFMTU:
1888 		ADAPTER_LOCK(sc);
1889 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1890 		if (error) {
1891 fail:
1892 			ADAPTER_UNLOCK(sc);
1893 			return (error);
1894 		}
1895 
1896 		mtu = ifr->ifr_mtu;
1897 		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
1898 			error = EINVAL;
1899 		} else {
1900 			if_setmtu(ifp, mtu);
1901 			PORT_LOCK(p);
1902 			cxgb_update_mac_settings(p);
1903 			PORT_UNLOCK(p);
1904 		}
1905 		ADAPTER_UNLOCK(sc);
1906 		break;
1907 	case SIOCSIFFLAGS:
1908 		ADAPTER_LOCK(sc);
1909 		if (IS_DOOMED(p)) {
1910 			error = ENXIO;
1911 			goto fail;
1912 		}
1913 		if (if_getflags(ifp) & IFF_UP) {
1914 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1915 				flags = p->if_flags;
1916 				if (((if_getflags(ifp) ^ flags) & IFF_PROMISC) ||
1917 				    ((if_getflags(ifp) ^ flags) & IFF_ALLMULTI)) {
1918 					if (IS_BUSY(sc)) {
1919 						error = EBUSY;
1920 						goto fail;
1921 					}
1922 					PORT_LOCK(p);
1923 					cxgb_update_mac_settings(p);
1924 					PORT_UNLOCK(p);
1925 				}
1926 				ADAPTER_UNLOCK(sc);
1927 			} else
1928 				error = cxgb_init_locked(p);
1929 			p->if_flags = if_getflags(ifp);
1930 		} else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
1931 			error = cxgb_uninit_locked(p);
1932 		else
1933 			ADAPTER_UNLOCK(sc);
1934 
1935 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1936 		break;
1937 	case SIOCADDMULTI:
1938 	case SIOCDELMULTI:
1939 		ADAPTER_LOCK(sc);
1940 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1941 		if (error)
1942 			goto fail;
1943 
1944 		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1945 			PORT_LOCK(p);
1946 			cxgb_update_mac_settings(p);
1947 			PORT_UNLOCK(p);
1948 		}
1949 		ADAPTER_UNLOCK(sc);
1950 
1951 		break;
1952 	case SIOCSIFCAP:
1953 		ADAPTER_LOCK(sc);
1954 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1955 		if (error)
1956 			goto fail;
1957 
1958 		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
1959 		if (mask & IFCAP_TXCSUM) {
1960 			if_togglecapenable(ifp, IFCAP_TXCSUM);
1961 			if_togglehwassist(ifp, CSUM_TCP | CSUM_UDP | CSUM_IP);
1962 
1963 			if (IFCAP_TSO4 & if_getcapenable(ifp) &&
1964 			    !(IFCAP_TXCSUM & if_getcapenable(ifp))) {
1965 				mask &= ~IFCAP_TSO4;
1966 				if_setcapenablebit(ifp, 0, IFCAP_TSO4);
1967 				if_printf(ifp,
1968 				    "tso4 disabled due to -txcsum.\n");
1969 			}
1970 		}
1971 		if (mask & IFCAP_TXCSUM_IPV6) {
1972 			if_togglecapenable(ifp, IFCAP_TXCSUM_IPV6);
1973 			if_togglehwassist(ifp, CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
1974 
1975 			if (IFCAP_TSO6 & if_getcapenable(ifp) &&
1976 			    !(IFCAP_TXCSUM_IPV6 & if_getcapenable(ifp))) {
1977 				mask &= ~IFCAP_TSO6;
1978 				if_setcapenablebit(ifp, 0, IFCAP_TSO6);
1979 				if_printf(ifp,
1980 				    "tso6 disabled due to -txcsum6.\n");
1981 			}
1982 		}
1983 		if (mask & IFCAP_RXCSUM)
1984 			if_togglecapenable(ifp, IFCAP_RXCSUM);
1985 		if (mask & IFCAP_RXCSUM_IPV6)
1986 			if_togglecapenable(ifp, IFCAP_RXCSUM_IPV6);
1987 
1988 		/*
1989 		 * Note that we leave CSUM_TSO alone (it is always set).  The
1990 		 * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
1991 		 * sending a TSO request our way, so it's sufficient to toggle
1992 		 * IFCAP_TSOx only.
1993 		 */
1994 		if (mask & IFCAP_TSO4) {
1995 			if (!(IFCAP_TSO4 & if_getcapenable(ifp)) &&
1996 			    !(IFCAP_TXCSUM & if_getcapenable(ifp))) {
1997 				if_printf(ifp, "enable txcsum first.\n");
1998 				error = EAGAIN;
1999 				goto fail;
2000 			}
2001 			if_togglecapenable(ifp, IFCAP_TSO4);
2002 		}
2003 		if (mask & IFCAP_TSO6) {
2004 			if (!(IFCAP_TSO6 & if_getcapenable(ifp)) &&
2005 			    !(IFCAP_TXCSUM_IPV6 & if_getcapenable(ifp))) {
2006 				if_printf(ifp, "enable txcsum6 first.\n");
2007 				error = EAGAIN;
2008 				goto fail;
2009 			}
2010 			if_togglecapenable(ifp, IFCAP_TSO6);
2011 		}
2012 		if (mask & IFCAP_LRO) {
2013 			if_togglecapenable(ifp, IFCAP_LRO);
2014 
2015 		/* Safe to do this even if cxgb_up has not been called yet */
2016 			cxgb_set_lro(p, if_getcapenable(ifp) & IFCAP_LRO);
2017 		}
2018 #ifdef TCP_OFFLOAD
2019 		if (mask & IFCAP_TOE4) {
2020 			int enable = (if_getcapenable(ifp) ^ mask) & IFCAP_TOE4;
2021 
2022 			error = toe_capability(p, enable);
2023 			if (error == 0)
2024 				if_togglecapenable(ifp, mask);
2025 		}
2026 #endif
2027 		if (mask & IFCAP_VLAN_HWTAGGING) {
2028 			if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
2029 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
2030 				PORT_LOCK(p);
2031 				cxgb_update_mac_settings(p);
2032 				PORT_UNLOCK(p);
2033 			}
2034 		}
2035 		if (mask & IFCAP_VLAN_MTU) {
2036 			if_togglecapenable(ifp, IFCAP_VLAN_MTU);
2037 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
2038 				PORT_LOCK(p);
2039 				cxgb_update_mac_settings(p);
2040 				PORT_UNLOCK(p);
2041 			}
2042 		}
2043 		if (mask & IFCAP_VLAN_HWTSO)
2044 			if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
2045 		if (mask & IFCAP_VLAN_HWCSUM)
2046 			if_togglecapenable(ifp, IFCAP_VLAN_HWCSUM);
2047 
2048 #ifdef VLAN_CAPABILITIES
2049 		VLAN_CAPABILITIES(ifp);
2050 #endif
2051 		ADAPTER_UNLOCK(sc);
2052 		break;
2053 	case SIOCSIFMEDIA:
2054 	case SIOCGIFMEDIA:
2055 		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2056 		break;
2057 	default:
2058 		error = ether_ioctl(ifp, command, data);
2059 	}
2060 
2061 	return (error);
2062 }
2063 
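/*
 * Manual media changes are not supported; the media list is rebuilt from the
 * PHY/module type and the active media always follows the hardware.
 */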
2064 static int
2065 cxgb_media_change(if_t ifp)
2066 {
2067 	return (EOPNOTSUPP);
2068 }
2069 
2070 /*
2071  * Translates phy->modtype to the correct Ethernet media subtype.
2072  */
2073 static int
2074 cxgb_ifm_type(int mod)
2075 {
2076 	switch (mod) {
2077 	case phy_modtype_sr:
2078 		return (IFM_10G_SR);
2079 	case phy_modtype_lr:
2080 		return (IFM_10G_LR);
2081 	case phy_modtype_lrm:
2082 		return (IFM_10G_LRM);
2083 	case phy_modtype_twinax:
2084 		return (IFM_10G_TWINAX);
2085 	case phy_modtype_twinax_long:
2086 		return (IFM_10G_TWINAX_LONG);
2087 	case phy_modtype_none:
2088 		return (IFM_NONE);
2089 	case phy_modtype_unknown:
2090 		return (IFM_UNKNOWN);
2091 	}
2092 
2093 	KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2094 	return (IFM_UNKNOWN);
2095 }
2096 
2097 /*
2098  * Rebuilds the ifmedia list for this port, and sets the current media.
2099  */
2100 static void
2101 cxgb_build_medialist(struct port_info *p)
2102 {
2103 	struct cphy *phy = &p->phy;
2104 	struct ifmedia *media = &p->media;
2105 	int mod = phy->modtype;
2106 	int m = IFM_ETHER | IFM_FDX;
2107 
2108 	PORT_LOCK(p);
2109 
2110 	ifmedia_removeall(media);
2111 	if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2112 		/* Copper (RJ45) */
2113 
2114 		if (phy->caps & SUPPORTED_10000baseT_Full)
2115 			ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2116 
2117 		if (phy->caps & SUPPORTED_1000baseT_Full)
2118 			ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2119 
2120 		if (phy->caps & SUPPORTED_100baseT_Full)
2121 			ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2122 
2123 		if (phy->caps & SUPPORTED_10baseT_Full)
2124 			ifmedia_add(media, m | IFM_10_T, mod, NULL);
2125 
2126 		ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2127 		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2128 
2129 	} else if (phy->caps & SUPPORTED_TP) {
2130 		/* Copper (CX4) */
2131 
2132 		KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2133 			("%s: unexpected cap 0x%x", __func__, phy->caps));
2134 
2135 		ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2136 		ifmedia_set(media, m | IFM_10G_CX4);
2137 
2138 	} else if (phy->caps & SUPPORTED_FIBRE &&
2139 		   phy->caps & SUPPORTED_10000baseT_Full) {
2140 		/* 10G optical (but includes SFP+ twinax) */
2141 
2142 		m |= cxgb_ifm_type(mod);
2143 		if (IFM_SUBTYPE(m) == IFM_NONE)
2144 			m &= ~IFM_FDX;
2145 
2146 		ifmedia_add(media, m, mod, NULL);
2147 		ifmedia_set(media, m);
2148 
2149 	} else if (phy->caps & SUPPORTED_FIBRE &&
2150 		   phy->caps & SUPPORTED_1000baseT_Full) {
2151 		/* 1G optical */
2152 
2153 		/* XXX: Lie and claim to be SX, could actually be any 1G-X */
2154 		ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2155 		ifmedia_set(media, m | IFM_1000_SX);
2156 
2157 	} else {
2158 		KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2159 			    phy->caps));
2160 	}
2161 
2162 	PORT_UNLOCK(p);
2163 }
2164 
2165 static void
2166 cxgb_media_status(if_t ifp, struct ifmediareq *ifmr)
2167 {
2168 	struct port_info *p = if_getsoftc(ifp);
2169 	struct ifmedia_entry *cur = p->media.ifm_cur;
2170 	int speed = p->link_config.speed;
2171 
2172 	if (cur->ifm_data != p->phy.modtype) {
2173 		cxgb_build_medialist(p);
2174 		cur = p->media.ifm_cur;
2175 	}
2176 
2177 	ifmr->ifm_status = IFM_AVALID;
2178 	if (!p->link_config.link_ok)
2179 		return;
2180 
2181 	ifmr->ifm_status |= IFM_ACTIVE;
2182 
2183 	/*
2184 	 * active and current will differ iff current media is autoselect.  That
2185 	 * can happen only for copper RJ45.
2186 	 */
2187 	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2188 		return;
2189 	KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2190 		("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2191 
2192 	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2193 	if (speed == SPEED_10000)
2194 		ifmr->ifm_active |= IFM_10G_T;
2195 	else if (speed == SPEED_1000)
2196 		ifmr->ifm_active |= IFM_1000_T;
2197 	else if (speed == SPEED_100)
2198 		ifmr->ifm_active |= IFM_100_TX;
2199 	else if (speed == SPEED_10)
2200 		ifmr->ifm_active |= IFM_10_T;
2201 	else
2202 		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2203 			    speed));
2204 }
2205 
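/*
 * if_get_counter handler: refresh the MAC statistics (rate-limited inside
 * cxgb_refresh_stats) and map them onto the generic ifnet counters.
 */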
2206 static uint64_t
2207 cxgb_get_counter(if_t ifp, ift_counter c)
2208 {
2209 	struct port_info *pi = if_getsoftc(ifp);
2210 	struct adapter *sc = pi->adapter;
2211 	struct cmac *mac = &pi->mac;
2212 	struct mac_stats *mstats = &mac->stats;
2213 
2214 	cxgb_refresh_stats(pi);
2215 
2216 	switch (c) {
2217 	case IFCOUNTER_IPACKETS:
2218 		return (mstats->rx_frames);
2219 
2220 	case IFCOUNTER_IERRORS:
2221 		return (mstats->rx_jabber + mstats->rx_data_errs +
2222 		    mstats->rx_sequence_errs + mstats->rx_runt +
2223 		    mstats->rx_too_long + mstats->rx_mac_internal_errs +
2224 		    mstats->rx_short + mstats->rx_fcs_errs);
2225 
2226 	case IFCOUNTER_OPACKETS:
2227 		return (mstats->tx_frames);
2228 
2229 	case IFCOUNTER_OERRORS:
2230 		return (mstats->tx_excess_collisions + mstats->tx_underrun +
2231 		    mstats->tx_len_errs + mstats->tx_mac_internal_errs +
2232 		    mstats->tx_excess_deferral + mstats->tx_fcs_errs);
2233 
2234 	case IFCOUNTER_COLLISIONS:
2235 		return (mstats->tx_total_collisions);
2236 
2237 	case IFCOUNTER_IBYTES:
2238 		return (mstats->rx_octets);
2239 
2240 	case IFCOUNTER_OBYTES:
2241 		return (mstats->tx_octets);
2242 
2243 	case IFCOUNTER_IMCASTS:
2244 		return (mstats->rx_mcast_frames);
2245 
2246 	case IFCOUNTER_OMCASTS:
2247 		return (mstats->tx_mcast_frames);
2248 
2249 	case IFCOUNTER_IQDROPS:
2250 		return (mstats->rx_cong_drops);
2251 
2252 	case IFCOUNTER_OQDROPS: {
2253 		int i;
2254 		uint64_t drops;
2255 
2256 		drops = 0;
2257 		if (sc->flags & FULL_INIT_DONE) {
2258 			for (i = pi->first_qset; i < pi->first_qset + pi->nqsets; i++)
2259 				drops += sc->sge.qs[i].txq[TXQ_ETH].txq_mr->br_drops;
2260 		}
2261 
2262 		return (drops);
2263 
2264 	}
2265 
2266 	default:
2267 		return (if_get_counter_default(ifp, c));
2268 	}
2269 }
2270 
2271 static void
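/*
 * Interrupt handler for slow (asynchronous) adapter events.  Masks PL
 * interrupts and defers the real work to the slow_intr_task.
 */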
2272 cxgb_async_intr(void *data)
2273 {
2274 	adapter_t *sc = data;
2275 
2276 	t3_write_reg(sc, A_PL_INT_ENABLE0, 0);
2277 	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
2278 	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2279 }
2280 
2281 static void
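/* Callout handler: hand the link check off to the adapter taskqueue. */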
2282 link_check_callout(void *arg)
2283 {
2284 	struct port_info *pi = arg;
2285 	struct adapter *sc = pi->adapter;
2286 
2287 	if (!isset(&sc->open_device_map, pi->port_id))
2288 		return;
2289 
2290 	taskqueue_enqueue(sc->tq, &pi->link_check_task);
2291 }
2292 
2293 static void
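/*
 * Task that updates link state.  Keeps polling via the callout while the link
 * is faulted, down, or the PHY cannot interrupt on link changes.
 */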
2294 check_link_status(void *arg, int pending)
2295 {
2296 	struct port_info *pi = arg;
2297 	struct adapter *sc = pi->adapter;
2298 
2299 	if (!isset(&sc->open_device_map, pi->port_id))
2300 		return;
2301 
2302 	t3_link_changed(sc, pi->port_id);
2303 
2304 	if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ) ||
2305 	    pi->link_config.link_ok == 0)
2306 		callout_reset(&pi->link_check_ch, hz, link_check_callout, pi);
2307 }
2308 
2309 void
2310 t3_os_link_intr(struct port_info *pi)
2311 {
2312 	/*
2313 	 * Schedule a link check in the near future.  If the link is flapping
2314 	 * rapidly we'll keep resetting the callout and delaying the check until
2315 	 * things stabilize a bit.
2316 	 */
2317 	callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
2318 }
2319 
2320 static void
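/*
 * Periodic MAC watchdog for T3 rev B2 adapters.  A status of 1 means the
 * watchdog toggled the MAC; 2 means the MAC had to be reconfigured and
 * restarted.
 */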
2321 check_t3b2_mac(struct adapter *sc)
2322 {
2323 	int i;
2324 
2325 	if (sc->flags & CXGB_SHUTDOWN)
2326 		return;
2327 
2328 	for_each_port(sc, i) {
2329 		struct port_info *p = &sc->port[i];
2330 		int status;
2331 #ifdef INVARIANTS
2332 		if_t ifp = p->ifp;
2333 #endif
2334 
2335 		if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2336 		    !p->link_config.link_ok)
2337 			continue;
2338 
2339 		KASSERT(if_getdrvflags(ifp) & IFF_DRV_RUNNING,
2340 			("%s: state mismatch (drv_flags %x, device_map %x)",
2341 			 __func__, if_getdrvflags(ifp), sc->open_device_map));
2342 
2343 		PORT_LOCK(p);
2344 		status = t3b2_mac_watchdog_task(&p->mac);
2345 		if (status == 1)
2346 			p->mac.stats.num_toggled++;
2347 		else if (status == 2) {
2348 			struct cmac *mac = &p->mac;
2349 
2350 			cxgb_update_mac_settings(p);
2351 			t3_link_start(&p->phy, mac, &p->link_config);
2352 			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2353 			t3_port_intr_enable(sc, p->port_id);
2354 			p->mac.stats.num_resets++;
2355 		}
2356 		PORT_UNLOCK(p);
2357 	}
2358 }
2359 
2360 static void
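/*
 * Per-second tick: defer the work to the tick task and reschedule, unless the
 * adapter is shutting down.
 */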
2361 cxgb_tick(void *arg)
2362 {
2363 	adapter_t *sc = (adapter_t *)arg;
2364 
2365 	if (sc->flags & CXGB_SHUTDOWN)
2366 		return;
2367 
2368 	taskqueue_enqueue(sc->tq, &sc->tick_task);
2369 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2370 }
2371 
2372 void
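/* Pull MAC statistics from the hardware, at most once every 250ms. */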
2373 cxgb_refresh_stats(struct port_info *pi)
2374 {
2375 	struct timeval tv;
2376 	const struct timeval interval = {0, 250000};    /* 250ms */
2377 
2378 	getmicrotime(&tv);
2379 	timevalsub(&tv, &interval);
2380 	if (timevalcmp(&tv, &pi->last_refreshed, <))
2381 		return;
2382 
2383 	PORT_LOCK(pi);
2384 	t3_mac_update_stats(&pi->mac);
2385 	PORT_UNLOCK(pi);
2386 	getmicrotime(&pi->last_refreshed);
2387 }
2388 
2389 static void
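/*
 * Tick task: run the T3B2 MAC watchdog when applicable, account for starved
 * response queues and empty free lists, refresh per-port statistics, and
 * count RX FIFO overflows.
 */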
2390 cxgb_tick_handler(void *arg, int count)
2391 {
2392 	adapter_t *sc = (adapter_t *)arg;
2393 	const struct adapter_params *p = &sc->params;
2394 	int i;
2395 	uint32_t cause, reset;
2396 
2397 	if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2398 		return;
2399 
2400 	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2401 		check_t3b2_mac(sc);
2402 
2403 	cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2404 	if (cause) {
2405 		struct sge_qset *qs = &sc->sge.qs[0];
2406 		uint32_t mask, v;
2407 
2408 		v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2409 
2410 		mask = 1;
2411 		for (i = 0; i < SGE_QSETS; i++) {
2412 			if (v & mask)
2413 				qs[i].rspq.starved++;
2414 			mask <<= 1;
2415 		}
2416 
2417 		mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2418 
2419 		for (i = 0; i < SGE_QSETS * 2; i++) {
2420 			if (v & mask) {
2421 				qs[i / 2].fl[i % 2].empty++;
2422 			}
2423 			mask <<= 1;
2424 		}
2425 
2426 		/* clear */
2427 		t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2428 		t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2429 	}
2430 
2431 	for (i = 0; i < sc->params.nports; i++) {
2432 		struct port_info *pi = &sc->port[i];
2433 		struct cmac *mac = &pi->mac;
2434 
2435 		if (!isset(&sc->open_device_map, pi->port_id))
2436 			continue;
2437 
2438 		cxgb_refresh_stats(pi);
2439 
2440 		if (mac->multiport)
2441 			continue;
2442 
2443 		/* Count rx fifo overflows, once per second */
2444 		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2445 		reset = 0;
2446 		if (cause & F_RXFIFO_OVERFLOW) {
2447 			mac->stats.rx_fifo_ovfl++;
2448 			reset |= F_RXFIFO_OVERFLOW;
2449 		}
2450 		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2451 	}
2452 }
2453 
2454 static void
2455 touch_bars(device_t dev)
2456 {
2457 	/*
2458 	 * Don't enable yet
2459 	 */
2460 #if !defined(__LP64__) && 0
2461 	u32 v;
2462 
2463 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2464 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2465 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2466 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2467 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2468 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2469 #endif
2470 }
2471 
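/*
 * Write a byte range to the EEPROM.  Unaligned edges are read-merged so only
 * whole 32-bit words are written, and write protection is lifted for the
 * duration of the update.
 */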
2472 static int
2473 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2474 {
2475 	uint8_t *buf;
2476 	int err = 0;
2477 	u32 aligned_offset, aligned_len, *p;
2478 	struct adapter *adapter = pi->adapter;
2479 
2480 
2481 	aligned_offset = offset & ~3;
2482 	aligned_len = (len + (offset & 3) + 3) & ~3;
2483 
2484 	if (aligned_offset != offset || aligned_len != len) {
2485 		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2486 		if (!buf)
2487 			return (ENOMEM);
2488 		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2489 		if (!err && aligned_len > 4)
2490 			err = t3_seeprom_read(adapter,
2491 					      aligned_offset + aligned_len - 4,
2492 					      (u32 *)&buf[aligned_len - 4]);
2493 		if (err)
2494 			goto out;
2495 		memcpy(buf + (offset & 3), data, len);
2496 	} else
2497 		buf = (uint8_t *)(uintptr_t)data;
2498 
2499 	err = t3_seeprom_wp(adapter, 0);
2500 	if (err)
2501 		goto out;
2502 
2503 	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2504 		err = t3_seeprom_write(adapter, aligned_offset, *p);
2505 		aligned_offset += 4;
2506 	}
2507 
2508 	if (!err)
2509 		err = t3_seeprom_wp(adapter, 1);
2510 out:
2511 	if (buf != data)
2512 		free(buf, M_DEVBUF);
2513 	return (err);
2514 }
2515 
2516 
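/*
 * Range check helper for the ioctls below; negative values mean "leave
 * unchanged" and always pass.
 */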
2517 static int
2518 in_range(int val, int lo, int hi)
2519 {
2520 	return val < 0 || (val <= hi && val >= lo);
2521 }
2522 
2523 static int
2524 cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
2525 {
2526 	return (0);
2527 }
2528 
2529 static int
2530 cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2531 {
2532 	return (0);
2533 }
2534 
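/*
 * Handler for the driver's control-device ioctls (CHELSIO_* commands).  All
 * of them require driver privileges.
 */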
2535 static int
2536 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2537     int fflag, struct thread *td)
2538 {
2539 	int mmd, error = 0;
2540 	struct port_info *pi = dev->si_drv1;
2541 	adapter_t *sc = pi->adapter;
2542 
2543 #ifdef PRIV_SUPPORTED
2544 	if (priv_check(td, PRIV_DRIVER)) {
2545 		if (cxgb_debug)
2546 			printf("user does not have access to privileged ioctls\n");
2547 		return (EPERM);
2548 	}
2549 #else
2550 	if (suser(td)) {
2551 		if (cxgb_debug)
2552 			printf("user does not have access to privileged ioctls\n");
2553 		return (EPERM);
2554 	}
2555 #endif
2556 
2557 	switch (cmd) {
2558 	case CHELSIO_GET_MIIREG: {
2559 		uint32_t val;
2560 		struct cphy *phy = &pi->phy;
2561 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2562 
2563 		if (!phy->mdio_read)
2564 			return (EOPNOTSUPP);
2565 		if (is_10G(sc)) {
2566 			mmd = mid->phy_id >> 8;
2567 			if (!mmd)
2568 				mmd = MDIO_DEV_PCS;
2569 			else if (mmd > MDIO_DEV_VEND2)
2570 				return (EINVAL);
2571 
2572 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2573 					     mid->reg_num, &val);
2574 		} else
2575 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2576 					     mid->reg_num & 0x1f, &val);
2577 		if (error == 0)
2578 			mid->val_out = val;
2579 		break;
2580 	}
2581 	case CHELSIO_SET_MIIREG: {
2582 		struct cphy *phy = &pi->phy;
2583 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2584 
2585 		if (!phy->mdio_write)
2586 			return (EOPNOTSUPP);
2587 		if (is_10G(sc)) {
2588 			mmd = mid->phy_id >> 8;
2589 			if (!mmd)
2590 				mmd = MDIO_DEV_PCS;
2591 			else if (mmd > MDIO_DEV_VEND2)
2592 				return (EINVAL);
2593 
2594 			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2595 					      mmd, mid->reg_num, mid->val_in);
2596 		} else
2597 			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2598 					      mid->reg_num & 0x1f,
2599 					      mid->val_in);
2600 		break;
2601 	}
2602 	case CHELSIO_SETREG: {
2603 		struct ch_reg *edata = (struct ch_reg *)data;
2604 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2605 			return (EFAULT);
2606 		t3_write_reg(sc, edata->addr, edata->val);
2607 		break;
2608 	}
2609 	case CHELSIO_GETREG: {
2610 		struct ch_reg *edata = (struct ch_reg *)data;
2611 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2612 			return (EFAULT);
2613 		edata->val = t3_read_reg(sc, edata->addr);
2614 		break;
2615 	}
2616 	case CHELSIO_GET_SGE_CONTEXT: {
2617 		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2618 		mtx_lock_spin(&sc->sge.reg_lock);
2619 		switch (ecntxt->cntxt_type) {
2620 		case CNTXT_TYPE_EGRESS:
2621 			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2622 			    ecntxt->data);
2623 			break;
2624 		case CNTXT_TYPE_FL:
2625 			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2626 			    ecntxt->data);
2627 			break;
2628 		case CNTXT_TYPE_RSP:
2629 			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2630 			    ecntxt->data);
2631 			break;
2632 		case CNTXT_TYPE_CQ:
2633 			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2634 			    ecntxt->data);
2635 			break;
2636 		default:
2637 			error = EINVAL;
2638 			break;
2639 		}
2640 		mtx_unlock_spin(&sc->sge.reg_lock);
2641 		break;
2642 	}
2643 	case CHELSIO_GET_SGE_DESC: {
2644 		struct ch_desc *edesc = (struct ch_desc *)data;
2645 		int ret;
2646 		if (edesc->queue_num >= SGE_QSETS * 6)
2647 			return (EINVAL);
2648 		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2649 		    edesc->queue_num % 6, edesc->idx, edesc->data);
2650 		if (ret < 0)
2651 			return (EINVAL);
2652 		edesc->size = ret;
2653 		break;
2654 	}
2655 	case CHELSIO_GET_QSET_PARAMS: {
2656 		struct qset_params *q;
2657 		struct ch_qset_params *t = (struct ch_qset_params *)data;
2658 		int q1 = pi->first_qset;
2659 		int nqsets = pi->nqsets;
2660 		int i;
2661 
2662 		if (t->qset_idx >= nqsets)
2663 			return (EINVAL);
2664 
2665 		i = q1 + t->qset_idx;
2666 		q = &sc->params.sge.qset[i];
2667 		t->rspq_size   = q->rspq_size;
2668 		t->txq_size[0] = q->txq_size[0];
2669 		t->txq_size[1] = q->txq_size[1];
2670 		t->txq_size[2] = q->txq_size[2];
2671 		t->fl_size[0]  = q->fl_size;
2672 		t->fl_size[1]  = q->jumbo_size;
2673 		t->polling     = q->polling;
2674 		t->lro         = q->lro;
2675 		t->intr_lat    = q->coalesce_usecs;
2676 		t->cong_thres  = q->cong_thres;
2677 		t->qnum        = i;
2678 
2679 		if ((sc->flags & FULL_INIT_DONE) == 0)
2680 			t->vector = 0;
2681 		else if (sc->flags & USING_MSIX)
2682 			t->vector = rman_get_start(sc->msix_irq_res[i]);
2683 		else
2684 			t->vector = rman_get_start(sc->irq_res);
2685 
2686 		break;
2687 	}
2688 	case CHELSIO_GET_QSET_NUM: {
2689 		struct ch_reg *edata = (struct ch_reg *)data;
2690 		edata->val = pi->nqsets;
2691 		break;
2692 	}
2693 	case CHELSIO_LOAD_FW: {
2694 		uint8_t *fw_data;
2695 		uint32_t vers;
2696 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2697 
2698 		/*
2699 		 * You're allowed to load firmware only before FULL_INIT_DONE
2700 		 *
2701 		 * FW_UPTODATE is also set so the rest of the initialization
2702 		 * will not overwrite what was loaded here.  This gives you the
2703 		 * flexibility to load any firmware (and maybe shoot yourself in
2704 		 * the foot).
2705 		 */
2706 
2707 		ADAPTER_LOCK(sc);
2708 		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2709 			ADAPTER_UNLOCK(sc);
2710 			return (EBUSY);
2711 		}
2712 
2713 		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2714 		if (!fw_data)
2715 			error = ENOMEM;
2716 		else
2717 			error = copyin(t->buf, fw_data, t->len);
2718 
2719 		if (!error)
2720 			error = -t3_load_fw(sc, fw_data, t->len);
2721 
2722 		if (t3_get_fw_version(sc, &vers) == 0) {
2723 			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2724 			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2725 			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2726 		}
2727 
2728 		if (!error)
2729 			sc->flags |= FW_UPTODATE;
2730 
2731 		free(fw_data, M_DEVBUF);
2732 		ADAPTER_UNLOCK(sc);
2733 		break;
2734 	}
2735 	case CHELSIO_LOAD_BOOT: {
2736 		uint8_t *boot_data;
2737 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2738 
2739 		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2740 		if (!boot_data)
2741 			return (ENOMEM);
2742 
2743 		error = copyin(t->buf, boot_data, t->len);
2744 		if (!error)
2745 			error = -t3_load_boot(sc, boot_data, t->len);
2746 
2747 		free(boot_data, M_DEVBUF);
2748 		break;
2749 	}
2750 	case CHELSIO_GET_PM: {
2751 		struct ch_pm *m = (struct ch_pm *)data;
2752 		struct tp_params *p = &sc->params.tp;
2753 
2754 		if (!is_offload(sc))
2755 			return (EOPNOTSUPP);
2756 
2757 		m->tx_pg_sz = p->tx_pg_size;
2758 		m->tx_num_pg = p->tx_num_pgs;
2759 		m->rx_pg_sz  = p->rx_pg_size;
2760 		m->rx_num_pg = p->rx_num_pgs;
2761 		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2762 
2763 		break;
2764 	}
2765 	case CHELSIO_SET_PM: {
2766 		struct ch_pm *m = (struct ch_pm *)data;
2767 		struct tp_params *p = &sc->params.tp;
2768 
2769 		if (!is_offload(sc))
2770 			return (EOPNOTSUPP);
2771 		if (sc->flags & FULL_INIT_DONE)
2772 			return (EBUSY);
2773 
2774 		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2775 		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2776 			return (EINVAL);	/* not power of 2 */
2777 		if (!(m->rx_pg_sz & 0x14000))
2778 			return (EINVAL);	/* not 16KB or 64KB */
2779 		if (!(m->tx_pg_sz & 0x1554000))
2780 			return (EINVAL);
2781 		if (m->tx_num_pg == -1)
2782 			m->tx_num_pg = p->tx_num_pgs;
2783 		if (m->rx_num_pg == -1)
2784 			m->rx_num_pg = p->rx_num_pgs;
2785 		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2786 			return (EINVAL);
2787 		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2788 		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2789 			return (EINVAL);
2790 
2791 		p->rx_pg_size = m->rx_pg_sz;
2792 		p->tx_pg_size = m->tx_pg_sz;
2793 		p->rx_num_pgs = m->rx_num_pg;
2794 		p->tx_num_pgs = m->tx_num_pg;
2795 		break;
2796 	}
2797 	case CHELSIO_SETMTUTAB: {
2798 		struct ch_mtus *m = (struct ch_mtus *)data;
2799 		int i;
2800 
2801 		if (!is_offload(sc))
2802 			return (EOPNOTSUPP);
2803 		if (offload_running(sc))
2804 			return (EBUSY);
2805 		if (m->nmtus != NMTUS)
2806 			return (EINVAL);
2807 		if (m->mtus[0] < 81)         /* accommodate SACK */
2808 			return (EINVAL);
2809 
2810 		/*
2811 		 * MTUs must be in ascending order
2812 		 */
2813 		for (i = 1; i < NMTUS; ++i)
2814 			if (m->mtus[i] < m->mtus[i - 1])
2815 				return (EINVAL);
2816 
2817 		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2818 		break;
2819 	}
2820 	case CHELSIO_GETMTUTAB: {
2821 		struct ch_mtus *m = (struct ch_mtus *)data;
2822 
2823 		if (!is_offload(sc))
2824 			return (EOPNOTSUPP);
2825 
2826 		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2827 		m->nmtus = NMTUS;
2828 		break;
2829 	}
2830 	case CHELSIO_GET_MEM: {
2831 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2832 		struct mc7 *mem;
2833 		uint8_t *useraddr;
2834 		u64 buf[32];
2835 
2836 		/*
2837 		 * Use these to avoid modifying len/addr in the return
2838 		 * struct
2839 		 */
2840 		uint32_t len = t->len, addr = t->addr;
2841 
2842 		if (!is_offload(sc))
2843 			return (EOPNOTSUPP);
2844 		if (!(sc->flags & FULL_INIT_DONE))
2845 			return (EIO);         /* need the memory controllers */
2846 		if ((addr & 0x7) || (len & 0x7))
2847 			return (EINVAL);
2848 		if (t->mem_id == MEM_CM)
2849 			mem = &sc->cm;
2850 		else if (t->mem_id == MEM_PMRX)
2851 			mem = &sc->pmrx;
2852 		else if (t->mem_id == MEM_PMTX)
2853 			mem = &sc->pmtx;
2854 		else
2855 			return (EINVAL);
2856 
2857 		/*
2858 		 * Version scheme:
2859 		 * bits 0..9: chip version
2860 		 * bits 10..15: chip revision
2861 		 */
2862 		t->version = 3 | (sc->params.rev << 10);
2863 
2864 		/*
2865 		 * Read 256 bytes at a time as len can be large and we don't
2866 		 * want to use huge intermediate buffers.
2867 		 */
2868 		useraddr = (uint8_t *)t->buf;
2869 		while (len) {
2870 			unsigned int chunk = min(len, sizeof(buf));
2871 
2872 			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2873 			if (error)
2874 				return (-error);
2875 			if (copyout(buf, useraddr, chunk))
2876 				return (EFAULT);
2877 			useraddr += chunk;
2878 			addr += chunk;
2879 			len -= chunk;
2880 		}
2881 		break;
2882 	}
2883 	case CHELSIO_READ_TCAM_WORD: {
2884 		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2885 
2886 		if (!is_offload(sc))
2887 			return (EOPNOTSUPP);
2888 		if (!(sc->flags & FULL_INIT_DONE))
2889 			return (EIO);         /* need MC5 */
2890 		return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
2891 		break;
2892 	}
2893 	case CHELSIO_SET_TRACE_FILTER: {
2894 		struct ch_trace *t = (struct ch_trace *)data;
2895 		const struct trace_params *tp;
2896 
2897 		tp = (const struct trace_params *)&t->sip;
2898 		if (t->config_tx)
2899 			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2900 					       t->trace_tx);
2901 		if (t->config_rx)
2902 			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2903 					       t->trace_rx);
2904 		break;
2905 	}
2906 	case CHELSIO_SET_PKTSCHED: {
2907 		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2908 		if (sc->open_device_map == 0)
2909 			return (EAGAIN);
2910 		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2911 		    p->binding);
2912 		break;
2913 	}
2914 	case CHELSIO_IFCONF_GETREGS: {
2915 		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2916 		int reglen = cxgb_get_regs_len();
2917 		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2918 		if (buf == NULL) {
2919 			return (ENOMEM);
2920 		}
2921 		if (regs->len > reglen)
2922 			regs->len = reglen;
2923 		else if (regs->len < reglen)
2924 			error = ENOBUFS;
2925 
2926 		if (!error) {
2927 			cxgb_get_regs(sc, regs, buf);
2928 			error = copyout(buf, regs->data, reglen);
2929 		}
2930 		free(buf, M_DEVBUF);
2931 
2932 		break;
2933 	}
2934 	case CHELSIO_SET_HW_SCHED: {
2935 		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2936 		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2937 
2938 		if ((sc->flags & FULL_INIT_DONE) == 0)
2939 			return (EAGAIN);       /* need TP to be initialized */
2940 		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2941 		    !in_range(t->channel, 0, 1) ||
2942 		    !in_range(t->kbps, 0, 10000000) ||
2943 		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2944 		    !in_range(t->flow_ipg, 0,
2945 			      dack_ticks_to_usec(sc, 0x7ff)))
2946 			return (EINVAL);
2947 
2948 		if (t->kbps >= 0) {
2949 			error = t3_config_sched(sc, t->kbps, t->sched);
2950 			if (error < 0)
2951 				return (-error);
2952 		}
2953 		if (t->class_ipg >= 0)
2954 			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2955 		if (t->flow_ipg >= 0) {
2956 			t->flow_ipg *= 1000;     /* us -> ns */
2957 			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2958 		}
2959 		if (t->mode >= 0) {
2960 			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2961 
2962 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2963 					 bit, t->mode ? bit : 0);
2964 		}
2965 		if (t->channel >= 0)
2966 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2967 					 1 << t->sched, t->channel << t->sched);
2968 		break;
2969 	}
2970 	case CHELSIO_GET_EEPROM: {
2971 		int i;
2972 		struct ch_eeprom *e = (struct ch_eeprom *)data;
2973 		uint8_t *buf;
2974 
2975 		if (e->offset & 3 || e->offset >= EEPROMSIZE ||
2976 		    e->len > EEPROMSIZE || e->offset + e->len > EEPROMSIZE) {
2977 			return (EINVAL);
2978 		}
2979 
2980 		buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
2981 		if (buf == NULL) {
2982 			return (ENOMEM);
2983 		}
2984 		e->magic = EEPROM_MAGIC;
2985 		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
2986 			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
2987 
2988 		if (!error)
2989 			error = copyout(buf + e->offset, e->data, e->len);
2990 
2991 		free(buf, M_DEVBUF);
2992 		break;
2993 	}
2994 	case CHELSIO_CLEAR_STATS: {
2995 		if (!(sc->flags & FULL_INIT_DONE))
2996 			return (EAGAIN);
2997 
2998 		PORT_LOCK(pi);
2999 		t3_mac_update_stats(&pi->mac);
3000 		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
3001 		PORT_UNLOCK(pi);
3002 		break;
3003 	}
3004 	case CHELSIO_GET_UP_LA: {
3005 		struct ch_up_la *la = (struct ch_up_la *)data;
3006 		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3007 		if (buf == NULL) {
3008 			return (ENOMEM);
3009 		}
3010 		if (la->bufsize < LA_BUFSIZE)
3011 			error = ENOBUFS;
3012 
3013 		if (!error)
3014 			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3015 					      &la->bufsize, buf);
3016 		if (!error)
3017 			error = copyout(buf, la->data, la->bufsize);
3018 
3019 		free(buf, M_DEVBUF);
3020 		break;
3021 	}
3022 	case CHELSIO_GET_UP_IOQS: {
3023 		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3024 		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3025 		uint32_t *v;
3026 
3027 		if (buf == NULL) {
3028 			return (ENOMEM);
3029 		}
3030 		if (ioqs->bufsize < IOQS_BUFSIZE)
3031 			error = ENOBUFS;
3032 
3033 		if (!error)
3034 			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3035 
3036 		if (!error) {
3037 			v = (uint32_t *)buf;
3038 
3039 			ioqs->ioq_rx_enable = *v++;
3040 			ioqs->ioq_tx_enable = *v++;
3041 			ioqs->ioq_rx_status = *v++;
3042 			ioqs->ioq_tx_status = *v++;
3043 
3044 			error = copyout(v, ioqs->data, ioqs->bufsize);
3045 		}
3046 
3047 		free(buf, M_DEVBUF);
3048 		break;
3049 	}
3050 	case CHELSIO_SET_FILTER: {
3051 		struct ch_filter *f = (struct ch_filter *)data;
3052 		struct filter_info *p;
3053 		unsigned int nfilters = sc->params.mc5.nfilters;
3054 
3055 		if (!is_offload(sc))
3056 			return (EOPNOTSUPP);	/* No TCAM */
3057 		if (!(sc->flags & FULL_INIT_DONE))
3058 			return (EAGAIN);	/* mc5 not setup yet */
3059 		if (nfilters == 0)
3060 			return (EBUSY);		/* TOE will use TCAM */
3061 
3062 		/* sanity checks */
3063 		if (f->filter_id >= nfilters ||
3064 		    (f->val.dip && f->mask.dip != 0xffffffff) ||
3065 		    (f->val.sport && f->mask.sport != 0xffff) ||
3066 		    (f->val.dport && f->mask.dport != 0xffff) ||
3067 		    (f->val.vlan && f->mask.vlan != 0xfff) ||
3068 		    (f->val.vlan_prio &&
3069 			f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3070 		    (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3071 		    f->qset >= SGE_QSETS ||
3072 		    sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3073 			return (EINVAL);
3074 
3075 		/* Was allocated with M_WAITOK */
3076 		KASSERT(sc->filters, ("filter table NULL\n"));
3077 
3078 		p = &sc->filters[f->filter_id];
3079 		if (p->locked)
3080 			return (EPERM);
3081 
3082 		bzero(p, sizeof(*p));
3083 		p->sip = f->val.sip;
3084 		p->sip_mask = f->mask.sip;
3085 		p->dip = f->val.dip;
3086 		p->sport = f->val.sport;
3087 		p->dport = f->val.dport;
3088 		p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3089 		p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3090 		    FILTER_NO_VLAN_PRI;
3091 		p->mac_hit = f->mac_hit;
3092 		p->mac_vld = f->mac_addr_idx != 0xffff;
3093 		p->mac_idx = f->mac_addr_idx;
3094 		p->pkt_type = f->proto;
3095 		p->report_filter_id = f->want_filter_id;
3096 		p->pass = f->pass;
3097 		p->rss = f->rss;
3098 		p->qset = f->qset;
3099 
3100 		error = set_filter(sc, f->filter_id, p);
3101 		if (error == 0)
3102 			p->valid = 1;
3103 		break;
3104 	}
3105 	case CHELSIO_DEL_FILTER: {
3106 		struct ch_filter *f = (struct ch_filter *)data;
3107 		struct filter_info *p;
3108 		unsigned int nfilters = sc->params.mc5.nfilters;
3109 
3110 		if (!is_offload(sc))
3111 			return (EOPNOTSUPP);
3112 		if (!(sc->flags & FULL_INIT_DONE))
3113 			return (EAGAIN);
3114 		if (nfilters == 0 || sc->filters == NULL)
3115 			return (EINVAL);
3116 		if (f->filter_id >= nfilters)
3117 			return (EINVAL);
3118 
3119 		p = &sc->filters[f->filter_id];
3120 		if (p->locked)
3121 			return (EPERM);
3122 		if (!p->valid)
3123 			return (EFAULT); /* Read "Bad address" as "Bad index" */
3124 
3125 		bzero(p, sizeof(*p));
3126 		p->sip = p->sip_mask = 0xffffffff;
3127 		p->vlan = 0xfff;
3128 		p->vlan_prio = FILTER_NO_VLAN_PRI;
3129 		p->pkt_type = 1;
3130 		error = set_filter(sc, f->filter_id, p);
3131 		break;
3132 	}
3133 	case CHELSIO_GET_FILTER: {
3134 		struct ch_filter *f = (struct ch_filter *)data;
3135 		struct filter_info *p;
3136 		unsigned int i, nfilters = sc->params.mc5.nfilters;
3137 
3138 		if (!is_offload(sc))
3139 			return (EOPNOTSUPP);
3140 		if (!(sc->flags & FULL_INIT_DONE))
3141 			return (EAGAIN);
3142 		if (nfilters == 0 || sc->filters == NULL)
3143 			return (EINVAL);
3144 
3145 		i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3146 		for (; i < nfilters; i++) {
3147 			p = &sc->filters[i];
3148 			if (!p->valid)
3149 				continue;
3150 
3151 			bzero(f, sizeof(*f));
3152 
3153 			f->filter_id = i;
3154 			f->val.sip = p->sip;
3155 			f->mask.sip = p->sip_mask;
3156 			f->val.dip = p->dip;
3157 			f->mask.dip = p->dip ? 0xffffffff : 0;
3158 			f->val.sport = p->sport;
3159 			f->mask.sport = p->sport ? 0xffff : 0;
3160 			f->val.dport = p->dport;
3161 			f->mask.dport = p->dport ? 0xffff : 0;
3162 			f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3163 			f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3164 			f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3165 			    0 : p->vlan_prio;
3166 			f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3167 			    0 : FILTER_NO_VLAN_PRI;
3168 			f->mac_hit = p->mac_hit;
3169 			f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3170 			f->proto = p->pkt_type;
3171 			f->want_filter_id = p->report_filter_id;
3172 			f->pass = p->pass;
3173 			f->rss = p->rss;
3174 			f->qset = p->qset;
3175 
3176 			break;
3177 		}
3178 
3179 		if (i == nfilters)
3180 			f->filter_id = 0xffffffff;
3181 		break;
3182 	}
3183 	default:
3184 		return (EOPNOTSUPP);
3185 		break;
3186 	}
3187 
3188 	return (error);
3189 }
3190 
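/*
 * Copy the register block [start, end] into the dump buffer at the offset
 * matching the register addresses.
 */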
3191 static __inline void
3192 reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3193     unsigned int end)
3194 {
3195 	uint32_t *p = (uint32_t *)(buf + start);
3196 
3197 	for ( ; start <= end; start += sizeof(uint32_t))
3198 		*p++ = t3_read_reg(ap, start);
3199 }
3200 
3201 #define T3_REGMAP_SIZE (3 * 1024)
3202 static int
3203 cxgb_get_regs_len(void)
3204 {
3205 	return (T3_REGMAP_SIZE);
3206 }
3207 
3208 static void
3209 cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3210 {
3211 
3212 	/*
3213 	 * Version scheme:
3214 	 * bits 0..9: chip version
3215 	 * bits 10..15: chip revision
3216 	 * bit 31: set for PCIe cards
3217 	 */
3218 	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3219 
3220 	/*
3221 	 * We skip the MAC statistics registers because they are clear-on-read.
3222 	 * Also reading multi-register stats would need to synchronize with the
3223 	 * periodic mac stats accumulation.  Hard to justify the complexity.
3224 	 */
3225 	memset(buf, 0, cxgb_get_regs_len());
3226 	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3227 	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3228 	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3229 	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3230 	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3231 	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3232 		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3233 	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3234 		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3235 }
3236 
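/*
 * Allocate the software filter table.  The last entry is reserved as a locked
 * default filter (pass with RSS).
 */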
3237 static int
3238 alloc_filters(struct adapter *sc)
3239 {
3240 	struct filter_info *p;
3241 	unsigned int nfilters = sc->params.mc5.nfilters;
3242 
3243 	if (nfilters == 0)
3244 		return (0);
3245 
3246 	p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3247 	sc->filters = p;
3248 
3249 	p = &sc->filters[nfilters - 1];
3250 	p->vlan = 0xfff;
3251 	p->vlan_prio = FILTER_NO_VLAN_PRI;
3252 	p->pass = p->rss = p->valid = p->locked = 1;
3253 
3254 	return (0);
3255 }
3256 
3257 static int
3258 setup_hw_filters(struct adapter *sc)
3259 {
3260 	int i, rc;
3261 	unsigned int nfilters = sc->params.mc5.nfilters;
3262 
3263 	if (!sc->filters)
3264 		return (0);
3265 
3266 	t3_enable_filters(sc);
3267 
3268 	for (i = rc = 0; i < nfilters && !rc; i++) {
3269 		if (sc->filters[i].locked)
3270 			rc = set_filter(sc, i, &sc->filters[i]);
3271 	}
3272 
3273 	return (rc);
3274 }
3275 
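/*
 * Program one hardware filter.  A management work request carrying a
 * CPL_PASS_OPEN_REQ and two CPL_SET_TCB_FIELDs writes the TCAM entry; a
 * second request steers matches to the requested queue set when
 * pass-without-RSS is selected.
 */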
3276 static int
3277 set_filter(struct adapter *sc, int id, const struct filter_info *f)
3278 {
3279 	int len;
3280 	struct mbuf *m;
3281 	struct ulp_txpkt *txpkt;
3282 	struct work_request_hdr *wr;
3283 	struct cpl_pass_open_req *oreq;
3284 	struct cpl_set_tcb_field *sreq;
3285 
3286 	len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3287 	KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3288 
3289 	id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3290 	      sc->params.mc5.nfilters;
3291 
3292 	m = m_gethdr(M_WAITOK, MT_DATA);
3293 	m->m_len = m->m_pkthdr.len = len;
3294 	bzero(mtod(m, char *), len);
3295 
3296 	wr = mtod(m, struct work_request_hdr *);
3297 	wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3298 
3299 	oreq = (struct cpl_pass_open_req *)(wr + 1);
3300 	txpkt = (struct ulp_txpkt *)oreq;
3301 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3302 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3303 	OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3304 	oreq->local_port = htons(f->dport);
3305 	oreq->peer_port = htons(f->sport);
3306 	oreq->local_ip = htonl(f->dip);
3307 	oreq->peer_ip = htonl(f->sip);
3308 	oreq->peer_netmask = htonl(f->sip_mask);
3309 	oreq->opt0h = 0;
3310 	oreq->opt0l = htonl(F_NO_OFFLOAD);
3311 	oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3312 			 V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3313 			 V_VLAN_PRI(f->vlan_prio >> 1) |
3314 			 V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3315 			 V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3316 			 V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3317 
3318 	sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3319 	set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3320 			  (f->report_filter_id << 15) | (1 << 23) |
3321 			  ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3322 	set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3323 	t3_mgmt_tx(sc, m);
3324 
3325 	if (f->pass && !f->rss) {
3326 		len = sizeof(*sreq);
3327 		m = m_gethdr(M_WAITOK, MT_DATA);
3328 		m->m_len = m->m_pkthdr.len = len;
3329 		bzero(mtod(m, char *), len);
3330 		sreq = mtod(m, struct cpl_set_tcb_field *);
3331 		sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3332 		mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3333 				 (u64)sc->rrss_map[f->qset] << 19);
3334 		t3_mgmt_tx(sc, m);
3335 	}
3336 	return (0);
3337 }
3338 
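/* Build a CPL_SET_TCB_FIELD request; no reply is requested. */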
3339 static inline void
3340 mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3341     unsigned int word, u64 mask, u64 val)
3342 {
3343 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3344 	req->reply = V_NO_REPLY(1);
3345 	req->cpu_idx = 0;
3346 	req->word = htons(word);
3347 	req->mask = htobe64(mask);
3348 	req->val = htobe64(val);
3349 }
3350 
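/*
 * Like mk_set_tcb_field, but wrapped in a ULP_TXPKT so it can be embedded in
 * a larger work request.
 */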
3351 static inline void
3352 set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3353     unsigned int word, u64 mask, u64 val)
3354 {
3355 	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3356 
3357 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3358 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3359 	mk_set_tcb_field(req, tid, word, mask, val);
3360 }
3361 
3362 void
3363 t3_iterate(void (*func)(struct adapter *, void *), void *arg)
3364 {
3365 	struct adapter *sc;
3366 
3367 	mtx_lock(&t3_list_lock);
3368 	SLIST_FOREACH(sc, &t3_list, link) {
3369 		/*
3370 		 * func should not make any assumptions about what state sc is
3371 		 * in - the only guarantee is that sc->sc_lock is a valid lock.
3372 		 */
3373 		func(sc, arg);
3374 	}
3375 	mtx_unlock(&t3_list_lock);
3376 }
3377 
3378 #ifdef TCP_OFFLOAD
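/*
 * Enable or disable TOE on a port.  The TOM (and, opportunistically, iWARP)
 * upper layer drivers are activated on first use.
 */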
3379 static int
3380 toe_capability(struct port_info *pi, int enable)
3381 {
3382 	int rc;
3383 	struct adapter *sc = pi->adapter;
3384 
3385 	ADAPTER_LOCK_ASSERT_OWNED(sc);
3386 
3387 	if (!is_offload(sc))
3388 		return (ENODEV);
3389 
3390 	if (enable) {
3391 		if (!(sc->flags & FULL_INIT_DONE)) {
3392 			log(LOG_WARNING,
3393 			    "You must enable a cxgb interface first\n");
3394 			return (EAGAIN);
3395 		}
3396 
3397 		if (isset(&sc->offload_map, pi->port_id))
3398 			return (0);
3399 
3400 		if (!(sc->flags & TOM_INIT_DONE)) {
3401 			rc = t3_activate_uld(sc, ULD_TOM);
3402 			if (rc == EAGAIN) {
3403 				log(LOG_WARNING,
3404 				    "You must kldload t3_tom.ko before trying "
3405 				    "to enable TOE on a cxgb interface.\n");
3406 			}
3407 			if (rc != 0)
3408 				return (rc);
3409 			KASSERT(sc->tom_softc != NULL,
3410 			    ("%s: TOM activated but softc NULL", __func__));
3411 			KASSERT(sc->flags & TOM_INIT_DONE,
3412 			    ("%s: TOM activated but flag not set", __func__));
3413 		}
3414 
3415 		setbit(&sc->offload_map, pi->port_id);
3416 
3417 		/*
3418 		 * XXX: Temporary code to allow iWARP to be enabled when TOE is
3419 		 * enabled on any port.  Need to figure out how to enable,
3420 		 * disable, load, and unload iWARP cleanly.
3421 		 */
3422 		if (!isset(&sc->offload_map, MAX_NPORTS) &&
3423 		    t3_activate_uld(sc, ULD_IWARP) == 0)
3424 			setbit(&sc->offload_map, MAX_NPORTS);
3425 	} else {
3426 		if (!isset(&sc->offload_map, pi->port_id))
3427 			return (0);
3428 
3429 		KASSERT(sc->flags & TOM_INIT_DONE,
3430 		    ("%s: TOM never initialized?", __func__));
3431 		clrbit(&sc->offload_map, pi->port_id);
3432 	}
3433 
3434 	return (0);
3435 }
3436 
3437 /*
3438  * Add an upper layer driver to the global list.
3439  */
3440 int
3441 t3_register_uld(struct uld_info *ui)
3442 {
3443 	int rc = 0;
3444 	struct uld_info *u;
3445 
3446 	mtx_lock(&t3_uld_list_lock);
3447 	SLIST_FOREACH(u, &t3_uld_list, link) {
3448 		if (u->uld_id == ui->uld_id) {
3449 			rc = EEXIST;
3450 			goto done;
3451 		}
3452 	}
3453 
3454 	SLIST_INSERT_HEAD(&t3_uld_list, ui, link);
3455 	ui->refcount = 0;
3456 done:
3457 	mtx_unlock(&t3_uld_list_lock);
3458 	return (rc);
3459 }
3460 
3461 int
3462 t3_unregister_uld(struct uld_info *ui)
3463 {
3464 	int rc = EINVAL;
3465 	struct uld_info *u;
3466 
3467 	mtx_lock(&t3_uld_list_lock);
3468 
3469 	SLIST_FOREACH(u, &t3_uld_list, link) {
3470 		if (u == ui) {
3471 			if (ui->refcount > 0) {
3472 				rc = EBUSY;
3473 				goto done;
3474 			}
3475 
3476 			SLIST_REMOVE(&t3_uld_list, ui, uld_info, link);
3477 			rc = 0;
3478 			goto done;
3479 		}
3480 	}
3481 done:
3482 	mtx_unlock(&t3_uld_list_lock);
3483 	return (rc);
3484 }
3485 
3486 int
3487 t3_activate_uld(struct adapter *sc, int id)
3488 {
3489 	int rc = EAGAIN;
3490 	struct uld_info *ui;
3491 
3492 	mtx_lock(&t3_uld_list_lock);
3493 
3494 	SLIST_FOREACH(ui, &t3_uld_list, link) {
3495 		if (ui->uld_id == id) {
3496 			rc = ui->activate(sc);
3497 			if (rc == 0)
3498 				ui->refcount++;
3499 			goto done;
3500 		}
3501 	}
3502 done:
3503 	mtx_unlock(&t3_uld_list_lock);
3504 
3505 	return (rc);
3506 }
3507 
3508 int
3509 t3_deactivate_uld(struct adapter *sc, int id)
3510 {
3511 	int rc = EINVAL;
3512 	struct uld_info *ui;
3513 
3514 	mtx_lock(&t3_uld_list_lock);
3515 
3516 	SLIST_FOREACH(ui, &t3_uld_list, link) {
3517 		if (ui->uld_id == id) {
3518 			rc = ui->deactivate(sc);
3519 			if (rc == 0)
3520 				ui->refcount--;
3521 			goto done;
3522 		}
3523 	}
3524 done:
3525 	mtx_unlock(&t3_uld_list_lock);
3526 
3527 	return (rc);
3528 }
3529 
3530 static int
3531 cpl_not_handled(struct sge_qset *qs __unused, struct rsp_desc *r __unused,
3532     struct mbuf *m)
3533 {
3534 	m_freem(m);
3535 	return (EDOOFUS);
3536 }
3537 
3538 int
3539 t3_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h)
3540 {
3541 	uintptr_t *loc, new;
3542 
3543 	if (opcode >= NUM_CPL_HANDLERS)
3544 		return (EINVAL);
3545 
3546 	new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled;
3547 	loc = (uintptr_t *) &sc->cpl_handler[opcode];
3548 	atomic_store_rel_ptr(loc, new);
3549 
3550 	return (0);
3551 }
3552 #endif
3553 
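/*
 * Module event handler: initialize the global adapter and ULD lists on load,
 * and refuse to unload while any adapter or ULD is still registered.
 */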
3554 static int
3555 cxgbc_mod_event(module_t mod, int cmd, void *arg)
3556 {
3557 	int rc = 0;
3558 
3559 	switch (cmd) {
3560 	case MOD_LOAD:
3561 		mtx_init(&t3_list_lock, "T3 adapters", 0, MTX_DEF);
3562 		SLIST_INIT(&t3_list);
3563 #ifdef TCP_OFFLOAD
3564 		mtx_init(&t3_uld_list_lock, "T3 ULDs", 0, MTX_DEF);
3565 		SLIST_INIT(&t3_uld_list);
3566 #endif
3567 		break;
3568 
3569 	case MOD_UNLOAD:
3570 #ifdef TCP_OFFLOAD
3571 		mtx_lock(&t3_uld_list_lock);
3572 		if (!SLIST_EMPTY(&t3_uld_list)) {
3573 			rc = EBUSY;
3574 			mtx_unlock(&t3_uld_list_lock);
3575 			break;
3576 		}
3577 		mtx_unlock(&t3_uld_list_lock);
3578 		mtx_destroy(&t3_uld_list_lock);
3579 #endif
3580 		mtx_lock(&t3_list_lock);
3581 		if (!SLIST_EMPTY(&t3_list)) {
3582 			rc = EBUSY;
3583 			mtx_unlock(&t3_list_lock);
3584 			break;
3585 		}
3586 		mtx_unlock(&t3_list_lock);
3587 		mtx_destroy(&t3_list_lock);
3588 		break;
3589 	}
3590 
3591 	return (rc);
3592 }
3593 
3594 #ifdef DEBUGNET
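/*
 * debugnet support: minimal polled transmit and receive paths used while the
 * kernel is dumping over the network.
 */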
3595 static void
3596 cxgb_debugnet_init(if_t ifp, int *nrxr, int *ncl, int *clsize)
3597 {
3598 	struct port_info *pi;
3599 	adapter_t *adap;
3600 
3601 	pi = if_getsoftc(ifp);
3602 	adap = pi->adapter;
3603 	ADAPTER_LOCK(adap);
3604 	*nrxr = adap->nqsets;
3605 	*ncl = adap->sge.qs[0].fl[1].size;
3606 	*clsize = adap->sge.qs[0].fl[1].buf_size;
3607 	ADAPTER_UNLOCK(adap);
3608 }
3609 
3610 static void
3611 cxgb_debugnet_event(if_t ifp, enum debugnet_ev event)
3612 {
3613 	struct port_info *pi;
3614 	struct sge_qset *qs;
3615 	int i;
3616 
3617 	pi = if_getsoftc(ifp);
3618 	if (event == DEBUGNET_START)
3619 		for (i = 0; i < pi->adapter->nqsets; i++) {
3620 			qs = &pi->adapter->sge.qs[i];
3621 
3622 			/* Need to reinit after debugnet_mbuf_start(). */
3623 			qs->fl[0].zone = zone_pack;
3624 			qs->fl[1].zone = zone_clust;
3625 			qs->lro.enabled = 0;
3626 		}
3627 }
3628 
3629 static int
3630 cxgb_debugnet_transmit(if_t ifp, struct mbuf *m)
3631 {
3632 	struct port_info *pi;
3633 	struct sge_qset *qs;
3634 
3635 	pi = if_getsoftc(ifp);
3636 	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
3637 	    IFF_DRV_RUNNING)
3638 		return (ENOENT);
3639 
3640 	qs = &pi->adapter->sge.qs[pi->first_qset];
3641 	return (cxgb_debugnet_encap(qs, &m));
3642 }
3643 
3644 static int
3645 cxgb_debugnet_poll(if_t ifp, int count)
3646 {
3647 	struct port_info *pi;
3648 	adapter_t *adap;
3649 	int i;
3650 
3651 	pi = if_getsoftc(ifp);
3652 	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
3653 		return (ENOENT);
3654 
3655 	adap = pi->adapter;
3656 	for (i = 0; i < adap->nqsets; i++)
3657 		(void)cxgb_debugnet_poll_rx(adap, &adap->sge.qs[i]);
3658 	(void)cxgb_debugnet_poll_tx(&adap->sge.qs[pi->first_qset]);
3659 	return (0);
3660 }
3661 #endif /* DEBUGNET */
3662