xref: /freebsd/sys/dev/cxgbe/t4_main.c (revision 9005607c8fa7317a759f1fc16cae4738f9a2fbb3)
1 /*-
2  * Copyright (c) 2011 Chelsio Communications, Inc.
3  * All rights reserved.
4  * Written by: Navdeep Parhar <np@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33 
34 #include <sys/param.h>
35 #include <sys/conf.h>
36 #include <sys/priv.h>
37 #include <sys/kernel.h>
38 #include <sys/bus.h>
39 #include <sys/module.h>
40 #include <sys/malloc.h>
41 #include <sys/queue.h>
42 #include <sys/taskqueue.h>
43 #include <sys/pciio.h>
44 #include <dev/pci/pcireg.h>
45 #include <dev/pci/pcivar.h>
46 #include <dev/pci/pci_private.h>
47 #include <sys/firmware.h>
48 #include <sys/sbuf.h>
49 #include <sys/smp.h>
50 #include <sys/socket.h>
51 #include <sys/sockio.h>
52 #include <sys/sysctl.h>
53 #include <net/ethernet.h>
54 #include <net/if.h>
55 #include <net/if_types.h>
56 #include <net/if_dl.h>
57 #include <net/if_vlan_var.h>
58 
59 #include "common/common.h"
60 #include "common/t4_msg.h"
61 #include "common/t4_regs.h"
62 #include "common/t4_regs_values.h"
63 #include "t4_ioctl.h"
64 #include "t4_l2t.h"
65 
66 /* T4 bus driver interface */
67 static int t4_probe(device_t);
68 static int t4_attach(device_t);
69 static int t4_detach(device_t);
70 static device_method_t t4_methods[] = {
71 	DEVMETHOD(device_probe,		t4_probe),
72 	DEVMETHOD(device_attach,	t4_attach),
73 	DEVMETHOD(device_detach,	t4_detach),
74 
75 	DEVMETHOD_END
76 };
77 static driver_t t4_driver = {
78 	"t4nex",
79 	t4_methods,
80 	sizeof(struct adapter)
81 };
82 
83 
84 /* T4 port (cxgbe) interface */
85 static int cxgbe_probe(device_t);
86 static int cxgbe_attach(device_t);
87 static int cxgbe_detach(device_t);
88 static device_method_t cxgbe_methods[] = {
89 	DEVMETHOD(device_probe,		cxgbe_probe),
90 	DEVMETHOD(device_attach,	cxgbe_attach),
91 	DEVMETHOD(device_detach,	cxgbe_detach),
92 	DEVMETHOD_END
93 };
94 static driver_t cxgbe_driver = {
95 	"cxgbe",
96 	cxgbe_methods,
97 	sizeof(struct port_info)
98 };
99 
100 static d_ioctl_t t4_ioctl;
101 static d_open_t t4_open;
102 static d_close_t t4_close;
103 
104 static struct cdevsw t4_cdevsw = {
105 	.d_version = D_VERSION,
106 	.d_flags = 0,
107 	.d_open = t4_open,
108 	.d_close = t4_close,
109 	.d_ioctl = t4_ioctl,
110 	.d_name = "t4nex",
111 };
112 
113 /* ifnet + media interface */
114 static void cxgbe_init(void *);
115 static int cxgbe_ioctl(struct ifnet *, unsigned long, caddr_t);
116 static int cxgbe_transmit(struct ifnet *, struct mbuf *);
117 static void cxgbe_qflush(struct ifnet *);
118 static int cxgbe_media_change(struct ifnet *);
119 static void cxgbe_media_status(struct ifnet *, struct ifmediareq *);
120 
121 MALLOC_DEFINE(M_CXGBE, "cxgbe", "Chelsio T4 Ethernet driver and services");
122 
123 /*
124  * The correct lock order when multiple locks must be held is t4_list_lock,
125  * then ADAPTER_LOCK, then t4_uld_list_lock.
126  */
127 static struct mtx t4_list_lock;
128 static SLIST_HEAD(, adapter) t4_list;
129 #ifdef TCP_OFFLOAD
130 static struct mtx t4_uld_list_lock;
131 static SLIST_HEAD(, uld_info) t4_uld_list;
132 #endif
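/*
 * A sketch (hypothetical code, not a routine in this file) of honoring that
 * order while walking the adapter list with per-adapter locks:
 *
 *	mtx_lock(&t4_list_lock);
 *	SLIST_FOREACH(sc, &t4_list, link) {
 *		ADAPTER_LOCK(sc);
 *		...
 *		ADAPTER_UNLOCK(sc);
 *	}
 *	mtx_unlock(&t4_list_lock);
 *
 * Acquiring these locks in any other order risks a deadlock against a thread
 * that follows the documented order.
 */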
133 
134 /*
135  * Tunables.  See tweak_tunables() too.
136  */
137 
138 /*
139  * Number of queues for tx and rx, 10G and 1G, NIC and offload.
140  */
141 #define NTXQ_10G 16
142 static int t4_ntxq10g = -1;
143 TUNABLE_INT("hw.cxgbe.ntxq10g", &t4_ntxq10g);
144 
145 #define NRXQ_10G 8
146 static int t4_nrxq10g = -1;
147 TUNABLE_INT("hw.cxgbe.nrxq10g", &t4_nrxq10g);
148 
149 #define NTXQ_1G 4
150 static int t4_ntxq1g = -1;
151 TUNABLE_INT("hw.cxgbe.ntxq1g", &t4_ntxq1g);
152 
153 #define NRXQ_1G 2
154 static int t4_nrxq1g = -1;
155 TUNABLE_INT("hw.cxgbe.nrxq1g", &t4_nrxq1g);
156 
157 #ifdef TCP_OFFLOAD
158 #define NOFLDTXQ_10G 8
159 static int t4_nofldtxq10g = -1;
160 TUNABLE_INT("hw.cxgbe.nofldtxq10g", &t4_nofldtxq10g);
161 
162 #define NOFLDRXQ_10G 2
163 static int t4_nofldrxq10g = -1;
164 TUNABLE_INT("hw.cxgbe.nofldrxq10g", &t4_nofldrxq10g);
165 
166 #define NOFLDTXQ_1G 2
167 static int t4_nofldtxq1g = -1;
168 TUNABLE_INT("hw.cxgbe.nofldtxq1g", &t4_nofldtxq1g);
169 
170 #define NOFLDRXQ_1G 1
171 static int t4_nofldrxq1g = -1;
172 TUNABLE_INT("hw.cxgbe.nofldrxq1g", &t4_nofldrxq1g);
173 #endif
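/*
 * Example loader.conf(5) settings for the queue-count knobs above (the values
 * are illustrative only).  -1 means "let the driver decide"; tweak_tunables()
 * resolves it using the NTXQ_10G/NRXQ_10G style defaults:
 *
 *	hw.cxgbe.ntxq10g="8"
 *	hw.cxgbe.nrxq10g="8"
 */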
174 
175 /*
176  * Holdoff parameters for 10G and 1G ports.
177  */
178 #define TMR_IDX_10G 1
179 static int t4_tmr_idx_10g = TMR_IDX_10G;
180 TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &t4_tmr_idx_10g);
181 
182 #define PKTC_IDX_10G (-1)
183 static int t4_pktc_idx_10g = PKTC_IDX_10G;
184 TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &t4_pktc_idx_10g);
185 
186 #define TMR_IDX_1G 1
187 static int t4_tmr_idx_1g = TMR_IDX_1G;
188 TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_1G", &t4_tmr_idx_1g);
189 
190 #define PKTC_IDX_1G (-1)
191 static int t4_pktc_idx_1g = PKTC_IDX_1G;
192 TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_1G", &t4_pktc_idx_1g);
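/*
 * tmr_idx selects one of the SGE holdoff timers and pktc_idx selects a packet
 * count threshold; a pktc_idx of -1 leaves the packet-count part of interrupt
 * holdoff disabled (see sysctl_holdoff_pktc_idx for the runtime knob).
 */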
193 
194 /*
195  * Size (# of entries) of each tx and rx queue.
196  */
197 static unsigned int t4_qsize_txq = TX_EQ_QSIZE;
198 TUNABLE_INT("hw.cxgbe.qsize_txq", &t4_qsize_txq);
199 
200 static unsigned int t4_qsize_rxq = RX_IQ_QSIZE;
201 TUNABLE_INT("hw.cxgbe.qsize_rxq", &t4_qsize_rxq);
202 
203 /*
204  * Interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively).
205  */
206 static int t4_intr_types = INTR_MSIX | INTR_MSI | INTR_INTX;
207 TUNABLE_INT("hw.cxgbe.interrupt_types", &t4_intr_types);
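/*
 * e.g. hw.cxgbe.interrupt_types="4" in loader.conf(5) restricts the driver to
 * MSI-X, "2" to MSI; the default of 7 lets it fall back from MSI-X to MSI to
 * INTx.
 */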
208 
209 /*
210  * Configuration file.
211  */
212 static char t4_cfg_file[32] = "default";
213 TUNABLE_STR("hw.cxgbe.config_file", t4_cfg_file, sizeof(t4_cfg_file));
214 
215 /*
216  * ASIC features that will be used.  Disable the ones you don't want so that the
217  * chip resources aren't wasted on features that will not be used.
218  */
219 static int t4_linkcaps_allowed = 0;	/* No DCBX, PPP, etc. by default */
220 TUNABLE_INT("hw.cxgbe.linkcaps_allowed", &t4_linkcaps_allowed);
221 
222 static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC;
223 TUNABLE_INT("hw.cxgbe.niccaps_allowed", &t4_niccaps_allowed);
224 
225 static int t4_toecaps_allowed = -1;
226 TUNABLE_INT("hw.cxgbe.toecaps_allowed", &t4_toecaps_allowed);
227 
228 static int t4_rdmacaps_allowed = 0;
229 TUNABLE_INT("hw.cxgbe.rdmacaps_allowed", &t4_rdmacaps_allowed);
230 
231 static int t4_iscsicaps_allowed = 0;
232 TUNABLE_INT("hw.cxgbe.iscsicaps_allowed", &t4_iscsicaps_allowed);
233 
234 static int t4_fcoecaps_allowed = 0;
235 TUNABLE_INT("hw.cxgbe.fcoecaps_allowed", &t4_fcoecaps_allowed);
236 
237 struct intrs_and_queues {
238 	int intr_type;		/* INTx, MSI, or MSI-X */
239 	int nirq;		/* Number of vectors */
240 	int intr_flags;
241 	int ntxq10g;		/* # of NIC txq's for each 10G port */
242 	int nrxq10g;		/* # of NIC rxq's for each 10G port */
243 	int ntxq1g;		/* # of NIC txq's for each 1G port */
244 	int nrxq1g;		/* # of NIC rxq's for each 1G port */
245 #ifdef TCP_OFFLOAD
246 	int nofldtxq10g;	/* # of TOE txq's for each 10G port */
247 	int nofldrxq10g;	/* # of TOE rxq's for each 10G port */
248 	int nofldtxq1g;		/* # of TOE txq's for each 1G port */
249 	int nofldrxq1g;		/* # of TOE rxq's for each 1G port */
250 #endif
251 };
252 
253 struct filter_entry {
254 	uint32_t valid:1;	/* filter allocated and valid */
255 	uint32_t locked:1;	/* filter is administratively locked */
256 	uint32_t pending:1;	/* filter action is pending firmware reply */
257 	uint32_t smtidx:8;	/* Source MAC Table index for smac */
258 	struct l2t_entry *l2t;	/* Layer Two Table entry for dmac */
259 
260 	struct t4_filter_specification fs;
261 };
262 
263 enum {
264 	XGMAC_MTU	= (1 << 0),
265 	XGMAC_PROMISC	= (1 << 1),
266 	XGMAC_ALLMULTI	= (1 << 2),
267 	XGMAC_VLANEX	= (1 << 3),
268 	XGMAC_UCADDR	= (1 << 4),
269 	XGMAC_MCADDRS	= (1 << 5),
270 
271 	XGMAC_ALL	= 0xffff
272 };
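/*
 * These flags are or'ed together to tell update_mac_settings() which aspects
 * of the MAC to reprogram, e.g. the SIOCSIFFLAGS handler in cxgbe_ioctl uses
 * update_mac_settings(pi, XGMAC_PROMISC | XGMAC_ALLMULTI).
 */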
273 
274 static int map_bars(struct adapter *);
275 static void setup_memwin(struct adapter *);
276 static int cfg_itype_and_nqueues(struct adapter *, int, int,
277     struct intrs_and_queues *);
278 static int prep_firmware(struct adapter *);
279 static int upload_config_file(struct adapter *, const struct firmware *,
280     uint32_t *, uint32_t *);
281 static int partition_resources(struct adapter *, const struct firmware *);
282 static int get_params__pre_init(struct adapter *);
283 static int get_params__post_init(struct adapter *);
284 static void t4_set_desc(struct adapter *);
285 static void build_medialist(struct port_info *);
286 static int update_mac_settings(struct port_info *, int);
287 static int cxgbe_init_synchronized(struct port_info *);
288 static int cxgbe_uninit_synchronized(struct port_info *);
289 static int setup_intr_handlers(struct adapter *);
290 static int adapter_full_init(struct adapter *);
291 static int adapter_full_uninit(struct adapter *);
292 static int port_full_init(struct port_info *);
293 static int port_full_uninit(struct port_info *);
294 static void quiesce_eq(struct adapter *, struct sge_eq *);
295 static void quiesce_iq(struct adapter *, struct sge_iq *);
296 static void quiesce_fl(struct adapter *, struct sge_fl *);
297 static int t4_alloc_irq(struct adapter *, struct irq *, int rid,
298     driver_intr_t *, void *, char *);
299 static int t4_free_irq(struct adapter *, struct irq *);
300 static void reg_block_dump(struct adapter *, uint8_t *, unsigned int,
301     unsigned int);
302 static void t4_get_regs(struct adapter *, struct t4_regdump *, uint8_t *);
303 static void cxgbe_tick(void *);
304 static void cxgbe_vlan_config(void *, struct ifnet *, uint16_t);
305 static int cpl_not_handled(struct sge_iq *, const struct rss_header *,
306     struct mbuf *);
307 static int an_not_handled(struct sge_iq *, const struct rsp_ctrl *);
308 static int fw_msg_not_handled(struct adapter *, const __be64 *);
309 static int t4_sysctls(struct adapter *);
310 static int cxgbe_sysctls(struct port_info *);
311 static int sysctl_int_array(SYSCTL_HANDLER_ARGS);
312 static int sysctl_bitfield(SYSCTL_HANDLER_ARGS);
313 static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS);
314 static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS);
315 static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS);
316 static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS);
317 static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS);
318 #ifdef SBUF_DRAIN
319 static int sysctl_cctrl(SYSCTL_HANDLER_ARGS);
320 static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS);
321 static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS);
322 static int sysctl_devlog(SYSCTL_HANDLER_ARGS);
323 static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS);
324 static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS);
325 static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS);
326 static int sysctl_meminfo(SYSCTL_HANDLER_ARGS);
327 static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS);
328 static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS);
329 static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS);
330 static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS);
331 static int sysctl_tids(SYSCTL_HANDLER_ARGS);
332 static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS);
333 static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS);
334 #endif
335 static inline void txq_start(struct ifnet *, struct sge_txq *);
336 static uint32_t fconf_to_mode(uint32_t);
337 static uint32_t mode_to_fconf(uint32_t);
338 static uint32_t fspec_to_fconf(struct t4_filter_specification *);
339 static int get_filter_mode(struct adapter *, uint32_t *);
340 static int set_filter_mode(struct adapter *, uint32_t);
341 static inline uint64_t get_filter_hits(struct adapter *, uint32_t);
342 static int get_filter(struct adapter *, struct t4_filter *);
343 static int set_filter(struct adapter *, struct t4_filter *);
344 static int del_filter(struct adapter *, struct t4_filter *);
345 static void clear_filter(struct filter_entry *);
346 static int set_filter_wr(struct adapter *, int);
347 static int del_filter_wr(struct adapter *, int);
348 static int get_sge_context(struct adapter *, struct t4_sge_context *);
349 static int load_fw(struct adapter *, struct t4_data *);
350 static int read_card_mem(struct adapter *, struct t4_mem_range *);
351 static int read_i2c(struct adapter *, struct t4_i2c_data *);
352 #ifdef TCP_OFFLOAD
353 static int toe_capability(struct port_info *, int);
354 #endif
355 static int t4_mod_event(module_t, int, void *);
356 
357 struct t4_pciids {
358 	uint16_t device;
359 	char *desc;
360 } t4_pciids[] = {
361 	{0xa000, "Chelsio Terminator 4 FPGA"},
362 	{0x4400, "Chelsio T440-dbg"},
363 	{0x4401, "Chelsio T420-CR"},
364 	{0x4402, "Chelsio T422-CR"},
365 	{0x4403, "Chelsio T440-CR"},
366 	{0x4404, "Chelsio T420-BCH"},
367 	{0x4405, "Chelsio T440-BCH"},
368 	{0x4406, "Chelsio T440-CH"},
369 	{0x4407, "Chelsio T420-SO"},
370 	{0x4408, "Chelsio T420-CX"},
371 	{0x4409, "Chelsio T420-BT"},
372 	{0x440a, "Chelsio T404-BT"},
373 	{0x440e, "Chelsio T440-LP-CR"},
374 };
375 
376 #ifdef TCP_OFFLOAD
377 /*
378  * service_iq() is handed an iq and needs to locate its fl.  The offset of the
379  * fl from the iq must be exactly the same in both rxq and ofld_rxq.
380  */
381 CTASSERT(offsetof(struct sge_ofld_rxq, iq) == offsetof(struct sge_rxq, iq));
382 CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl));
383 #endif
384 
385 /* No easy way to include t4_msg.h before adapter.h so we check this way */
386 CTASSERT(nitems(((struct adapter *)0)->cpl_handler) == NUM_CPL_CMDS);
387 CTASSERT(nitems(((struct adapter *)0)->fw_msg_handler) == NUM_FW6_TYPES);
388 
389 static int
390 t4_probe(device_t dev)
391 {
392 	int i;
393 	uint16_t v = pci_get_vendor(dev);
394 	uint16_t d = pci_get_device(dev);
395 	uint8_t f = pci_get_function(dev);
396 
397 	if (v != PCI_VENDOR_ID_CHELSIO)
398 		return (ENXIO);
399 
400 	/* Attach only to PF0 of the FPGA */
401 	if (d == 0xa000 && f != 0)
402 		return (ENXIO);
403 
404 	for (i = 0; i < nitems(t4_pciids); i++) {
405 		if (d == t4_pciids[i].device) {
406 			device_set_desc(dev, t4_pciids[i].desc);
407 			return (BUS_PROBE_DEFAULT);
408 		}
409 	}
410 
411 	return (ENXIO);
412 }
413 
414 static int
415 t4_attach(device_t dev)
416 {
417 	struct adapter *sc;
418 	int rc = 0, i, n10g, n1g, rqidx, tqidx;
419 	struct intrs_and_queues iaq;
420 	struct sge *s;
421 #ifdef TCP_OFFLOAD
422 	int ofld_rqidx, ofld_tqidx;
423 #endif
424 
425 	sc = device_get_softc(dev);
426 	sc->dev = dev;
427 
428 	pci_enable_busmaster(dev);
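	/*
	 * 4096 is the largest max-read-request size PCIe allows, and relaxed
	 * ordering lets the chip's DMA writes pass one another; both are
	 * throughput tweaks and are harmless if the root complex ignores them.
	 */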
429 	if (pci_find_cap(dev, PCIY_EXPRESS, &i) == 0) {
430 		uint32_t v;
431 
432 		pci_set_max_read_req(dev, 4096);
433 		v = pci_read_config(dev, i + PCIER_DEVICE_CTL, 2);
434 		v |= PCIEM_CTL_RELAXED_ORD_ENABLE;
435 		pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2);
436 	}
437 
438 	snprintf(sc->lockname, sizeof(sc->lockname), "%s",
439 	    device_get_nameunit(dev));
440 	mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF);
441 	mtx_lock(&t4_list_lock);
442 	SLIST_INSERT_HEAD(&t4_list, sc, link);
443 	mtx_unlock(&t4_list_lock);
444 
445 	mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF);
446 	TAILQ_INIT(&sc->sfl);
447 	callout_init(&sc->sfl_callout, CALLOUT_MPSAFE);
448 
449 	rc = map_bars(sc);
450 	if (rc != 0)
451 		goto done; /* error message displayed already */
452 
453 	/*
454 	 * This is the real PF# to which we're attaching.  Works from within PCI
455 	 * passthrough environments too, where pci_get_function() could return a
456 	 * different PF# depending on the passthrough configuration.  We need to
457 	 * use the real PF# in all our communication with the firmware.
458 	 */
459 	sc->pf = G_SOURCEPF(t4_read_reg(sc, A_PL_WHOAMI));
460 	sc->mbox = sc->pf;
461 
462 	memset(sc->chan_map, 0xff, sizeof(sc->chan_map));
463 	sc->an_handler = an_not_handled;
464 	for (i = 0; i < nitems(sc->cpl_handler); i++)
465 		sc->cpl_handler[i] = cpl_not_handled;
466 	for (i = 0; i < nitems(sc->fw_msg_handler); i++)
467 		sc->fw_msg_handler[i] = fw_msg_not_handled;
468 	t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, t4_filter_rpl);
469 
470 	/* Prepare the adapter for operation */
471 	rc = -t4_prep_adapter(sc);
472 	if (rc != 0) {
473 		device_printf(dev, "failed to prepare adapter: %d.\n", rc);
474 		goto done;
475 	}
476 
477 	/*
478 	 * Do this really early, with the memory windows set up even before the
479 	 * character device.  The userland tool's register i/o and mem read
480 	 * will work even in "recovery mode".
481 	 */
482 	setup_memwin(sc);
483 	sc->cdev = make_dev(&t4_cdevsw, device_get_unit(dev), UID_ROOT,
484 	    GID_WHEEL, 0600, "%s", device_get_nameunit(dev));
485 	sc->cdev->si_drv1 = sc;
486 
487 	/* Go no further if recovery mode has been requested. */
488 	if (TUNABLE_INT_FETCH("hw.cxgbe.sos", &i) && i != 0) {
489 		device_printf(dev, "recovery mode.\n");
490 		goto done;
491 	}
492 
493 	/* Prepare the firmware for operation */
494 	rc = prep_firmware(sc);
495 	if (rc != 0)
496 		goto done; /* error message displayed already */
497 
498 	rc = get_params__pre_init(sc);
499 	if (rc != 0)
500 		goto done; /* error message displayed already */
501 
502 	rc = t4_sge_init(sc);
503 	if (rc != 0)
504 		goto done; /* error message displayed already */
505 
506 	if (sc->flags & MASTER_PF) {
507 		/* get basic stuff going */
508 		rc = -t4_fw_initialize(sc, sc->mbox);
509 		if (rc != 0) {
510 			device_printf(dev, "early init failed: %d.\n", rc);
511 			goto done;
512 		}
513 	}
514 
515 	rc = get_params__post_init(sc);
516 	if (rc != 0)
517 		goto done; /* error message displayed already */
518 
519 	if (sc->flags & MASTER_PF) {
520 		uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE);
521 
522 		/* final tweaks to some settings */
523 
524 		t4_load_mtus(sc, sc->params.mtus, sc->params.a_wnd,
525 		    sc->params.b_wnd);
526 		/* 4K, 16K, 64K, 256K DDP "page sizes" */
527 		t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, V_HPZ0(0) | V_HPZ1(2) |
528 		    V_HPZ2(4) | V_HPZ3(6));
529 		t4_set_reg_field(sc, A_ULP_RX_CTL, F_TDDPTAGTCB, F_TDDPTAGTCB);
530 		t4_set_reg_field(sc, A_TP_PARA_REG5,
531 		    V_INDICATESIZE(M_INDICATESIZE) |
532 		    F_REARMDDPOFFSET | F_RESETDDPOFFSET,
533 		    V_INDICATESIZE(indsz) |
534 		    F_REARMDDPOFFSET | F_RESETDDPOFFSET);
535 	} else {
536 		/*
537 		 * XXX: Verify that we can live with whatever the master driver
538 		 * has done so far, and hope that it doesn't change any global
539 		 * setting from underneath us in the future.
540 		 */
541 	}
542 
543 	t4_read_indirect(sc, A_TP_PIO_ADDR, A_TP_PIO_DATA, &sc->filter_mode, 1,
544 	    A_TP_VLAN_PRI_MAP);
545 
546 	for (i = 0; i < NCHAN; i++)
547 		sc->params.tp.tx_modq[i] = i;
548 
549 	rc = t4_create_dma_tag(sc);
550 	if (rc != 0)
551 		goto done; /* error message displayed already */
552 
553 	/*
554 	 * First pass over all the ports - allocate VIs and initialize some
555 	 * basic parameters like mac address, port type, etc.  We also figure
556 	 * out whether a port is 10G or 1G and use that information when
557 	 * calculating how many interrupts to attempt to allocate.
558 	 */
559 	n10g = n1g = 0;
560 	for_each_port(sc, i) {
561 		struct port_info *pi;
562 
563 		pi = malloc(sizeof(*pi), M_CXGBE, M_ZERO | M_WAITOK);
564 		sc->port[i] = pi;
565 
566 		/* These must be set before t4_port_init */
567 		pi->adapter = sc;
568 		pi->port_id = i;
569 
570 		/* Allocate the vi and initialize parameters like mac addr */
571 		rc = -t4_port_init(pi, sc->mbox, sc->pf, 0);
572 		if (rc != 0) {
573 			device_printf(dev, "unable to initialize port %d: %d\n",
574 			    i, rc);
575 			free(pi, M_CXGBE);
576 			sc->port[i] = NULL;
577 			goto done;
578 		}
579 
580 		snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d",
581 		    device_get_nameunit(dev), i);
582 		mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF);
583 
584 		if (is_10G_port(pi)) {
585 			n10g++;
586 			pi->tmr_idx = t4_tmr_idx_10g;
587 			pi->pktc_idx = t4_pktc_idx_10g;
588 		} else {
589 			n1g++;
590 			pi->tmr_idx = t4_tmr_idx_1g;
591 			pi->pktc_idx = t4_pktc_idx_1g;
592 		}
593 
594 		pi->xact_addr_filt = -1;
595 
596 		pi->qsize_rxq = t4_qsize_rxq;
597 		pi->qsize_txq = t4_qsize_txq;
598 
599 		pi->dev = device_add_child(dev, "cxgbe", -1);
600 		if (pi->dev == NULL) {
601 			device_printf(dev,
602 			    "failed to add device for port %d.\n", i);
603 			rc = ENXIO;
604 			goto done;
605 		}
606 		device_set_softc(pi->dev, pi);
607 	}
608 
609 	/*
610 	 * Interrupt type, # of interrupts, # of rx/tx queues, etc.
611 	 */
612 	rc = cfg_itype_and_nqueues(sc, n10g, n1g, &iaq);
613 	if (rc != 0)
614 		goto done; /* error message displayed already */
615 
616 	sc->intr_type = iaq.intr_type;
617 	sc->intr_count = iaq.nirq;
618 	sc->flags |= iaq.intr_flags;
619 
620 	s = &sc->sge;
621 	s->nrxq = n10g * iaq.nrxq10g + n1g * iaq.nrxq1g;
622 	s->ntxq = n10g * iaq.ntxq10g + n1g * iaq.ntxq1g;
623 	s->neq = s->ntxq + s->nrxq;	/* the free list in an rxq is an eq */
624 	s->neq += sc->params.nports + 1; /* ctrl queues: 1 per port + 1 mgmt */
625 	s->niq = s->nrxq + 1;		/* 1 extra for firmware event queue */
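	/*
	 * Worked example (NIC queues only): a dual-port 10G adapter with
	 * nrxq10g = 8 and ntxq10g = 16 gets nrxq = 16 and ntxq = 32, so
	 * neq = 16 + 32 + 3 = 51 (free lists + txq's + ctrl queues) and
	 * niq = 16 + 1 = 17.  Offload queues, if any, add to both counts.
	 */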
626 
627 #ifdef TCP_OFFLOAD
628 	if (is_offload(sc)) {
629 
630 		s->nofldrxq = n10g * iaq.nofldrxq10g + n1g * iaq.nofldrxq1g;
631 		s->nofldtxq = n10g * iaq.nofldtxq10g + n1g * iaq.nofldtxq1g;
632 		s->neq += s->nofldtxq + s->nofldrxq;
633 		s->niq += s->nofldrxq;
634 
635 		s->ofld_rxq = malloc(s->nofldrxq * sizeof(struct sge_ofld_rxq),
636 		    M_CXGBE, M_ZERO | M_WAITOK);
637 		s->ofld_txq = malloc(s->nofldtxq * sizeof(struct sge_wrq),
638 		    M_CXGBE, M_ZERO | M_WAITOK);
639 	}
640 #endif
641 
642 	s->ctrlq = malloc(sc->params.nports * sizeof(struct sge_wrq), M_CXGBE,
643 	    M_ZERO | M_WAITOK);
644 	s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE,
645 	    M_ZERO | M_WAITOK);
646 	s->txq = malloc(s->ntxq * sizeof(struct sge_txq), M_CXGBE,
647 	    M_ZERO | M_WAITOK);
648 	s->iqmap = malloc(s->niq * sizeof(struct sge_iq *), M_CXGBE,
649 	    M_ZERO | M_WAITOK);
650 	s->eqmap = malloc(s->neq * sizeof(struct sge_eq *), M_CXGBE,
651 	    M_ZERO | M_WAITOK);
652 
653 	sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE,
654 	    M_ZERO | M_WAITOK);
655 
656 	t4_init_l2t(sc, M_WAITOK);
657 
658 	/*
659 	 * Second pass over the ports.  This time we know the number of rx and
660 	 * tx queues that each port should get.
661 	 */
662 	rqidx = tqidx = 0;
663 #ifdef TCP_OFFLOAD
664 	ofld_rqidx = ofld_tqidx = 0;
665 #endif
666 	for_each_port(sc, i) {
667 		struct port_info *pi = sc->port[i];
668 
669 		if (pi == NULL)
670 			continue;
671 
672 		pi->first_rxq = rqidx;
673 		pi->first_txq = tqidx;
674 		if (is_10G_port(pi)) {
675 			pi->nrxq = iaq.nrxq10g;
676 			pi->ntxq = iaq.ntxq10g;
677 		} else {
678 			pi->nrxq = iaq.nrxq1g;
679 			pi->ntxq = iaq.ntxq1g;
680 		}
681 
682 		rqidx += pi->nrxq;
683 		tqidx += pi->ntxq;
684 
685 #ifdef TCP_OFFLOAD
686 		if (is_offload(sc)) {
687 			pi->first_ofld_rxq = ofld_rqidx;
688 			pi->first_ofld_txq = ofld_tqidx;
689 			if (is_10G_port(pi)) {
690 				pi->nofldrxq = iaq.nofldrxq10g;
691 				pi->nofldtxq = iaq.nofldtxq10g;
692 			} else {
693 				pi->nofldrxq = iaq.nofldrxq1g;
694 				pi->nofldtxq = iaq.nofldtxq1g;
695 			}
696 			ofld_rqidx += pi->nofldrxq;
697 			ofld_tqidx += pi->nofldtxq;
698 		}
699 #endif
700 	}
701 
702 	rc = setup_intr_handlers(sc);
703 	if (rc != 0) {
704 		device_printf(dev,
705 		    "failed to setup interrupt handlers: %d\n", rc);
706 		goto done;
707 	}
708 
709 	rc = bus_generic_attach(dev);
710 	if (rc != 0) {
711 		device_printf(dev,
712 		    "failed to attach all child ports: %d\n", rc);
713 		goto done;
714 	}
715 
716 	device_printf(dev,
717 	    "PCIe x%d, %d ports, %d %s interrupt%s, %d eq, %d iq\n",
718 	    sc->params.pci.width, sc->params.nports, sc->intr_count,
719 	    sc->intr_type == INTR_MSIX ? "MSI-X" :
720 	    (sc->intr_type == INTR_MSI ? "MSI" : "INTx"),
721 	    sc->intr_count > 1 ? "s" : "", sc->sge.neq, sc->sge.niq);
722 
723 	t4_set_desc(sc);
724 
725 done:
726 	if (rc != 0 && sc->cdev) {
727 		/* cdev was created and so cxgbetool works; recover that way. */
728 		device_printf(dev,
729 		    "error during attach, adapter is now in recovery mode.\n");
730 		rc = 0;
731 	}
732 
733 	if (rc != 0)
734 		t4_detach(dev);
735 	else
736 		t4_sysctls(sc);
737 
738 	return (rc);
739 }
740 
741 /*
742  * Idempotent
743  */
744 static int
745 t4_detach(device_t dev)
746 {
747 	struct adapter *sc;
748 	struct port_info *pi;
749 	int i, rc;
750 
751 	sc = device_get_softc(dev);
752 
753 	if (sc->flags & FULL_INIT_DONE)
754 		t4_intr_disable(sc);
755 
756 	if (sc->cdev) {
757 		destroy_dev(sc->cdev);
758 		sc->cdev = NULL;
759 	}
760 
761 	rc = bus_generic_detach(dev);
762 	if (rc) {
763 		device_printf(dev,
764 		    "failed to detach child devices: %d\n", rc);
765 		return (rc);
766 	}
767 
768 	for (i = 0; i < sc->intr_count; i++)
769 		t4_free_irq(sc, &sc->irq[i]);
770 
771 	for (i = 0; i < MAX_NPORTS; i++) {
772 		pi = sc->port[i];
773 		if (pi) {
774 			t4_free_vi(pi->adapter, sc->mbox, sc->pf, 0, pi->viid);
775 			if (pi->dev)
776 				device_delete_child(dev, pi->dev);
777 
778 			mtx_destroy(&pi->pi_lock);
779 			free(pi, M_CXGBE);
780 		}
781 	}
782 
783 	if (sc->flags & FULL_INIT_DONE)
784 		adapter_full_uninit(sc);
785 
786 	if (sc->flags & FW_OK)
787 		t4_fw_bye(sc, sc->mbox);
788 
789 	if (sc->intr_type == INTR_MSI || sc->intr_type == INTR_MSIX)
790 		pci_release_msi(dev);
791 
792 	if (sc->regs_res)
793 		bus_release_resource(dev, SYS_RES_MEMORY, sc->regs_rid,
794 		    sc->regs_res);
795 
796 	if (sc->msix_res)
797 		bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_rid,
798 		    sc->msix_res);
799 
800 	if (sc->l2t)
801 		t4_free_l2t(sc->l2t);
802 
803 #ifdef TCP_OFFLOAD
804 	free(sc->sge.ofld_rxq, M_CXGBE);
805 	free(sc->sge.ofld_txq, M_CXGBE);
806 #endif
807 	free(sc->irq, M_CXGBE);
808 	free(sc->sge.rxq, M_CXGBE);
809 	free(sc->sge.txq, M_CXGBE);
810 	free(sc->sge.ctrlq, M_CXGBE);
811 	free(sc->sge.iqmap, M_CXGBE);
812 	free(sc->sge.eqmap, M_CXGBE);
813 	free(sc->tids.ftid_tab, M_CXGBE);
814 	t4_destroy_dma_tag(sc);
815 	if (mtx_initialized(&sc->sc_lock)) {
816 		mtx_lock(&t4_list_lock);
817 		SLIST_REMOVE(&t4_list, sc, adapter, link);
818 		mtx_unlock(&t4_list_lock);
819 		mtx_destroy(&sc->sc_lock);
820 	}
821 
822 	if (mtx_initialized(&sc->tids.ftid_lock))
823 		mtx_destroy(&sc->tids.ftid_lock);
824 	if (mtx_initialized(&sc->sfl_lock))
825 		mtx_destroy(&sc->sfl_lock);
826 
827 	bzero(sc, sizeof(*sc));
828 
829 	return (0);
830 }
831 
832 
833 static int
834 cxgbe_probe(device_t dev)
835 {
836 	char buf[128];
837 	struct port_info *pi = device_get_softc(dev);
838 
839 	snprintf(buf, sizeof(buf), "port %d", pi->port_id);
840 	device_set_desc_copy(dev, buf);
841 
842 	return (BUS_PROBE_DEFAULT);
843 }
844 
845 #define T4_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
846     IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
847     IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6)
848 #define T4_CAP_ENABLE (T4_CAP)
849 
850 static int
851 cxgbe_attach(device_t dev)
852 {
853 	struct port_info *pi = device_get_softc(dev);
854 	struct ifnet *ifp;
855 
856 	/* Allocate an ifnet and set it up */
857 	ifp = if_alloc(IFT_ETHER);
858 	if (ifp == NULL) {
859 		device_printf(dev, "Cannot allocate ifnet\n");
860 		return (ENOMEM);
861 	}
862 	pi->ifp = ifp;
863 	ifp->if_softc = pi;
864 
865 	callout_init(&pi->tick, CALLOUT_MPSAFE);
866 
867 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
868 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
869 
870 	ifp->if_init = cxgbe_init;
871 	ifp->if_ioctl = cxgbe_ioctl;
872 	ifp->if_transmit = cxgbe_transmit;
873 	ifp->if_qflush = cxgbe_qflush;
874 
875 	ifp->if_capabilities = T4_CAP;
876 #ifdef TCP_OFFLOAD
877 	if (is_offload(pi->adapter))
878 		ifp->if_capabilities |= IFCAP_TOE;
879 #endif
880 	ifp->if_capenable = T4_CAP_ENABLE;
881 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
882 	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6;
883 
884 	/* Initialize ifmedia for this port */
885 	ifmedia_init(&pi->media, IFM_IMASK, cxgbe_media_change,
886 	    cxgbe_media_status);
887 	build_medialist(pi);
888 
889 	pi->vlan_c = EVENTHANDLER_REGISTER(vlan_config, cxgbe_vlan_config, ifp,
890 	    EVENTHANDLER_PRI_ANY);
891 
892 	ether_ifattach(ifp, pi->hw_addr);
893 
894 #ifdef TCP_OFFLOAD
895 	if (is_offload(pi->adapter)) {
896 		device_printf(dev,
897 		    "%d txq, %d rxq (NIC); %d txq, %d rxq (TOE)\n",
898 		    pi->ntxq, pi->nrxq, pi->nofldtxq, pi->nofldrxq);
899 	} else
900 #endif
901 		device_printf(dev, "%d txq, %d rxq\n", pi->ntxq, pi->nrxq);
902 
903 	cxgbe_sysctls(pi);
904 
905 	return (0);
906 }
907 
908 static int
909 cxgbe_detach(device_t dev)
910 {
911 	struct port_info *pi = device_get_softc(dev);
912 	struct adapter *sc = pi->adapter;
913 	struct ifnet *ifp = pi->ifp;
914 
915 	/* Tell if_ioctl and if_init that the port is going away */
916 	ADAPTER_LOCK(sc);
917 	SET_DOOMED(pi);
918 	wakeup(&sc->flags);
919 	while (IS_BUSY(sc))
920 		mtx_sleep(&sc->flags, &sc->sc_lock, 0, "t4detach", 0);
921 	SET_BUSY(sc);
922 #ifdef INVARIANTS
923 	sc->last_op = "t4detach";
924 	sc->last_op_thr = curthread;
925 #endif
926 	ADAPTER_UNLOCK(sc);
927 
928 	if (pi->vlan_c)
929 		EVENTHANDLER_DEREGISTER(vlan_config, pi->vlan_c);
930 
931 	PORT_LOCK(pi);
932 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
933 	callout_stop(&pi->tick);
934 	PORT_UNLOCK(pi);
935 	callout_drain(&pi->tick);
936 
937 	/* Let detach proceed even if these fail. */
938 	cxgbe_uninit_synchronized(pi);
939 	port_full_uninit(pi);
940 
941 	ifmedia_removeall(&pi->media);
942 	ether_ifdetach(pi->ifp);
943 	if_free(pi->ifp);
944 
945 	ADAPTER_LOCK(sc);
946 	CLR_BUSY(sc);
947 	wakeup(&sc->flags);
948 	ADAPTER_UNLOCK(sc);
949 
950 	return (0);
951 }
952 
953 static void
954 cxgbe_init(void *arg)
955 {
956 	struct port_info *pi = arg;
957 	struct adapter *sc = pi->adapter;
958 
959 	if (begin_synchronized_op(sc, pi, SLEEP_OK | INTR_OK, "t4init") != 0)
960 		return;
961 	cxgbe_init_synchronized(pi);
962 	end_synchronized_op(sc, 0);
963 }
964 
965 static int
966 cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data)
967 {
968 	int rc = 0, mtu, flags;
969 	struct port_info *pi = ifp->if_softc;
970 	struct adapter *sc = pi->adapter;
971 	struct ifreq *ifr = (struct ifreq *)data;
972 	uint32_t mask;
973 
974 	switch (cmd) {
975 	case SIOCSIFMTU:
976 		mtu = ifr->ifr_mtu;
977 		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO))
978 			return (EINVAL);
979 
980 		rc = begin_synchronized_op(sc, pi, SLEEP_OK | INTR_OK, "t4mtu");
981 		if (rc)
982 			return (rc);
983 		ifp->if_mtu = mtu;
984 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
985 			t4_update_fl_bufsize(ifp);
986 			rc = update_mac_settings(pi, XGMAC_MTU);
987 		}
988 		end_synchronized_op(sc, 0);
989 		break;
990 
991 	case SIOCSIFFLAGS:
992 		rc = begin_synchronized_op(sc, pi, SLEEP_OK | INTR_OK, "t4flg");
993 		if (rc)
994 			return (rc);
995 
996 		if (ifp->if_flags & IFF_UP) {
997 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
998 				flags = pi->if_flags;
999 				if ((ifp->if_flags ^ flags) &
1000 				    (IFF_PROMISC | IFF_ALLMULTI)) {
1001 					rc = update_mac_settings(pi,
1002 					    XGMAC_PROMISC | XGMAC_ALLMULTI);
1003 				}
1004 			} else
1005 				rc = cxgbe_init_synchronized(pi);
1006 			pi->if_flags = ifp->if_flags;
1007 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1008 			rc = cxgbe_uninit_synchronized(pi);
1009 		end_synchronized_op(sc, 0);
1010 		break;
1011 
1012 	case SIOCADDMULTI:
1013 	case SIOCDELMULTI: /* these two are called with a mutex held :-( */
1014 		rc = begin_synchronized_op(sc, pi, HOLD_LOCK, "t4multi");
1015 		if (rc)
1016 			return (rc);
1017 		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1018 			rc = update_mac_settings(pi, XGMAC_MCADDRS);
1019 		end_synchronized_op(sc, LOCK_HELD);
1020 		break;
1021 
1022 	case SIOCSIFCAP:
1023 		rc = begin_synchronized_op(sc, pi, SLEEP_OK | INTR_OK, "t4cap");
1024 		if (rc)
1025 			return (rc);
1026 
1027 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1028 		if (mask & IFCAP_TXCSUM) {
1029 			ifp->if_capenable ^= IFCAP_TXCSUM;
1030 			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
1031 
1032 			if (IFCAP_TSO4 & ifp->if_capenable &&
1033 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1034 				ifp->if_capenable &= ~IFCAP_TSO4;
1035 				if_printf(ifp,
1036 				    "tso4 disabled due to -txcsum.\n");
1037 			}
1038 		}
1039 		if (mask & IFCAP_TXCSUM_IPV6) {
1040 			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1041 			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
1042 
1043 			if (IFCAP_TSO6 & ifp->if_capenable &&
1044 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1045 				ifp->if_capenable &= ~IFCAP_TSO6;
1046 				if_printf(ifp,
1047 				    "tso6 disabled due to -txcsum6.\n");
1048 			}
1049 		}
1050 		if (mask & IFCAP_RXCSUM)
1051 			ifp->if_capenable ^= IFCAP_RXCSUM;
1052 		if (mask & IFCAP_RXCSUM_IPV6)
1053 			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1054 
1055 		/*
1056 		 * Note that we leave CSUM_TSO alone (it is always set).  The
1057 		 * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
1058 		 * sending a TSO request our way, so it's sufficient to toggle
1059 		 * IFCAP_TSOx only.
1060 		 */
1061 		if (mask & IFCAP_TSO4) {
1062 			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
1063 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1064 				if_printf(ifp, "enable txcsum first.\n");
1065 				rc = EAGAIN;
1066 				goto fail;
1067 			}
1068 			ifp->if_capenable ^= IFCAP_TSO4;
1069 		}
1070 		if (mask & IFCAP_TSO6) {
1071 			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
1072 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1073 				if_printf(ifp, "enable txcsum6 first.\n");
1074 				rc = EAGAIN;
1075 				goto fail;
1076 			}
1077 			ifp->if_capenable ^= IFCAP_TSO6;
1078 		}
1079 		if (mask & IFCAP_LRO) {
1080 #if defined(INET) || defined(INET6)
1081 			int i;
1082 			struct sge_rxq *rxq;
1083 
1084 			ifp->if_capenable ^= IFCAP_LRO;
1085 			for_each_rxq(pi, i, rxq) {
1086 				if (ifp->if_capenable & IFCAP_LRO)
1087 					rxq->iq.flags |= IQ_LRO_ENABLED;
1088 				else
1089 					rxq->iq.flags &= ~IQ_LRO_ENABLED;
1090 			}
1091 #endif
1092 		}
1093 #ifdef TCP_OFFLOAD
1094 		if (mask & IFCAP_TOE) {
1095 			int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE;
1096 
1097 			rc = toe_capability(pi, enable);
1098 			if (rc != 0)
1099 				goto fail;
1100 
1101 			ifp->if_capenable ^= mask;
1102 		}
1103 #endif
1104 		if (mask & IFCAP_VLAN_HWTAGGING) {
1105 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1106 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1107 				rc = update_mac_settings(pi, XGMAC_VLANEX);
1108 		}
1109 		if (mask & IFCAP_VLAN_MTU) {
1110 			ifp->if_capenable ^= IFCAP_VLAN_MTU;
1111 
1112 			/* Need to find out how to disable auto-mtu-inflation */
1113 		}
1114 		if (mask & IFCAP_VLAN_HWTSO)
1115 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1116 		if (mask & IFCAP_VLAN_HWCSUM)
1117 			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
1118 
1119 #ifdef VLAN_CAPABILITIES
1120 		VLAN_CAPABILITIES(ifp);
1121 #endif
1122 fail:
1123 		end_synchronized_op(sc, 0);
1124 		break;
1125 
1126 	case SIOCSIFMEDIA:
1127 	case SIOCGIFMEDIA:
1128 		ifmedia_ioctl(ifp, ifr, &pi->media, cmd);
1129 		break;
1130 
1131 	default:
1132 		rc = ether_ioctl(ifp, cmd, data);
1133 	}
1134 
1135 	return (rc);
1136 }
1137 
1138 static int
1139 cxgbe_transmit(struct ifnet *ifp, struct mbuf *m)
1140 {
1141 	struct port_info *pi = ifp->if_softc;
1142 	struct adapter *sc = pi->adapter;
1143 	struct sge_txq *txq = &sc->sge.txq[pi->first_txq];
1144 	struct buf_ring *br;
1145 	int rc;
1146 
1147 	M_ASSERTPKTHDR(m);
1148 
1149 	if (__predict_false(pi->link_cfg.link_ok == 0)) {
1150 		m_freem(m);
1151 		return (ENETDOWN);
1152 	}
1153 
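	/*
	 * Spread flows across the port's txq's using the mbuf's flowid, e.g.
	 * with ntxq = 4, flowids 0..7 select txq 0,1,2,3,0,1,2,3.
	 */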
1154 	if (m->m_flags & M_FLOWID)
1155 		txq += (m->m_pkthdr.flowid % pi->ntxq);
1156 	br = txq->br;
1157 
1158 	if (TXQ_TRYLOCK(txq) == 0) {
1159 		struct sge_eq *eq = &txq->eq;
1160 
1161 		/*
1162 		 * It is possible that t4_eth_tx finishes up and releases the
1163 		 * lock between the TRYLOCK above and the drbr_enqueue here.  We
1164 		 * need to make sure that this mbuf doesn't just sit there in
1165 		 * the drbr.
1166 		 */
1167 
1168 		rc = drbr_enqueue(ifp, br, m);
1169 		if (rc == 0 && callout_pending(&eq->tx_callout) == 0 &&
1170 		    !(eq->flags & EQ_DOOMED))
1171 			callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq);
1172 		return (rc);
1173 	}
1174 
1175 	/*
1176 	 * txq->m is the mbuf that is held up due to a temporary shortage of
1177 	 * resources and it should be put on the wire first.  Then what's in
1178 	 * drbr and finally the mbuf that was just passed in to us.
1179 	 *
1180 	 * Return code should indicate the fate of the mbuf that was passed in
1181 	 * this time.
1182 	 */
1183 
1184 	TXQ_LOCK_ASSERT_OWNED(txq);
1185 	if (drbr_needs_enqueue(ifp, br) || txq->m) {
1186 
1187 		/* Queued for transmission. */
1188 
1189 		rc = drbr_enqueue(ifp, br, m);
1190 		m = txq->m ? txq->m : drbr_dequeue(ifp, br);
1191 		(void) t4_eth_tx(ifp, txq, m);
1192 		TXQ_UNLOCK(txq);
1193 		return (rc);
1194 	}
1195 
1196 	/* Direct transmission. */
1197 	rc = t4_eth_tx(ifp, txq, m);
1198 	if (rc != 0 && txq->m)
1199 		rc = 0;	/* held, will be transmitted soon (hopefully) */
1200 
1201 	TXQ_UNLOCK(txq);
1202 	return (rc);
1203 }
1204 
1205 static void
1206 cxgbe_qflush(struct ifnet *ifp)
1207 {
1208 	struct port_info *pi = ifp->if_softc;
1209 	struct sge_txq *txq;
1210 	int i;
1211 	struct mbuf *m;
1212 
1213 	/* queues do not exist if !PORT_INIT_DONE. */
1214 	if (pi->flags & PORT_INIT_DONE) {
1215 		for_each_txq(pi, i, txq) {
1216 			TXQ_LOCK(txq);
1217 			m_freem(txq->m);
1218 			txq->m = NULL;
1219 			while ((m = buf_ring_dequeue_sc(txq->br)) != NULL)
1220 				m_freem(m);
1221 			TXQ_UNLOCK(txq);
1222 		}
1223 	}
1224 	if_qflush(ifp);
1225 }
1226 
1227 static int
1228 cxgbe_media_change(struct ifnet *ifp)
1229 {
1230 	struct port_info *pi = ifp->if_softc;
1231 
1232 	device_printf(pi->dev, "%s unimplemented.\n", __func__);
1233 
1234 	return (EOPNOTSUPP);
1235 }
1236 
1237 static void
1238 cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1239 {
1240 	struct port_info *pi = ifp->if_softc;
1241 	struct ifmedia_entry *cur = pi->media.ifm_cur;
1242 	int speed = pi->link_cfg.speed;
1243 	int data = (pi->port_type << 8) | pi->mod_type;
1244 
1245 	if (cur->ifm_data != data) {
1246 		build_medialist(pi);
1247 		cur = pi->media.ifm_cur;
1248 	}
1249 
1250 	ifmr->ifm_status = IFM_AVALID;
1251 	if (!pi->link_cfg.link_ok)
1252 		return;
1253 
1254 	ifmr->ifm_status |= IFM_ACTIVE;
1255 
1256 	/* active and current will differ iff current media is autoselect. */
1257 	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
1258 		return;
1259 
1260 	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
1261 	if (speed == SPEED_10000)
1262 		ifmr->ifm_active |= IFM_10G_T;
1263 	else if (speed == SPEED_1000)
1264 		ifmr->ifm_active |= IFM_1000_T;
1265 	else if (speed == SPEED_100)
1266 		ifmr->ifm_active |= IFM_100_TX;
1267 	else if (speed == SPEED_10)
1268 		ifmr->ifm_active |= IFM_10_T;
1269 	else
1270 		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
1271 			    speed));
1272 }
1273 
1274 void
1275 t4_fatal_err(struct adapter *sc)
1276 {
1277 	t4_set_reg_field(sc, A_SGE_CONTROL, F_GLOBALENABLE, 0);
1278 	t4_intr_disable(sc);
1279 	log(LOG_EMERG, "%s: encountered fatal error, adapter stopped.\n",
1280 	    device_get_nameunit(sc->dev));
1281 }
1282 
1283 static int
1284 map_bars(struct adapter *sc)
1285 {
1286 	sc->regs_rid = PCIR_BAR(0);
1287 	sc->regs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
1288 	    &sc->regs_rid, RF_ACTIVE);
1289 	if (sc->regs_res == NULL) {
1290 		device_printf(sc->dev, "cannot map registers.\n");
1291 		return (ENXIO);
1292 	}
1293 	sc->bt = rman_get_bustag(sc->regs_res);
1294 	sc->bh = rman_get_bushandle(sc->regs_res);
1295 	sc->mmio_len = rman_get_size(sc->regs_res);
1296 
1297 	sc->msix_rid = PCIR_BAR(4);
1298 	sc->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
1299 	    &sc->msix_rid, RF_ACTIVE);
1300 	if (sc->msix_res == NULL) {
1301 		device_printf(sc->dev, "cannot map MSI-X BAR.\n");
1302 		return (ENXIO);
1303 	}
1304 
1305 	return (0);
1306 }
1307 
1308 static void
1309 setup_memwin(struct adapter *sc)
1310 {
1311 	uint32_t bar0;
1312 
1313 	/*
1314 	 * Read low 32b of bar0 indirectly via the hardware backdoor mechanism.
1315 	 * Works from within PCI passthrough environments too, where
1316 	 * rman_get_start() can return a different value.  We need to program
1317 	 * the memory window decoders with the actual addresses that will be
1318 	 * coming across the PCIe link.
1319 	 */
1320 	bar0 = t4_hw_pci_read_cfg4(sc, PCIR_BAR(0));
1321 	bar0 &= (uint32_t) PCIM_BAR_MEM_BASE;
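	/*
	 * The window size field below encodes log2(aperture) - 10, e.g. a
	 * 64KB aperture is written as ilog2(65536) - 10 = 6 (the encoding
	 * implied by the V_WINDOW() usage that follows).
	 */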
1322 
1323 	t4_write_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, 0),
1324 		     (bar0 + MEMWIN0_BASE) | V_BIR(0) |
1325 		     V_WINDOW(ilog2(MEMWIN0_APERTURE) - 10));
1326 
1327 	t4_write_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, 1),
1328 		     (bar0 + MEMWIN1_BASE) | V_BIR(0) |
1329 		     V_WINDOW(ilog2(MEMWIN1_APERTURE) - 10));
1330 
1331 	t4_write_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, 2),
1332 		     (bar0 + MEMWIN2_BASE) | V_BIR(0) |
1333 		     V_WINDOW(ilog2(MEMWIN2_APERTURE) - 10));
1334 
1335 	/* flush */
1336 	t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, 2));
1337 }
1338 
1339 static int
1340 cfg_itype_and_nqueues(struct adapter *sc, int n10g, int n1g,
1341     struct intrs_and_queues *iaq)
1342 {
1343 	int rc, itype, navail, nrxq10g, nrxq1g, n;
1344 	int nofldrxq10g = 0, nofldrxq1g = 0;
1345 
1346 	bzero(iaq, sizeof(*iaq));
1347 
1348 	iaq->ntxq10g = t4_ntxq10g;
1349 	iaq->ntxq1g = t4_ntxq1g;
1350 	iaq->nrxq10g = nrxq10g = t4_nrxq10g;
1351 	iaq->nrxq1g = nrxq1g = t4_nrxq1g;
1352 #ifdef TCP_OFFLOAD
1353 	if (is_offload(sc)) {
1354 		iaq->nofldtxq10g = t4_nofldtxq10g;
1355 		iaq->nofldtxq1g = t4_nofldtxq1g;
1356 		iaq->nofldrxq10g = nofldrxq10g = t4_nofldrxq10g;
1357 		iaq->nofldrxq1g = nofldrxq1g = t4_nofldrxq1g;
1358 	}
1359 #endif
1360 
1361 	for (itype = INTR_MSIX; itype; itype >>= 1) {
1362 
1363 		if ((itype & t4_intr_types) == 0)
1364 			continue;	/* not allowed */
1365 
1366 		if (itype == INTR_MSIX)
1367 			navail = pci_msix_count(sc->dev);
1368 		else if (itype == INTR_MSI)
1369 			navail = pci_msi_count(sc->dev);
1370 		else
1371 			navail = 1;
1372 restart:
1373 		if (navail == 0)
1374 			continue;
1375 
1376 		iaq->intr_type = itype;
1377 		iaq->intr_flags = 0;
1378 
1379 		/*
1380 		 * Best option: an interrupt vector for errors, one for the
1381 		 * firmware event queue, and one for each rxq (NIC as well as
1382 		 * offload).
1383 		 */
1384 		iaq->nirq = T4_EXTRA_INTR;
1385 		iaq->nirq += n10g * (nrxq10g + nofldrxq10g);
1386 		iaq->nirq += n1g * (nrxq1g + nofldrxq1g);
1387 		if (iaq->nirq <= navail &&
1388 		    (itype != INTR_MSI || powerof2(iaq->nirq))) {
1389 			iaq->intr_flags |= INTR_DIRECT;
1390 			goto allocate;
1391 		}
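		/*
		 * Worked example, assuming T4_EXTRA_INTR covers just the error
		 * and firmware event queue interrupts (i.e. 2): a dual-port
		 * 10G, NIC-only configuration with nrxq10g = 8 asks for
		 * 2 + 2 * 8 = 18 vectors here.  MSI-X can grant that, but MSI
		 * cannot (18 is not a power of 2), so MSI falls through to the
		 * cheaper options below.
		 */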
1392 
1393 		/*
1394 		 * Second best option: an interrupt vector for errors, one for
1395 		 * the firmware event queue, and enough vectors per port for the
1396 		 * larger of its NIC or offload rxq count (the two share vectors).
1397 		 */
1398 		iaq->nirq = T4_EXTRA_INTR;
1399 		iaq->nirq += n10g * max(nrxq10g, nofldrxq10g);
1400 		iaq->nirq += n1g * max(nrxq1g, nofldrxq1g);
1401 		if (iaq->nirq <= navail &&
1402 		    (itype != INTR_MSI || powerof2(iaq->nirq)))
1403 			goto allocate;
1404 
1405 		/*
1406 		 * Next best option: an interrupt vector for errors, one for the
1407 		 * firmware event queue, and at least one per port.  At this
1408 		 * point we know we'll have to downsize nrxq or nofldrxq to fit
1409 		 * what's available to us.
1410 		 */
1411 		iaq->nirq = T4_EXTRA_INTR;
1412 		iaq->nirq += n10g + n1g;
1413 		if (iaq->nirq <= navail) {
1414 			int leftover = navail - iaq->nirq;
1415 
1416 			if (n10g > 0) {
1417 				int target = max(nrxq10g, nofldrxq10g);
1418 
1419 				n = 1;
1420 				while (n < target && leftover >= n10g) {
1421 					leftover -= n10g;
1422 					iaq->nirq += n10g;
1423 					n++;
1424 				}
1425 				iaq->nrxq10g = min(n, nrxq10g);
1426 #ifdef TCP_OFFLOAD
1427 				if (is_offload(sc))
1428 					iaq->nofldrxq10g = min(n, nofldrxq10g);
1429 #endif
1430 			}
1431 
1432 			if (n1g > 0) {
1433 				int target = max(nrxq1g, nofldrxq1g);
1434 
1435 				n = 1;
1436 				while (n < target && leftover >= n1g) {
1437 					leftover -= n1g;
1438 					iaq->nirq += n1g;
1439 					n++;
1440 				}
1441 				iaq->nrxq1g = min(n, nrxq1g);
1442 #ifdef TCP_OFFLOAD
1443 				if (is_offload(sc))
1444 					iaq->nofldrxq1g = min(n, nofldrxq1g);
1445 #endif
1446 			}
1447 
1448 			if (itype != INTR_MSI || powerof2(iaq->nirq))
1449 				goto allocate;
1450 		}
1451 
1452 		/*
1453 		 * Least desirable option: one interrupt vector for everything.
1454 		 */
1455 		iaq->nirq = iaq->nrxq10g = iaq->nrxq1g = 1;
1456 #ifdef TCP_OFFLOAD
1457 		if (is_offload(sc))
1458 			iaq->nofldrxq10g = iaq->nofldrxq1g = 1;
1459 #endif
1460 
1461 allocate:
1462 		navail = iaq->nirq;
1463 		rc = 0;
1464 		if (itype == INTR_MSIX)
1465 			rc = pci_alloc_msix(sc->dev, &navail);
1466 		else if (itype == INTR_MSI)
1467 			rc = pci_alloc_msi(sc->dev, &navail);
1468 
1469 		if (rc == 0) {
1470 			if (navail == iaq->nirq)
1471 				return (0);
1472 
1473 			/*
1474 			 * Didn't get the number requested.  Use whatever number
1475 			 * the kernel is willing to allocate (it's in navail).
1476 			 */
1477 			device_printf(sc->dev, "fewer vectors than requested, "
1478 			    "type=%d, req=%d, rcvd=%d; will downshift req.\n",
1479 			    itype, iaq->nirq, navail);
1480 			pci_release_msi(sc->dev);
1481 			goto restart;
1482 		}
1483 
1484 		device_printf(sc->dev,
1485 		    "failed to allocate vectors:%d, type=%d, req=%d, rcvd=%d\n",
1486 		    rc, itype, iaq->nirq, navail);
1487 	}
1488 
1489 	device_printf(sc->dev,
1490 	    "failed to find a usable interrupt type.  "
1491 	    "allowed=%d, msi-x=%d, msi=%d, intx=1", t4_intr_types,
1492 	    pci_msix_count(sc->dev), pci_msi_count(sc->dev));
1493 
1494 	return (ENXIO);
1495 }
1496 
1497 /*
1498  * Install a compatible firmware (if required), establish contact with it (by
1499  * saying hello), and reset the device.  If we end up as the master driver,
1500  * partition adapter resources by providing a configuration file to the
1501  * firmware.
1502  */
1503 static int
1504 prep_firmware(struct adapter *sc)
1505 {
1506 	const struct firmware *fw = NULL, *cfg = NULL, *default_cfg;
1507 	int rc;
1508 	enum dev_state state;
1509 
1510 	default_cfg = firmware_get(T4_CFGNAME);
1511 
1512 	/* Check firmware version and install a different one if necessary */
1513 	rc = t4_check_fw_version(sc);
1514 	snprintf(sc->fw_version, sizeof(sc->fw_version), "%u.%u.%u.%u",
1515 	    G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers),
1516 	    G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers),
1517 	    G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers),
1518 	    G_FW_HDR_FW_VER_BUILD(sc->params.fw_vers));
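	/*
	 * The 32-bit version word packs major.minor.micro.build one byte each
	 * (per the G_FW_HDR_FW_VER_* accessors), so e.g. 0x01050190 prints as
	 * "1.5.1.144".
	 */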
1519 	if (rc != 0) {
1520 		uint32_t v = 0;
1521 
1522 		fw = firmware_get(T4_FWNAME);
1523 		if (fw != NULL) {
1524 			const struct fw_hdr *hdr = (const void *)fw->data;
1525 
1526 			v = ntohl(hdr->fw_ver);
1527 
1528 			/*
1529 			 * The firmware module will not be used if it isn't the
1530 			 * same major version as what the driver was compiled
1531 			 * with.
1532 			 */
1533 			if (G_FW_HDR_FW_VER_MAJOR(v) != FW_VERSION_MAJOR) {
1534 				device_printf(sc->dev,
1535 				    "Found firmware image but version %d "
1536 				    "can not be used with this driver (%d)\n",
1537 				    G_FW_HDR_FW_VER_MAJOR(v), FW_VERSION_MAJOR);
1538 
1539 				firmware_put(fw, FIRMWARE_UNLOAD);
1540 				fw = NULL;
1541 			}
1542 		}
1543 
1544 		if (fw == NULL && rc < 0) {
1545 			device_printf(sc->dev, "No usable firmware. "
1546 			    "card has %d.%d.%d, driver compiled with %d.%d.%d",
1547 			    G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers),
1548 			    G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers),
1549 			    G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers),
1550 			    FW_VERSION_MAJOR, FW_VERSION_MINOR,
1551 			    FW_VERSION_MICRO);
1552 			rc = EAGAIN;
1553 			goto done;
1554 		}
1555 
1556 		/*
1557 		 * Always upgrade, even for minor/micro/build mismatches.
1558 		 * Downgrade only for a major version mismatch (i.e. when the
1559 		 * version check above failed with rc < 0).
1560 		 */
1561 		if (fw != NULL && (rc < 0 || v > sc->params.fw_vers)) {
1562 			device_printf(sc->dev,
1563 			    "installing firmware %d.%d.%d.%d on card.\n",
1564 			    G_FW_HDR_FW_VER_MAJOR(v), G_FW_HDR_FW_VER_MINOR(v),
1565 			    G_FW_HDR_FW_VER_MICRO(v), G_FW_HDR_FW_VER_BUILD(v));
1566 
1567 			rc = -t4_load_fw(sc, fw->data, fw->datasize);
1568 			if (rc != 0) {
1569 				device_printf(sc->dev,
1570 				    "failed to install firmware: %d\n", rc);
1571 				goto done;
1572 			} else {
1573 				/* refresh */
1574 				(void) t4_check_fw_version(sc);
1575 				snprintf(sc->fw_version,
1576 				    sizeof(sc->fw_version), "%u.%u.%u.%u",
1577 				    G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers),
1578 				    G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers),
1579 				    G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers),
1580 				    G_FW_HDR_FW_VER_BUILD(sc->params.fw_vers));
1581 			}
1582 		}
1583 	}
1584 
1585 	/* Contact firmware.  */
1586 	rc = t4_fw_hello(sc, sc->mbox, sc->mbox, MASTER_MAY, &state);
1587 	if (rc < 0) {
1588 		rc = -rc;
1589 		device_printf(sc->dev,
1590 		    "failed to connect to the firmware: %d.\n", rc);
1591 		goto done;
1592 	}
1593 	if (rc == sc->mbox)
1594 		sc->flags |= MASTER_PF;
1595 
1596 	/* Reset device */
1597 	rc = -t4_fw_reset(sc, sc->mbox, F_PIORSTMODE | F_PIORST);
1598 	if (rc != 0) {
1599 		device_printf(sc->dev, "firmware reset failed: %d.\n", rc);
1600 		if (rc != ETIMEDOUT && rc != EIO)
1601 			t4_fw_bye(sc, sc->mbox);
1602 		goto done;
1603 	}
1604 
1605 	/* Partition adapter resources as specified in the config file. */
1606 	if (sc->flags & MASTER_PF) {
1607 		snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s",
1608 		    pci_get_device(sc->dev) == 0x440a ? "uwire" : t4_cfg_file);
1609 		if (strncmp(sc->cfg_file, "default", sizeof(sc->cfg_file))) {
1610 			char s[32];
1611 
1612 			snprintf(s, sizeof(s), "t4fw_cfg_%s", sc->cfg_file);
1613 			cfg = firmware_get(s);
1614 			if (cfg == NULL) {
1615 				device_printf(sc->dev,
1616 				    "unable to locate %s module, "
1617 				    "will use default config file.\n", s);
1618 				snprintf(sc->cfg_file, sizeof(sc->cfg_file),
1619 				    "%s", "default");
1620 			}
1621 		}
1622 
1623 		rc = partition_resources(sc, cfg ? cfg : default_cfg);
1624 		if (rc != 0)
1625 			goto done;	/* error message displayed already */
1626 	} else {
1627 		snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", "notme");
1628 		sc->cfcsum = (u_int)-1;
1629 	}
1630 
1631 	sc->flags |= FW_OK;
1632 
1633 done:
1634 	if (fw != NULL)
1635 		firmware_put(fw, FIRMWARE_UNLOAD);
1636 	if (cfg != NULL)
1637 		firmware_put(cfg, FIRMWARE_UNLOAD);
1638 	if (default_cfg != NULL)
1639 		firmware_put(default_cfg, FIRMWARE_UNLOAD);
1640 
1641 	return (rc);
1642 }
1643 
1644 #define FW_PARAM_DEV(param) \
1645 	(V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \
1646 	 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param))
1647 #define FW_PARAM_PFVF(param) \
1648 	(V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \
1649 	 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param))
1650 
1651 /*
1652  * Upload configuration file to card's memory.
1653  */
1654 static int
1655 upload_config_file(struct adapter *sc, const struct firmware *fw, uint32_t *mt,
1656     uint32_t *ma)
1657 {
1658 	int rc, i;
1659 	uint32_t param, val, mtype, maddr, bar, off, win, remaining;
1660 	const uint32_t *b;
1661 
1662 	/* Figure out where the firmware wants us to upload it. */
1663 	param = FW_PARAM_DEV(CF);
1664 	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
1665 	if (rc != 0) {
1666 		/* Firmwares without config file support will fail this way */
1667 		device_printf(sc->dev,
1668 		    "failed to query config file location: %d.\n", rc);
1669 		return (rc);
1670 	}
1671 	*mt = mtype = G_FW_PARAMS_PARAM_Y(val);
1672 	*ma = maddr = G_FW_PARAMS_PARAM_Z(val) << 16;
1673 
1674 	if (maddr & 3) {
1675 		device_printf(sc->dev,
1676 		    "cannot upload config file (type %u, addr %x).\n",
1677 		    mtype, maddr);
1678 		return (EFAULT);
1679 	}
1680 
1681 	/* Translate mtype/maddr to an address suitable for the PCIe window */
1682 	val = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
1683 	val &= F_EDRAM0_ENABLE | F_EDRAM1_ENABLE | F_EXT_MEM_ENABLE;
1684 	switch (mtype) {
1685 	case FW_MEMTYPE_CF_EDC0:
1686 		if (!(val & F_EDRAM0_ENABLE))
1687 			goto err;
1688 		bar = t4_read_reg(sc, A_MA_EDRAM0_BAR);
1689 		maddr += G_EDRAM0_BASE(bar) << 20;
1690 		break;
1691 
1692 	case FW_MEMTYPE_CF_EDC1:
1693 		if (!(val & F_EDRAM1_ENABLE))
1694 			goto err;
1695 		bar = t4_read_reg(sc, A_MA_EDRAM1_BAR);
1696 		maddr += G_EDRAM1_BASE(bar) << 20;
1697 		break;
1698 
1699 	case FW_MEMTYPE_CF_EXTMEM:
1700 		if (!(val & F_EXT_MEM_ENABLE))
1701 			goto err;
1702 		bar = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
1703 		maddr += G_EXT_MEM_BASE(bar) << 20;
1704 		break;
1705 
1706 	default:
1707 err:
1708 		device_printf(sc->dev,
1709 		    "cannot upload config file (type %u, enabled %u).\n",
1710 		    mtype, val);
1711 		return (EFAULT);
1712 	}
1713 
1714 	/*
1715 	 * Position the PCIe window (we use memwin2) to the 16B aligned area
1716 	 * just at/before the upload location.
1717 	 */
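	/* e.g. maddr 0x123456 yields win 0x123450 and off 6. */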
1718 	win = maddr & ~0xf;
1719 	off = maddr - win;  /* offset from the start of the window. */
1720 	t4_write_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2), win);
1721 	t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2));
1722 
1723 	remaining = fw->datasize;
1724 	if (remaining > FLASH_CFG_MAX_SIZE ||
1725 	    remaining > MEMWIN2_APERTURE - off) {
1726 		device_printf(sc->dev, "cannot upload config file all at once "
1727 		    "(size %u, max %u, room %u).\n",
1728 		    remaining, FLASH_CFG_MAX_SIZE, MEMWIN2_APERTURE - off);
1729 		return (EFBIG);
1730 	}
1731 
1732 	/*
1733 	 * XXX: sheer laziness.  We deliberately added 4 bytes of useless
1734 	 * stuffing/comments at the end of the config file so it's ok to simply
1735 	 * throw away the last remaining bytes when the config file is not an
1736 	 * exact multiple of 4.
1737 	 */
1738 	b = fw->data;
1739 	for (i = 0; remaining >= 4; i += 4, remaining -= 4)
1740 		t4_write_reg(sc, MEMWIN2_BASE + off + i, *b++);
1741 
1742 	return (rc);
1743 }
1744 
1745 /*
1746  * Partition chip resources for use between various PFs, VFs, etc.  This is done
1747  * by uploading the firmware configuration file to the adapter and instructing
1748  * the firmware to process it.
1749  */
1750 static int
1751 partition_resources(struct adapter *sc, const struct firmware *cfg)
1752 {
1753 	int rc;
1754 	struct fw_caps_config_cmd caps;
1755 	uint32_t mtype, maddr, finicsum, cfcsum;
1756 
1757 	rc = cfg ? upload_config_file(sc, cfg, &mtype, &maddr) : ENOENT;
1758 	if (rc != 0) {
1759 		mtype = FW_MEMTYPE_CF_FLASH;
1760 		maddr = t4_flash_cfg_addr(sc);
1761 	}
1762 
1763 	bzero(&caps, sizeof(caps));
1764 	caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
1765 	    F_FW_CMD_REQUEST | F_FW_CMD_READ);
1766 	caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID |
1767 	    V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) |
1768 	    V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(maddr >> 16) | FW_LEN16(caps));
1769 	rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
1770 	if (rc != 0) {
1771 		device_printf(sc->dev,
1772 		    "failed to pre-process config file: %d.\n", rc);
1773 		return (rc);
1774 	}
1775 
1776 	finicsum = be32toh(caps.finicsum);
1777 	cfcsum = be32toh(caps.cfcsum);
1778 	if (finicsum != cfcsum) {
1779 		device_printf(sc->dev,
1780 		    "WARNING: config file checksum mismatch: %08x %08x\n",
1781 		    finicsum, cfcsum);
1782 	}
1783 	sc->cfcsum = cfcsum;
1784 
1785 #define LIMIT_CAPS(x) do { \
1786 	caps.x &= htobe16(t4_##x##_allowed); \
1787 	sc->x = be16toh(caps.x); \
1788 } while (0)
1789 
1790 	/*
1791 	 * Let the firmware know what features will (not) be used so it can tune
1792 	 * things accordingly.
1793 	 */
1794 	LIMIT_CAPS(linkcaps);
1795 	LIMIT_CAPS(niccaps);
1796 	LIMIT_CAPS(toecaps);
1797 	LIMIT_CAPS(rdmacaps);
1798 	LIMIT_CAPS(iscsicaps);
1799 	LIMIT_CAPS(fcoecaps);
1800 #undef LIMIT_CAPS
1801 
1802 	caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
1803 	    F_FW_CMD_REQUEST | F_FW_CMD_WRITE);
1804 	caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
1805 	rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), NULL);
1806 	if (rc != 0) {
1807 		device_printf(sc->dev,
1808 		    "failed to process config file: %d.\n", rc);
1809 		return (rc);
1810 	}
1811 
1812 	return (0);
1813 }
1814 
1815 /*
1816  * Retrieve parameters that are needed (or nice to have) prior to calling
1817  * t4_sge_init and t4_fw_initialize.
1818  */
1819 static int
1820 get_params__pre_init(struct adapter *sc)
1821 {
1822 	int rc;
1823 	uint32_t param[2], val[2];
1824 	struct fw_devlog_cmd cmd;
1825 	struct devlog_params *dlog = &sc->params.devlog;
1826 
1827 	param[0] = FW_PARAM_DEV(PORTVEC);
1828 	param[1] = FW_PARAM_DEV(CCLK);
1829 	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
1830 	if (rc != 0) {
1831 		device_printf(sc->dev,
1832 		    "failed to query parameters (pre_init): %d.\n", rc);
1833 		return (rc);
1834 	}
1835 
1836 	sc->params.portvec = val[0];
1837 	sc->params.nports = bitcount32(val[0]);
1838 	sc->params.vpd.cclk = val[1];
1839 
1840 	/* Read device log parameters. */
1841 	bzero(&cmd, sizeof(cmd));
1842 	cmd.op_to_write = htobe32(V_FW_CMD_OP(FW_DEVLOG_CMD) |
1843 	    F_FW_CMD_REQUEST | F_FW_CMD_READ);
1844 	cmd.retval_len16 = htobe32(FW_LEN16(cmd));
1845 	rc = -t4_wr_mbox(sc, sc->mbox, &cmd, sizeof(cmd), &cmd);
1846 	if (rc != 0) {
1847 		device_printf(sc->dev,
1848 		    "failed to get devlog parameters: %d.\n", rc);
1849 		bzero(dlog, sizeof(*dlog));
1850 		rc = 0;	/* devlog isn't critical for device operation */
1851 	} else {
1852 		val[0] = be32toh(cmd.memtype_devlog_memaddr16_devlog);
1853 		dlog->memtype = G_FW_DEVLOG_CMD_MEMTYPE_DEVLOG(val[0]);
1854 		dlog->start = G_FW_DEVLOG_CMD_MEMADDR16_DEVLOG(val[0]) << 4;
1855 		dlog->size = be32toh(cmd.memsize_devlog);
1856 	}
1857 
1858 	return (rc);
1859 }
1860 
1861 /*
1862  * Retrieve various parameters that are of interest to the driver.  The device
1863  * has been initialized by the firmware at this point.
1864  */
1865 static int
1866 get_params__post_init(struct adapter *sc)
1867 {
1868 	int rc;
1869 	uint32_t param[7], val[7];
1870 	struct fw_caps_config_cmd caps;
1871 
1872 	param[0] = FW_PARAM_PFVF(IQFLINT_START);
1873 	param[1] = FW_PARAM_PFVF(EQ_START);
1874 	param[2] = FW_PARAM_PFVF(FILTER_START);
1875 	param[3] = FW_PARAM_PFVF(FILTER_END);
1876 	param[4] = FW_PARAM_PFVF(L2T_START);
1877 	param[5] = FW_PARAM_PFVF(L2T_END);
1878 	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
1879 	if (rc != 0) {
1880 		device_printf(sc->dev,
1881 		    "failed to query parameters (post_init): %d.\n", rc);
1882 		return (rc);
1883 	}
1884 
1885 	sc->sge.iq_start = val[0];
1886 	sc->sge.eq_start = val[1];
1887 	sc->tids.ftid_base = val[2];
1888 	sc->tids.nftids = val[3] - val[2] + 1;
1889 	sc->vres.l2t.start = val[4];
1890 	sc->vres.l2t.size = val[5] - val[4] + 1;
1891 	KASSERT(sc->vres.l2t.size <= L2T_SIZE,
1892 	    ("%s: L2 table size (%u) larger than expected (%u)",
1893 	    __func__, sc->vres.l2t.size, L2T_SIZE));
1894 
1895 	/* get capabilities */
1896 	bzero(&caps, sizeof(caps));
1897 	caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
1898 	    F_FW_CMD_REQUEST | F_FW_CMD_READ);
1899 	caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
1900 	rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
1901 	if (rc != 0) {
1902 		device_printf(sc->dev,
1903 		    "failed to get card capabilities: %d.\n", rc);
1904 		return (rc);
1905 	}
1906 
1907 	if (caps.toecaps) {
1908 		/* query offload-related parameters */
1909 		param[0] = FW_PARAM_DEV(NTID);
1910 		param[1] = FW_PARAM_PFVF(SERVER_START);
1911 		param[2] = FW_PARAM_PFVF(SERVER_END);
1912 		param[3] = FW_PARAM_PFVF(TDDP_START);
1913 		param[4] = FW_PARAM_PFVF(TDDP_END);
1914 		param[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
1915 		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
1916 		if (rc != 0) {
1917 			device_printf(sc->dev,
1918 			    "failed to query TOE parameters: %d.\n", rc);
1919 			return (rc);
1920 		}
1921 		sc->tids.ntids = val[0];
1922 		sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS);
1923 		sc->tids.stid_base = val[1];
1924 		sc->tids.nstids = val[2] - val[1] + 1;
1925 		sc->vres.ddp.start = val[3];
1926 		sc->vres.ddp.size = val[4] - val[3] + 1;
1927 		sc->params.ofldq_wr_cred = val[5];
1928 		sc->params.offload = 1;
1929 	}
1930 	if (caps.rdmacaps) {
1931 		param[0] = FW_PARAM_PFVF(STAG_START);
1932 		param[1] = FW_PARAM_PFVF(STAG_END);
1933 		param[2] = FW_PARAM_PFVF(RQ_START);
1934 		param[3] = FW_PARAM_PFVF(RQ_END);
1935 		param[4] = FW_PARAM_PFVF(PBL_START);
1936 		param[5] = FW_PARAM_PFVF(PBL_END);
1937 		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
1938 		if (rc != 0) {
1939 			device_printf(sc->dev,
1940 			    "failed to query RDMA parameters(1): %d.\n", rc);
1941 			return (rc);
1942 		}
1943 		sc->vres.stag.start = val[0];
1944 		sc->vres.stag.size = val[1] - val[0] + 1;
1945 		sc->vres.rq.start = val[2];
1946 		sc->vres.rq.size = val[3] - val[2] + 1;
1947 		sc->vres.pbl.start = val[4];
1948 		sc->vres.pbl.size = val[5] - val[4] + 1;
1949 
1950 		param[0] = FW_PARAM_PFVF(SQRQ_START);
1951 		param[1] = FW_PARAM_PFVF(SQRQ_END);
1952 		param[2] = FW_PARAM_PFVF(CQ_START);
1953 		param[3] = FW_PARAM_PFVF(CQ_END);
1954 		param[4] = FW_PARAM_PFVF(OCQ_START);
1955 		param[5] = FW_PARAM_PFVF(OCQ_END);
1956 		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
1957 		if (rc != 0) {
1958 			device_printf(sc->dev,
1959 			    "failed to query RDMA parameters(2): %d.\n", rc);
1960 			return (rc);
1961 		}
1962 		sc->vres.qp.start = val[0];
1963 		sc->vres.qp.size = val[1] - val[0] + 1;
1964 		sc->vres.cq.start = val[2];
1965 		sc->vres.cq.size = val[3] - val[2] + 1;
1966 		sc->vres.ocq.start = val[4];
1967 		sc->vres.ocq.size = val[5] - val[4] + 1;
1968 	}
1969 	if (caps.iscsicaps) {
1970 		param[0] = FW_PARAM_PFVF(ISCSI_START);
1971 		param[1] = FW_PARAM_PFVF(ISCSI_END);
1972 		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
1973 		if (rc != 0) {
1974 			device_printf(sc->dev,
1975 			    "failed to query iSCSI parameters: %d.\n", rc);
1976 			return (rc);
1977 		}
1978 		sc->vres.iscsi.start = val[0];
1979 		sc->vres.iscsi.size = val[1] - val[0] + 1;
1980 	}
1981 
1982 	/* These are finalized by FW initialization; load their values now. */
1983 	val[0] = t4_read_reg(sc, A_TP_TIMER_RESOLUTION);
1984 	sc->params.tp.tre = G_TIMERRESOLUTION(val[0]);
1985 	sc->params.tp.dack_re = G_DELAYEDACKRESOLUTION(val[0]);
1986 	t4_read_mtu_tbl(sc, sc->params.mtus, NULL);
1987 
1988 	return (rc);
1989 }
1990 
1991 #undef FW_PARAM_PFVF
1992 #undef FW_PARAM_DEV
1993 
1994 static void
1995 t4_set_desc(struct adapter *sc)
1996 {
1997 	char buf[128];
1998 	struct adapter_params *p = &sc->params;
1999 
2000 	snprintf(buf, sizeof(buf), "Chelsio %s %sNIC (rev %d), S/N:%s, E/C:%s",
2001 	    p->vpd.id, is_offload(sc) ? "R" : "", p->rev, p->vpd.sn, p->vpd.ec);
2002 
2003 	device_set_desc_copy(sc->dev, buf);
2004 }
2005 
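/*
 * Rebuild the port's media list from scratch to match its current port and
 * module type.
 */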
2006 static void
2007 build_medialist(struct port_info *pi)
2008 {
2009 	struct ifmedia *media = &pi->media;
2010 	int data, m;
2011 
2012 	PORT_LOCK(pi);
2013 
2014 	ifmedia_removeall(media);
2015 
2016 	m = IFM_ETHER | IFM_FDX;
2017 	data = (pi->port_type << 8) | pi->mod_type;
2018 
2019 	switch (pi->port_type) {
2020 	case FW_PORT_TYPE_BT_XFI:
2021 		ifmedia_add(media, m | IFM_10G_T, data, NULL);
2022 		break;
2023 
2024 	case FW_PORT_TYPE_BT_XAUI:
2025 		ifmedia_add(media, m | IFM_10G_T, data, NULL);
2026 		/* fall through */
2027 
2028 	case FW_PORT_TYPE_BT_SGMII:
2029 		ifmedia_add(media, m | IFM_1000_T, data, NULL);
2030 		ifmedia_add(media, m | IFM_100_TX, data, NULL);
2031 		ifmedia_add(media, IFM_ETHER | IFM_AUTO, data, NULL);
2032 		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2033 		break;
2034 
2035 	case FW_PORT_TYPE_CX4:
2036 		ifmedia_add(media, m | IFM_10G_CX4, data, NULL);
2037 		ifmedia_set(media, m | IFM_10G_CX4);
2038 		break;
2039 
2040 	case FW_PORT_TYPE_SFP:
2041 	case FW_PORT_TYPE_FIBER_XFI:
2042 	case FW_PORT_TYPE_FIBER_XAUI:
2043 		switch (pi->mod_type) {
2044 
2045 		case FW_PORT_MOD_TYPE_LR:
2046 			ifmedia_add(media, m | IFM_10G_LR, data, NULL);
2047 			ifmedia_set(media, m | IFM_10G_LR);
2048 			break;
2049 
2050 		case FW_PORT_MOD_TYPE_SR:
2051 			ifmedia_add(media, m | IFM_10G_SR, data, NULL);
2052 			ifmedia_set(media, m | IFM_10G_SR);
2053 			break;
2054 
2055 		case FW_PORT_MOD_TYPE_LRM:
2056 			ifmedia_add(media, m | IFM_10G_LRM, data, NULL);
2057 			ifmedia_set(media, m | IFM_10G_LRM);
2058 			break;
2059 
2060 		case FW_PORT_MOD_TYPE_TWINAX_PASSIVE:
2061 		case FW_PORT_MOD_TYPE_TWINAX_ACTIVE:
2062 			ifmedia_add(media, m | IFM_10G_TWINAX, data, NULL);
2063 			ifmedia_set(media, m | IFM_10G_TWINAX);
2064 			break;
2065 
2066 		case FW_PORT_MOD_TYPE_NONE:
2067 			m &= ~IFM_FDX;
2068 			ifmedia_add(media, m | IFM_NONE, data, NULL);
2069 			ifmedia_set(media, m | IFM_NONE);
2070 			break;
2071 
2072 		case FW_PORT_MOD_TYPE_NA:
2073 		case FW_PORT_MOD_TYPE_ER:
2074 		default:
2075 			ifmedia_add(media, m | IFM_UNKNOWN, data, NULL);
2076 			ifmedia_set(media, m | IFM_UNKNOWN);
2077 			break;
2078 		}
2079 		break;
2080 
2081 	case FW_PORT_TYPE_KX4:
2082 	case FW_PORT_TYPE_KX:
2083 	case FW_PORT_TYPE_KR:
2084 	default:
2085 		ifmedia_add(media, m | IFM_UNKNOWN, data, NULL);
2086 		ifmedia_set(media, m | IFM_UNKNOWN);
2087 		break;
2088 	}
2089 
2090 	PORT_UNLOCK(pi);
2091 }
2092 
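/* Number of exact-match MAC addresses programmed per firmware request. */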
2093 #define FW_MAC_EXACT_CHUNK	7
2094 
2095 /*
2096  * Program the port's XGMAC based on parameters in ifnet.  The caller also
2097  * indicates which parameters should be programmed (the rest are left alone).
2098  */
2099 static int
2100 update_mac_settings(struct port_info *pi, int flags)
2101 {
2102 	int rc;
2103 	struct ifnet *ifp = pi->ifp;
2104 	struct adapter *sc = pi->adapter;
2105 	int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1;
2106 
2107 	ASSERT_SYNCHRONIZED_OP(sc);
2108 	KASSERT(flags, ("%s: not told what to update.", __func__));
2109 
2110 	if (flags & XGMAC_MTU)
2111 		mtu = ifp->if_mtu;
2112 
2113 	if (flags & XGMAC_PROMISC)
2114 		promisc = ifp->if_flags & IFF_PROMISC ? 1 : 0;
2115 
2116 	if (flags & XGMAC_ALLMULTI)
2117 		allmulti = ifp->if_flags & IFF_ALLMULTI ? 1 : 0;
2118 
2119 	if (flags & XGMAC_VLANEX)
2120 		vlanex = ifp->if_capenable & IFCAP_VLAN_HWTAGGING ? 1 : 0;
2121 
2122 	rc = -t4_set_rxmode(sc, sc->mbox, pi->viid, mtu, promisc, allmulti, 1,
2123 	    vlanex, false);
2124 	if (rc) {
2125 		if_printf(ifp, "set_rxmode (%x) failed: %d\n", flags, rc);
2126 		return (rc);
2127 	}
2128 
2129 	if (flags & XGMAC_UCADDR) {
2130 		uint8_t ucaddr[ETHER_ADDR_LEN];
2131 
2132 		bcopy(IF_LLADDR(ifp), ucaddr, sizeof(ucaddr));
2133 		rc = t4_change_mac(sc, sc->mbox, pi->viid, pi->xact_addr_filt,
2134 		    ucaddr, true, true);
2135 		if (rc < 0) {
2136 			rc = -rc;
2137 			if_printf(ifp, "change_mac failed: %d\n", rc);
2138 			return (rc);
2139 		} else {
2140 			pi->xact_addr_filt = rc;
2141 			rc = 0;
2142 		}
2143 	}
2144 
2145 	if (flags & XGMAC_MCADDRS) {
2146 		const uint8_t *mcaddr[FW_MAC_EXACT_CHUNK];
2147 		int del = 1;
2148 		uint64_t hash = 0;
2149 		struct ifmultiaddr *ifma;
2150 		int i = 0, j;
2151 
2152 		if_maddr_rlock(ifp);
2153 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2154 			if (ifma->ifma_addr->sa_family != AF_LINK)
2155 				continue;
2156 			mcaddr[i++] =
2157 			    LLADDR((struct sockaddr_dl *)ifma->ifma_addr);
2158 
2159 			if (i == FW_MAC_EXACT_CHUNK) {
2160 				rc = t4_alloc_mac_filt(sc, sc->mbox, pi->viid,
2161 				    del, i, mcaddr, NULL, &hash, 0);
2162 				if (rc < 0) {
2163 					rc = -rc;
2164 					for (j = 0; j < i; j++) {
2165 						if_printf(ifp,
2166 						    "failed to add mc address"
2167 						    " %02x:%02x:%02x:"
2168 						    "%02x:%02x:%02x rc=%d\n",
2169 						    mcaddr[j][0], mcaddr[j][1],
2170 						    mcaddr[j][2], mcaddr[j][3],
2171 						    mcaddr[j][4], mcaddr[j][5],
2172 						    rc);
2173 					}
2174 					goto mcfail;
2175 				}
2176 				del = 0;
2177 				i = 0;
2178 			}
2179 		}
2180 		if (i > 0) {
2181 			rc = t4_alloc_mac_filt(sc, sc->mbox, pi->viid,
2182 			    del, i, mcaddr, NULL, &hash, 0);
2183 			if (rc < 0) {
2184 				rc = -rc;
2185 				for (j = 0; j < i; j++) {
2186 					if_printf(ifp,
2187 					    "failed to add mc address"
2188 					    " %02x:%02x:%02x:"
2189 					    "%02x:%02x:%02x rc=%d\n",
2190 					    mcaddr[j][0], mcaddr[j][1],
2191 					    mcaddr[j][2], mcaddr[j][3],
2192 					    mcaddr[j][4], mcaddr[j][5],
2193 					    rc);
2194 				}
2195 				goto mcfail;
2196 			}
2197 		}
2198 
2199 		rc = -t4_set_addr_hash(sc, sc->mbox, pi->viid, 0, hash, 0);
2200 		if (rc != 0)
2201 			if_printf(ifp, "failed to set mc address hash: %d", rc);
2202 mcfail:
2203 		if_maddr_runlock(ifp);
2204 	}
2205 
2206 	return (rc);
2207 }
2208 
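/*
 * Acquire the right to perform an operation that must be synchronized with
 * the rest of the driver: sleep (if SLEEP_OK is set) until the adapter is
 * not busy and then mark it busy.  Returns ENXIO if the port is being
 * detached, EBUSY if the adapter is busy and the caller may not sleep, and
 * EINTR if an INTR_OK sleep was interrupted.  On success the adapter lock
 * is held only if HOLD_LOCK was requested.
 */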
2209 int
2210 begin_synchronized_op(struct adapter *sc, struct port_info *pi, int flags,
2211     char *wmesg)
2212 {
2213 	int rc, pri;
2214 
2215 #ifdef WITNESS
2216 	/* the caller thinks it's ok to sleep, but is it really? */
2217 	if (flags & SLEEP_OK)
2218 		pause("t4slptst", 1);
2219 #endif
2220 
2221 	if (INTR_OK)
2222 		pri = PCATCH;
2223 	else
2224 		pri = 0;
2225 
2226 	ADAPTER_LOCK(sc);
2227 	for (;;) {
2228 
2229 		if (pi && IS_DOOMED(pi)) {
2230 			rc = ENXIO;
2231 			goto done;
2232 		}
2233 
2234 		if (!IS_BUSY(sc)) {
2235 			rc = 0;
2236 			break;
2237 		}
2238 
2239 		if (!(flags & SLEEP_OK)) {
2240 			rc = EBUSY;
2241 			goto done;
2242 		}
2243 
2244 		if (mtx_sleep(&sc->flags, &sc->sc_lock, pri, wmesg, 0)) {
2245 			rc = EINTR;
2246 			goto done;
2247 		}
2248 	}
2249 
2250 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
2251 	SET_BUSY(sc);
2252 #ifdef INVARIANTS
2253 	sc->last_op = wmesg;
2254 	sc->last_op_thr = curthread;
2255 #endif
2256 
2257 done:
2258 	if (!(flags & HOLD_LOCK) || rc)
2259 		ADAPTER_UNLOCK(sc);
2260 
2261 	return (rc);
2262 }
2263 
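/*
 * Release the right acquired by begin_synchronized_op: clear the busy flag
 * and wake up any waiters.  Pass LOCK_HELD if the adapter lock is already
 * held; it is released in either case.
 */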
2264 void
2265 end_synchronized_op(struct adapter *sc, int flags)
2266 {
2267 
2268 	if (flags & LOCK_HELD)
2269 		ADAPTER_LOCK_ASSERT_OWNED(sc);
2270 	else
2271 		ADAPTER_LOCK(sc);
2272 
2273 	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
2274 	CLR_BUSY(sc);
2275 	wakeup(&sc->flags);
2276 	ADAPTER_UNLOCK(sc);
2277 }
2278 
2279 static int
2280 cxgbe_init_synchronized(struct port_info *pi)
2281 {
2282 	struct adapter *sc = pi->adapter;
2283 	struct ifnet *ifp = pi->ifp;
2284 	int rc = 0;
2285 
2286 	ASSERT_SYNCHRONIZED_OP(sc);
2287 
2288 	if (isset(&sc->open_device_map, pi->port_id)) {
2289 		KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2290 		    ("mismatch between open_device_map and if_drv_flags"));
2291 		return (0);	/* already running */
2292 	}
2293 
2294 	if (!(sc->flags & FULL_INIT_DONE) &&
2295 	    ((rc = adapter_full_init(sc)) != 0))
2296 		return (rc);	/* error message displayed already */
2297 
2298 	if (!(pi->flags & PORT_INIT_DONE) &&
2299 	    ((rc = port_full_init(pi)) != 0))
2300 		return (rc); /* error message displayed already */
2301 
2302 	rc = update_mac_settings(pi, XGMAC_ALL);
2303 	if (rc)
2304 		goto done;	/* error message displayed already */
2305 
2306 	rc = -t4_link_start(sc, sc->mbox, pi->tx_chan, &pi->link_cfg);
2307 	if (rc != 0) {
2308 		if_printf(ifp, "start_link failed: %d\n", rc);
2309 		goto done;
2310 	}
2311 
2312 	rc = -t4_enable_vi(sc, sc->mbox, pi->viid, true, true);
2313 	if (rc != 0) {
2314 		if_printf(ifp, "enable_vi failed: %d\n", rc);
2315 		goto done;
2316 	}
2317 
2318 	/* all ok */
2319 	setbit(&sc->open_device_map, pi->port_id);
2320 	PORT_LOCK(pi);
2321 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
2322 	PORT_UNLOCK(pi);
2323 
2324 	callout_reset(&pi->tick, hz, cxgbe_tick, pi);
2325 done:
2326 	if (rc != 0)
2327 		cxgbe_uninit_synchronized(pi);
2328 
2329 	return (rc);
2330 }
2331 
2332 /*
2333  * Idempotent.
2334  */
2335 static int
2336 cxgbe_uninit_synchronized(struct port_info *pi)
2337 {
2338 	struct adapter *sc = pi->adapter;
2339 	struct ifnet *ifp = pi->ifp;
2340 	int rc;
2341 
2342 	ASSERT_SYNCHRONIZED_OP(sc);
2343 
2344 	/*
2345 	 * Disable the VI so that all its data in either direction is discarded
2346 	 * by the MPS.  Leave everything else (the queues, interrupts, and 1Hz
2347 	 * tick) intact as the TP can deliver negative advice or data that it's
2348 	 * holding in its RAM (for an offloaded connection) even after the VI is
2349 	 * disabled.
2350 	 */
2351 	rc = -t4_enable_vi(sc, sc->mbox, pi->viid, false, false);
2352 	if (rc) {
2353 		if_printf(ifp, "disable_vi failed: %d\n", rc);
2354 		return (rc);
2355 	}
2356 
2357 	clrbit(&sc->open_device_map, pi->port_id);
2358 	PORT_LOCK(pi);
2359 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2360 	PORT_UNLOCK(pi);
2361 
2362 	pi->link_cfg.link_ok = 0;
2363 	pi->link_cfg.speed = 0;
2364 	t4_os_link_changed(sc, pi->port_id, 0);
2365 
2366 	return (0);
2367 }
2368 
2369 /*
2370  * It is ok for this function to fail midway and return right away.  t4_detach
2371  * will walk the entire sc->irq list and clean up whatever is valid.
2372  */
2373 static int
2374 setup_intr_handlers(struct adapter *sc)
2375 {
2376 	int rc, rid, p, q;
2377 	char s[8];
2378 	struct irq *irq;
2379 	struct port_info *pi;
2380 	struct sge_rxq *rxq;
2381 #ifdef TCP_OFFLOAD
2382 	struct sge_ofld_rxq *ofld_rxq;
2383 #endif
2384 
2385 	/*
2386 	 * Set up interrupts.
2387 	 */
2388 	irq = &sc->irq[0];
2389 	rid = sc->intr_type == INTR_INTX ? 0 : 1;
2390 	if (sc->intr_count == 1) {
2391 		KASSERT(!(sc->flags & INTR_DIRECT),
2392 		    ("%s: single interrupt && INTR_DIRECT?", __func__));
2393 
2394 		rc = t4_alloc_irq(sc, irq, rid, t4_intr_all, sc, "all");
2395 		if (rc != 0)
2396 			return (rc);
2397 	} else {
2398 		/* Multiple interrupts. */
2399 		KASSERT(sc->intr_count >= T4_EXTRA_INTR + sc->params.nports,
2400 		    ("%s: too few intr.", __func__));
2401 
2402 		/* The first one is always error intr */
2403 		rc = t4_alloc_irq(sc, irq, rid, t4_intr_err, sc, "err");
2404 		if (rc != 0)
2405 			return (rc);
2406 		irq++;
2407 		rid++;
2408 
2409 		/* The second one is always the firmware event queue */
2410 		rc = t4_alloc_irq(sc, irq, rid, t4_intr_evt, &sc->sge.fwq,
2411 		    "evt");
2412 		if (rc != 0)
2413 			return (rc);
2414 		irq++;
2415 		rid++;
2416 
2417 		/*
2418 		 * Note that if INTR_DIRECT is not set then either the NIC rx
2419 		 * queues or the TOE rx queues (but not both) will be taking
2420 		 * direct interrupts.
2421 		 *
2422 		 * There is no need to check for is_offload(sc) as nofldrxq
2423 		 * will be 0 if offload is disabled.
2424 		 */
2425 		for_each_port(sc, p) {
2426 			pi = sc->port[p];
2427 
2428 #ifdef TCP_OFFLOAD
2429 			/*
2430 			 * Skip over the NIC queues if they aren't taking direct
2431 			 * interrupts.
2432 			 */
2433 			if (!(sc->flags & INTR_DIRECT) &&
2434 			    pi->nofldrxq > pi->nrxq)
2435 				goto ofld_queues;
2436 #endif
2437 			rxq = &sc->sge.rxq[pi->first_rxq];
2438 			for (q = 0; q < pi->nrxq; q++, rxq++) {
2439 				snprintf(s, sizeof(s), "%d.%d", p, q);
2440 				rc = t4_alloc_irq(sc, irq, rid, t4_intr, rxq,
2441 				    s);
2442 				if (rc != 0)
2443 					return (rc);
2444 				irq++;
2445 				rid++;
2446 			}
2447 
2448 #ifdef TCP_OFFLOAD
2449 			/*
2450 			 * Skip over the offload queues if they aren't taking
2451 			 * direct interrupts.
2452 			 */
2453 			if (!(sc->flags & INTR_DIRECT))
2454 				continue;
2455 ofld_queues:
2456 			ofld_rxq = &sc->sge.ofld_rxq[pi->first_ofld_rxq];
2457 			for (q = 0; q < pi->nofldrxq; q++, ofld_rxq++) {
2458 				snprintf(s, sizeof(s), "%d,%d", p, q);
2459 				rc = t4_alloc_irq(sc, irq, rid, t4_intr,
2460 				    ofld_rxq, s);
2461 				if (rc != 0)
2462 					return (rc);
2463 				irq++;
2464 				rid++;
2465 			}
2466 #endif
2467 		}
2468 	}
2469 
2470 	return (0);
2471 }
2472 
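/*
 * One-time allocation of the adapter-wide queues and taskqueues.  On failure
 * adapter_full_uninit is called to release whatever was set up.
 */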
2473 static int
2474 adapter_full_init(struct adapter *sc)
2475 {
2476 	int rc, i;
2477 
2478 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
2479 	KASSERT((sc->flags & FULL_INIT_DONE) == 0,
2480 	    ("%s: FULL_INIT_DONE already", __func__));
2481 
2482 	/*
2483 	 * Set up the queues that belong to the adapter (not to any particular port).
2484 	 */
2485 	rc = t4_setup_adapter_queues(sc);
2486 	if (rc != 0)
2487 		goto done;
2488 
2489 	for (i = 0; i < nitems(sc->tq); i++) {
2490 		sc->tq[i] = taskqueue_create("t4 taskq", M_NOWAIT,
2491 		    taskqueue_thread_enqueue, &sc->tq[i]);
2492 		if (sc->tq[i] == NULL) {
2493 			device_printf(sc->dev,
2494 			    "failed to allocate task queue %d\n", i);
2495 			rc = ENOMEM;
2496 			goto done;
2497 		}
2498 		taskqueue_start_threads(&sc->tq[i], 1, PI_NET, "%s tq%d",
2499 		    device_get_nameunit(sc->dev), i);
2500 	}
2501 
2502 	t4_intr_enable(sc);
2503 	sc->flags |= FULL_INIT_DONE;
2504 done:
2505 	if (rc != 0)
2506 		adapter_full_uninit(sc);
2507 
2508 	return (rc);
2509 }
2510 
2511 static int
2512 adapter_full_uninit(struct adapter *sc)
2513 {
2514 	int i;
2515 
2516 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
2517 
2518 	t4_teardown_adapter_queues(sc);
2519 
2520 	for (i = 0; i < nitems(sc->tq) && sc->tq[i]; i++) {
2521 		taskqueue_free(sc->tq[i]);
2522 		sc->tq[i] = NULL;
2523 	}
2524 
2525 	sc->flags &= ~FULL_INIT_DONE;
2526 
2527 	return (0);
2528 }
2529 
2530 static int
2531 port_full_init(struct port_info *pi)
2532 {
2533 	struct adapter *sc = pi->adapter;
2534 	struct ifnet *ifp = pi->ifp;
2535 	uint16_t *rss;
2536 	struct sge_rxq *rxq;
2537 	int rc, i;
2538 
2539 	ASSERT_SYNCHRONIZED_OP(sc);
2540 	KASSERT((pi->flags & PORT_INIT_DONE) == 0,
2541 	    ("%s: PORT_INIT_DONE already", __func__));
2542 
2543 	sysctl_ctx_init(&pi->ctx);
2544 	pi->flags |= PORT_SYSCTL_CTX;
2545 
2546 	/*
2547 	 * Allocate tx/rx/fl queues for this port.
2548 	 */
2549 	rc = t4_setup_port_queues(pi);
2550 	if (rc != 0)
2551 		goto done;	/* error message displayed already */
2552 
2553 	/*
2554 	 * Set up RSS for this port.
2555 	 */
2556 	rss = malloc(pi->nrxq * sizeof(*rss), M_CXGBE,
2557 	    M_ZERO | M_WAITOK);
2558 	for_each_rxq(pi, i, rxq) {
2559 		rss[i] = rxq->iq.abs_id;
2560 	}
2561 	rc = -t4_config_rss_range(sc, sc->mbox, pi->viid, 0,
2562 	    pi->rss_size, rss, pi->nrxq);
2563 	free(rss, M_CXGBE);
2564 	if (rc != 0) {
2565 		if_printf(ifp, "rss_config failed: %d\n", rc);
2566 		goto done;
2567 	}
2568 
2569 	pi->flags |= PORT_INIT_DONE;
2570 done:
2571 	if (rc != 0)
2572 		port_full_uninit(pi);
2573 
2574 	return (rc);
2575 }
2576 
2577 /*
2578  * Idempotent.
2579  */
2580 static int
2581 port_full_uninit(struct port_info *pi)
2582 {
2583 	struct adapter *sc = pi->adapter;
2584 	int i;
2585 	struct sge_rxq *rxq;
2586 	struct sge_txq *txq;
2587 #ifdef TCP_OFFLOAD
2588 	struct sge_ofld_rxq *ofld_rxq;
2589 	struct sge_wrq *ofld_txq;
2590 #endif
2591 
2592 	if (pi->flags & PORT_INIT_DONE) {
2593 
2594 		/* Need to quiesce queues.  XXX: ctrl queues? */
2595 
2596 		for_each_txq(pi, i, txq) {
2597 			quiesce_eq(sc, &txq->eq);
2598 		}
2599 
2600 #ifdef TCP_OFFLOAD
2601 		for_each_ofld_txq(pi, i, ofld_txq) {
2602 			quiesce_eq(sc, &ofld_txq->eq);
2603 		}
2604 #endif
2605 
2606 		for_each_rxq(pi, i, rxq) {
2607 			quiesce_iq(sc, &rxq->iq);
2608 			quiesce_fl(sc, &rxq->fl);
2609 		}
2610 
2611 #ifdef TCP_OFFLOAD
2612 		for_each_ofld_rxq(pi, i, ofld_rxq) {
2613 			quiesce_iq(sc, &ofld_rxq->iq);
2614 			quiesce_fl(sc, &ofld_rxq->fl);
2615 		}
2616 #endif
2617 	}
2618 
2619 	t4_teardown_port_queues(pi);
2620 	pi->flags &= ~PORT_INIT_DONE;
2621 
2622 	return (0);
2623 }
2624 
2625 static void
2626 quiesce_eq(struct adapter *sc, struct sge_eq *eq)
2627 {
2628 	EQ_LOCK(eq);
2629 	eq->flags |= EQ_DOOMED;
2630 
2631 	/*
2632 	 * Wait for the response to a credit flush if one's
2633 	 * pending.
2634 	 */
2635 	while (eq->flags & EQ_CRFLUSHED)
2636 		mtx_sleep(eq, &eq->eq_lock, 0, "crflush", 0);
2637 	EQ_UNLOCK(eq);
2638 
2639 	callout_drain(&eq->tx_callout);	/* XXX: iffy */
2640 	pause("callout", 10);		/* Still iffy */
2641 
2642 	taskqueue_drain(sc->tq[eq->tx_chan], &eq->tx_task);
2643 }
2644 
2645 static void
2646 quiesce_iq(struct adapter *sc, struct sge_iq *iq)
2647 {
2648 	(void) sc;	/* unused */
2649 
2650 	/* Synchronize with the interrupt handler */
2651 	while (!atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_DISABLED))
2652 		pause("iqfree", 1);
2653 }
2654 
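/*
 * Mark the freelist doomed so that the starving-freelist callout leaves it
 * alone, then wait for any in-progress callout to finish.
 */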
2655 static void
2656 quiesce_fl(struct adapter *sc, struct sge_fl *fl)
2657 {
2658 	mtx_lock(&sc->sfl_lock);
2659 	FL_LOCK(fl);
2660 	fl->flags |= FL_DOOMED;
2661 	FL_UNLOCK(fl);
2662 	mtx_unlock(&sc->sfl_lock);
2663 
2664 	callout_drain(&sc->sfl_callout);
2665 	KASSERT((fl->flags & FL_STARVING) == 0,
2666 	    ("%s: still starving", __func__));
2667 }
2668 
2669 static int
2670 t4_alloc_irq(struct adapter *sc, struct irq *irq, int rid,
2671     driver_intr_t *handler, void *arg, char *name)
2672 {
2673 	int rc;
2674 
2675 	irq->rid = rid;
2676 	irq->res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &irq->rid,
2677 	    RF_SHAREABLE | RF_ACTIVE);
2678 	if (irq->res == NULL) {
2679 		device_printf(sc->dev,
2680 		    "failed to allocate IRQ for rid %d, name %s.\n", rid, name);
2681 		return (ENOMEM);
2682 	}
2683 
2684 	rc = bus_setup_intr(sc->dev, irq->res, INTR_MPSAFE | INTR_TYPE_NET,
2685 	    NULL, handler, arg, &irq->tag);
2686 	if (rc != 0) {
2687 		device_printf(sc->dev,
2688 		    "failed to setup interrupt for rid %d, name %s: %d\n",
2689 		    rid, name, rc);
2690 	} else if (name)
2691 		bus_describe_intr(sc->dev, irq->res, irq->tag, name);
2692 
2693 	return (rc);
2694 }
2695 
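/* Release an irq set up by t4_alloc_irq.  Safe to call on a zeroed irq. */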
2696 static int
2697 t4_free_irq(struct adapter *sc, struct irq *irq)
2698 {
2699 	if (irq->tag)
2700 		bus_teardown_intr(sc->dev, irq->res, irq->tag);
2701 	if (irq->res)
2702 		bus_release_resource(sc->dev, SYS_RES_IRQ, irq->rid, irq->res);
2703 
2704 	bzero(irq, sizeof(*irq));
2705 
2706 	return (0);
2707 }
2708 
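/*
 * Read the registers in [start, end] (inclusive, 4 bytes at a time) into
 * buf at offset 'start'.
 */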
2709 static void
2710 reg_block_dump(struct adapter *sc, uint8_t *buf, unsigned int start,
2711     unsigned int end)
2712 {
2713 	uint32_t *p = (uint32_t *)(buf + start);
2714 
2715 	for ( ; start <= end; start += sizeof(uint32_t))
2716 		*p++ = t4_read_reg(sc, start);
2717 }
2718 
2719 static void
2720 t4_get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf)
2721 {
2722 	int i;
2723 	static const unsigned int reg_ranges[] = {
2724 		0x1008, 0x1108,
2725 		0x1180, 0x11b4,
2726 		0x11fc, 0x123c,
2727 		0x1300, 0x173c,
2728 		0x1800, 0x18fc,
2729 		0x3000, 0x30d8,
2730 		0x30e0, 0x5924,
2731 		0x5960, 0x59d4,
2732 		0x5a00, 0x5af8,
2733 		0x6000, 0x6098,
2734 		0x6100, 0x6150,
2735 		0x6200, 0x6208,
2736 		0x6240, 0x6248,
2737 		0x6280, 0x6338,
2738 		0x6370, 0x638c,
2739 		0x6400, 0x643c,
2740 		0x6500, 0x6524,
2741 		0x6a00, 0x6a38,
2742 		0x6a60, 0x6a78,
2743 		0x6b00, 0x6b84,
2744 		0x6bf0, 0x6c84,
2745 		0x6cf0, 0x6d84,
2746 		0x6df0, 0x6e84,
2747 		0x6ef0, 0x6f84,
2748 		0x6ff0, 0x7084,
2749 		0x70f0, 0x7184,
2750 		0x71f0, 0x7284,
2751 		0x72f0, 0x7384,
2752 		0x73f0, 0x7450,
2753 		0x7500, 0x7530,
2754 		0x7600, 0x761c,
2755 		0x7680, 0x76cc,
2756 		0x7700, 0x7798,
2757 		0x77c0, 0x77fc,
2758 		0x7900, 0x79fc,
2759 		0x7b00, 0x7c38,
2760 		0x7d00, 0x7efc,
2761 		0x8dc0, 0x8e1c,
2762 		0x8e30, 0x8e78,
2763 		0x8ea0, 0x8f6c,
2764 		0x8fc0, 0x9074,
2765 		0x90fc, 0x90fc,
2766 		0x9400, 0x9458,
2767 		0x9600, 0x96bc,
2768 		0x9800, 0x9808,
2769 		0x9820, 0x983c,
2770 		0x9850, 0x9864,
2771 		0x9c00, 0x9c6c,
2772 		0x9c80, 0x9cec,
2773 		0x9d00, 0x9d6c,
2774 		0x9d80, 0x9dec,
2775 		0x9e00, 0x9e6c,
2776 		0x9e80, 0x9eec,
2777 		0x9f00, 0x9f6c,
2778 		0x9f80, 0x9fec,
2779 		0xd004, 0xd03c,
2780 		0xdfc0, 0xdfe0,
2781 		0xe000, 0xea7c,
2782 		0xf000, 0x11190,
2783 		0x19040, 0x1906c,
2784 		0x19078, 0x19080,
2785 		0x1908c, 0x19124,
2786 		0x19150, 0x191b0,
2787 		0x191d0, 0x191e8,
2788 		0x19238, 0x1924c,
2789 		0x193f8, 0x19474,
2790 		0x19490, 0x194f8,
2791 		0x19800, 0x19f30,
2792 		0x1a000, 0x1a06c,
2793 		0x1a0b0, 0x1a120,
2794 		0x1a128, 0x1a138,
2795 		0x1a190, 0x1a1c4,
2796 		0x1a1fc, 0x1a1fc,
2797 		0x1e040, 0x1e04c,
2798 		0x1e284, 0x1e28c,
2799 		0x1e2c0, 0x1e2c0,
2800 		0x1e2e0, 0x1e2e0,
2801 		0x1e300, 0x1e384,
2802 		0x1e3c0, 0x1e3c8,
2803 		0x1e440, 0x1e44c,
2804 		0x1e684, 0x1e68c,
2805 		0x1e6c0, 0x1e6c0,
2806 		0x1e6e0, 0x1e6e0,
2807 		0x1e700, 0x1e784,
2808 		0x1e7c0, 0x1e7c8,
2809 		0x1e840, 0x1e84c,
2810 		0x1ea84, 0x1ea8c,
2811 		0x1eac0, 0x1eac0,
2812 		0x1eae0, 0x1eae0,
2813 		0x1eb00, 0x1eb84,
2814 		0x1ebc0, 0x1ebc8,
2815 		0x1ec40, 0x1ec4c,
2816 		0x1ee84, 0x1ee8c,
2817 		0x1eec0, 0x1eec0,
2818 		0x1eee0, 0x1eee0,
2819 		0x1ef00, 0x1ef84,
2820 		0x1efc0, 0x1efc8,
2821 		0x1f040, 0x1f04c,
2822 		0x1f284, 0x1f28c,
2823 		0x1f2c0, 0x1f2c0,
2824 		0x1f2e0, 0x1f2e0,
2825 		0x1f300, 0x1f384,
2826 		0x1f3c0, 0x1f3c8,
2827 		0x1f440, 0x1f44c,
2828 		0x1f684, 0x1f68c,
2829 		0x1f6c0, 0x1f6c0,
2830 		0x1f6e0, 0x1f6e0,
2831 		0x1f700, 0x1f784,
2832 		0x1f7c0, 0x1f7c8,
2833 		0x1f840, 0x1f84c,
2834 		0x1fa84, 0x1fa8c,
2835 		0x1fac0, 0x1fac0,
2836 		0x1fae0, 0x1fae0,
2837 		0x1fb00, 0x1fb84,
2838 		0x1fbc0, 0x1fbc8,
2839 		0x1fc40, 0x1fc4c,
2840 		0x1fe84, 0x1fe8c,
2841 		0x1fec0, 0x1fec0,
2842 		0x1fee0, 0x1fee0,
2843 		0x1ff00, 0x1ff84,
2844 		0x1ffc0, 0x1ffc8,
2845 		0x20000, 0x2002c,
2846 		0x20100, 0x2013c,
2847 		0x20190, 0x201c8,
2848 		0x20200, 0x20318,
2849 		0x20400, 0x20528,
2850 		0x20540, 0x20614,
2851 		0x21000, 0x21040,
2852 		0x2104c, 0x21060,
2853 		0x210c0, 0x210ec,
2854 		0x21200, 0x21268,
2855 		0x21270, 0x21284,
2856 		0x212fc, 0x21388,
2857 		0x21400, 0x21404,
2858 		0x21500, 0x21518,
2859 		0x2152c, 0x2153c,
2860 		0x21550, 0x21554,
2861 		0x21600, 0x21600,
2862 		0x21608, 0x21628,
2863 		0x21630, 0x2163c,
2864 		0x21700, 0x2171c,
2865 		0x21780, 0x2178c,
2866 		0x21800, 0x21c38,
2867 		0x21c80, 0x21d7c,
2868 		0x21e00, 0x21e04,
2869 		0x22000, 0x2202c,
2870 		0x22100, 0x2213c,
2871 		0x22190, 0x221c8,
2872 		0x22200, 0x22318,
2873 		0x22400, 0x22528,
2874 		0x22540, 0x22614,
2875 		0x23000, 0x23040,
2876 		0x2304c, 0x23060,
2877 		0x230c0, 0x230ec,
2878 		0x23200, 0x23268,
2879 		0x23270, 0x23284,
2880 		0x232fc, 0x23388,
2881 		0x23400, 0x23404,
2882 		0x23500, 0x23518,
2883 		0x2352c, 0x2353c,
2884 		0x23550, 0x23554,
2885 		0x23600, 0x23600,
2886 		0x23608, 0x23628,
2887 		0x23630, 0x2363c,
2888 		0x23700, 0x2371c,
2889 		0x23780, 0x2378c,
2890 		0x23800, 0x23c38,
2891 		0x23c80, 0x23d7c,
2892 		0x23e00, 0x23e04,
2893 		0x24000, 0x2402c,
2894 		0x24100, 0x2413c,
2895 		0x24190, 0x241c8,
2896 		0x24200, 0x24318,
2897 		0x24400, 0x24528,
2898 		0x24540, 0x24614,
2899 		0x25000, 0x25040,
2900 		0x2504c, 0x25060,
2901 		0x250c0, 0x250ec,
2902 		0x25200, 0x25268,
2903 		0x25270, 0x25284,
2904 		0x252fc, 0x25388,
2905 		0x25400, 0x25404,
2906 		0x25500, 0x25518,
2907 		0x2552c, 0x2553c,
2908 		0x25550, 0x25554,
2909 		0x25600, 0x25600,
2910 		0x25608, 0x25628,
2911 		0x25630, 0x2563c,
2912 		0x25700, 0x2571c,
2913 		0x25780, 0x2578c,
2914 		0x25800, 0x25c38,
2915 		0x25c80, 0x25d7c,
2916 		0x25e00, 0x25e04,
2917 		0x26000, 0x2602c,
2918 		0x26100, 0x2613c,
2919 		0x26190, 0x261c8,
2920 		0x26200, 0x26318,
2921 		0x26400, 0x26528,
2922 		0x26540, 0x26614,
2923 		0x27000, 0x27040,
2924 		0x2704c, 0x27060,
2925 		0x270c0, 0x270ec,
2926 		0x27200, 0x27268,
2927 		0x27270, 0x27284,
2928 		0x272fc, 0x27388,
2929 		0x27400, 0x27404,
2930 		0x27500, 0x27518,
2931 		0x2752c, 0x2753c,
2932 		0x27550, 0x27554,
2933 		0x27600, 0x27600,
2934 		0x27608, 0x27628,
2935 		0x27630, 0x2763c,
2936 		0x27700, 0x2771c,
2937 		0x27780, 0x2778c,
2938 		0x27800, 0x27c38,
2939 		0x27c80, 0x27d7c,
2940 		0x27e00, 0x27e04
2941 	};
2942 
2943 	regs->version = 4 | (sc->params.rev << 10);
2944 	for (i = 0; i < nitems(reg_ranges); i += 2)
2945 		reg_block_dump(sc, buf, reg_ranges[i], reg_ranges[i + 1]);
2946 }
2947 
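/*
 * 1Hz callout: refresh the ifnet counters from the chip's port statistics.
 * Reschedules itself for as long as the interface is marked running.
 */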
2948 static void
2949 cxgbe_tick(void *arg)
2950 {
2951 	struct port_info *pi = arg;
2952 	struct ifnet *ifp = pi->ifp;
2953 	struct sge_txq *txq;
2954 	int i, drops;
2955 	struct port_stats *s = &pi->stats;
2956 
2957 	PORT_LOCK(pi);
2958 	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
2959 		PORT_UNLOCK(pi);
2960 		return;	/* without scheduling another callout */
2961 	}
2962 
2963 	t4_get_port_stats(pi->adapter, pi->tx_chan, s);
2964 
2965 	ifp->if_opackets = s->tx_frames - s->tx_pause;
2966 	ifp->if_ipackets = s->rx_frames - s->rx_pause;
2967 	ifp->if_obytes = s->tx_octets - s->tx_pause * 64;
2968 	ifp->if_ibytes = s->rx_octets - s->rx_pause * 64;
2969 	ifp->if_omcasts = s->tx_mcast_frames - s->tx_pause;
2970 	ifp->if_imcasts = s->rx_mcast_frames - s->rx_pause;
2971 	ifp->if_iqdrops = s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 +
2972 	    s->rx_ovflow3 + s->rx_trunc0 + s->rx_trunc1 + s->rx_trunc2 +
2973 	    s->rx_trunc3;
2974 
2975 	drops = s->tx_drop;
2976 	for_each_txq(pi, i, txq)
2977 		drops += txq->br->br_drops;
2978 	ifp->if_snd.ifq_drops = drops;
2979 
2980 	ifp->if_oerrors = s->tx_error_frames;
2981 	ifp->if_ierrors = s->rx_jabber + s->rx_runt + s->rx_too_long +
2982 	    s->rx_fcs_err + s->rx_len_err;
2983 
2984 	callout_schedule(&pi->tick, hz);
2985 	PORT_UNLOCK(pi);
2986 }
2987 
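/*
 * Event handler for vlan creation on one of the driver's interfaces: record
 * the parent ifnet as the new vlan's cookie.
 */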
2988 static void
2989 cxgbe_vlan_config(void *arg, struct ifnet *ifp, uint16_t vid)
2990 {
2991 	struct ifnet *vlan;
2992 
2993 	if (arg != ifp || ifp->if_type != IFT_ETHER)
2994 		return;
2995 
2996 	vlan = VLAN_DEVAT(ifp, vid);
2997 	VLAN_SETCOOKIE(vlan, ifp);
2998 }
2999 
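/*
 * Default handler for CPL messages that have no handler registered: panic
 * under INVARIANTS, otherwise log the stray opcode and free the mbuf.
 */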
3000 static int
3001 cpl_not_handled(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
3002 {
3003 
3004 #ifdef INVARIANTS
3005 	panic("%s: opcode 0x%02x on iq %p with payload %p",
3006 	    __func__, rss->opcode, iq, m);
3007 #else
3008 	log(LOG_ERR, "%s: opcode 0x%02x on iq %p with payload %p\n",
3009 	    __func__, rss->opcode, iq, m);
3010 	m_freem(m);
3011 #endif
3012 	return (EDOOFUS);
3013 }
3014 
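/*
 * Install h as the handler for the given CPL opcode, or restore the default
 * (cpl_not_handled) if h is NULL.  The table slot is updated atomically so
 * registration is safe against concurrent rx processing.  A consumer might
 * call, e.g., t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, its_handler).
 */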
3015 int
3016 t4_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h)
3017 {
3018 	uintptr_t *loc, new;
3019 
3020 	if (opcode >= nitems(sc->cpl_handler))
3021 		return (EINVAL);
3022 
3023 	new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled;
3024 	loc = (uintptr_t *) &sc->cpl_handler[opcode];
3025 	atomic_store_rel_ptr(loc, new);
3026 
3027 	return (0);
3028 }
3029 
3030 static int
3031 an_not_handled(struct sge_iq *iq, const struct rsp_ctrl *ctrl)
3032 {
3033 
3034 #ifdef INVARIANTS
3035 	panic("%s: async notification on iq %p (ctrl %p)", __func__, iq, ctrl);
3036 #else
3037 	log(LOG_ERR, "%s: async notification on iq %p (ctrl %p)\n",
3038 	    __func__, iq, ctrl);
3039 #endif
3040 	return (EDOOFUS);
3041 }
3042 
3043 int
3044 t4_register_an_handler(struct adapter *sc, an_handler_t h)
3045 {
3046 	uintptr_t *loc, new;
3047 
3048 	new = h ? (uintptr_t)h : (uintptr_t)an_not_handled;
3049 	loc = (uintptr_t *) &sc->an_handler;
3050 	atomic_store_rel_ptr(loc, new);
3051 
3052 	return (0);
3053 }
3054 
3055 static int
3056 fw_msg_not_handled(struct adapter *sc, const __be64 *rpl)
3057 {
3058 	const struct cpl_fw6_msg *cpl =
3059 	    __containerof(rpl, struct cpl_fw6_msg, data[0]);
3060 
3061 #ifdef INVARIANTS
3062 	panic("%s: fw_msg type %d", __func__, cpl->type);
3063 #else
3064 	log(LOG_ERR, "%s: fw_msg type %d\n", __func__, cpl->type);
3065 #endif
3066 	return (EDOOFUS);
3067 }
3068 
3069 int
3070 t4_register_fw_msg_handler(struct adapter *sc, int type, fw_msg_handler_t h)
3071 {
3072 	uintptr_t *loc, new;
3073 
3074 	if (type >= nitems(sc->fw_msg_handler))
3075 		return (EINVAL);
3076 
3077 	new = h ? (uintptr_t)h : (uintptr_t)fw_msg_not_handled;
3078 	loc = (uintptr_t *) &sc->fw_msg_handler[type];
3079 	atomic_store_rel_ptr(loc, new);
3080 
3081 	return (0);
3082 }
3083 
3084 static int
3085 t4_sysctls(struct adapter *sc)
3086 {
3087 	struct sysctl_ctx_list *ctx;
3088 	struct sysctl_oid *oid;
3089 	struct sysctl_oid_list *children, *c0;
3090 	static char *caps[] = {
3091 		"\20\1PPP\2QFC\3DCBX",			/* caps[0] linkcaps */
3092 		"\20\1NIC\2VM\3IDS\4UM\5UM_ISGL",	/* caps[1] niccaps */
3093 		"\20\1TOE",				/* caps[2] toecaps */
3094 		"\20\1RDDP\2RDMAC",			/* caps[3] rdmacaps */
3095 		"\20\1INITIATOR_PDU\2TARGET_PDU"	/* caps[4] iscsicaps */
3096 		    "\3INITIATOR_CNXOFLD\4TARGET_CNXOFLD"
3097 		    "\5INITIATOR_SSNOFLD\6TARGET_SSNOFLD",
3098 		"\20\1INITIATOR\2TARGET\3CTRL_OFLD"	/* caps[5] fcoecaps */
3099 	};
3100 
3101 	ctx = device_get_sysctl_ctx(sc->dev);
3102 
3103 	/*
3104 	 * dev.t4nex.X.
3105 	 */
3106 	oid = device_get_sysctl_tree(sc->dev);
3107 	c0 = children = SYSCTL_CHILDREN(oid);
3108 
3109 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nports", CTLFLAG_RD,
3110 	    &sc->params.nports, 0, "# of ports");
3111 
3112 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "hw_revision", CTLFLAG_RD,
3113 	    &sc->params.rev, 0, "chip hardware revision");
3114 
3115 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version",
3116 	    CTLFLAG_RD, &sc->fw_version, 0, "firmware version");
3117 
3118 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "cf",
3119 	    CTLFLAG_RD, &sc->cfg_file, 0, "configuration file");
3120 
3121 	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cfcsum", CTLFLAG_RD,
3122 	    &sc->cfcsum, 0, "config file checksum");
3123 
3124 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkcaps",
3125 	    CTLTYPE_STRING | CTLFLAG_RD, caps[0], sc->linkcaps,
3126 	    sysctl_bitfield, "A", "available link capabilities");
3127 
3128 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "niccaps",
3129 	    CTLTYPE_STRING | CTLFLAG_RD, caps[1], sc->niccaps,
3130 	    sysctl_bitfield, "A", "available NIC capabilities");
3131 
3132 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "toecaps",
3133 	    CTLTYPE_STRING | CTLFLAG_RD, caps[2], sc->toecaps,
3134 	    sysctl_bitfield, "A", "available TCP offload capabilities");
3135 
3136 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdmacaps",
3137 	    CTLTYPE_STRING | CTLFLAG_RD, caps[3], sc->rdmacaps,
3138 	    sysctl_bitfield, "A", "available RDMA capabilities");
3139 
3140 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "iscsicaps",
3141 	    CTLTYPE_STRING | CTLFLAG_RD, caps[4], sc->iscsicaps,
3142 	    sysctl_bitfield, "A", "available iSCSI capabilities");
3143 
3144 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoecaps",
3145 	    CTLTYPE_STRING | CTLFLAG_RD, caps[5], sc->fcoecaps,
3146 	    sysctl_bitfield, "A", "available FCoE capabilities");
3147 
3148 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD,
3149 	    &sc->params.vpd.cclk, 0, "core clock frequency (in kHz)");
3150 
3151 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_timers",
3152 	    CTLTYPE_STRING | CTLFLAG_RD, sc->sge.timer_val,
3153 	    sizeof(sc->sge.timer_val), sysctl_int_array, "A",
3154 	    "interrupt holdoff timer values (us)");
3155 
3156 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pkt_counts",
3157 	    CTLTYPE_STRING | CTLFLAG_RD, sc->sge.counter_val,
3158 	    sizeof(sc->sge.counter_val), sysctl_int_array, "A",
3159 	    "interrupt holdoff packet counter values");
3160 
3161 #ifdef SBUF_DRAIN
3162 	/*
3163 	 * dev.t4nex.X.misc.  Marked CTLFLAG_SKIP to avoid information overload.
3164 	 */
3165 	oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "misc",
3166 	    CTLFLAG_RD | CTLFLAG_SKIP, NULL,
3167 	    "logs and miscellaneous information");
3168 	children = SYSCTL_CHILDREN(oid);
3169 
3170 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cctrl",
3171 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
3172 	    sysctl_cctrl, "A", "congestion control");
3173 
3174 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cpl_stats",
3175 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
3176 	    sysctl_cpl_stats, "A", "CPL statistics");
3177 
3178 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ddp_stats",
3179 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
3180 	    sysctl_ddp_stats, "A", "DDP statistics");
3181 
3182 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "devlog",
3183 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
3184 	    sysctl_devlog, "A", "firmware's device log");
3185 
3186 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoe_stats",
3187 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
3188 	    sysctl_fcoe_stats, "A", "FCoE statistics");
3189 
3190 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "hw_sched",
3191 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
3192 	    sysctl_hw_sched, "A", "hardware scheduler");
3193 
3194 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "l2t",
3195 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
3196 	    sysctl_l2t, "A", "hardware L2 table");
3197 
3198 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "lb_stats",
3199 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
3200 	    sysctl_lb_stats, "A", "loopback statistics");
3201 
3202 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "meminfo",
3203 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
3204 	    sysctl_meminfo, "A", "memory regions");
3205 
3206 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "path_mtus",
3207 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
3208 	    sysctl_path_mtus, "A", "path MTUs");
3209 
3210 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pm_stats",
3211 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
3212 	    sysctl_pm_stats, "A", "PM statistics");
3213 
3214 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_stats",
3215 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
3216 	    sysctl_rdma_stats, "A", "RDMA statistics");
3217 
3218 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tcp_stats",
3219 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
3220 	    sysctl_tcp_stats, "A", "TCP statistics");
3221 
3222 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tids",
3223 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
3224 	    sysctl_tids, "A", "TID information");
3225 
3226 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_err_stats",
3227 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
3228 	    sysctl_tp_err_stats, "A", "TP error statistics");
3229 
3230 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_rate",
3231 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
3232 	    sysctl_tx_rate, "A", "Tx rate");
3233 #endif
3234 
3235 #ifdef TCP_OFFLOAD
3236 	if (is_offload(sc)) {
3237 		/*
3238 		 * dev.t4nex.X.toe.
3239 		 */
3240 		oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "toe", CTLFLAG_RD,
3241 		    NULL, "TOE parameters");
3242 		children = SYSCTL_CHILDREN(oid);
3243 
3244 		sc->tt.sndbuf = 256 * 1024;
3245 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sndbuf", CTLFLAG_RW,
3246 		    &sc->tt.sndbuf, 0, "max hardware send buffer size");
3247 
3248 		sc->tt.ddp = 0;
3249 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp", CTLFLAG_RW,
3250 		    &sc->tt.ddp, 0, "DDP allowed");
3251 
3252 		sc->tt.indsz = G_INDICATESIZE(t4_read_reg(sc, A_TP_PARA_REG5));
3253 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "indsz", CTLFLAG_RW,
3254 		    &sc->tt.indsz, 0, "DDP max indicate size allowed");
3255 
3256 		sc->tt.ddp_thres =
3257 		    G_RXCOALESCESIZE(t4_read_reg(sc, A_TP_PARA_REG2));
3258 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp_thres", CTLFLAG_RW,
3259 		    &sc->tt.ddp_thres, 0, "DDP threshold");
3260 	}
3261 #endif
3262 
3263 
3264 	return (0);
3265 }
3266 
3267 static int
3268 cxgbe_sysctls(struct port_info *pi)
3269 {
3270 	struct sysctl_ctx_list *ctx;
3271 	struct sysctl_oid *oid;
3272 	struct sysctl_oid_list *children;
3273 
3274 	ctx = device_get_sysctl_ctx(pi->dev);
3275 
3276 	/*
3277 	 * dev.cxgbe.X.
3278 	 */
3279 	oid = device_get_sysctl_tree(pi->dev);
3280 	children = SYSCTL_CHILDREN(oid);
3281 
3282 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nrxq", CTLFLAG_RD,
3283 	    &pi->nrxq, 0, "# of rx queues");
3284 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ntxq", CTLFLAG_RD,
3285 	    &pi->ntxq, 0, "# of tx queues");
3286 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_rxq", CTLFLAG_RD,
3287 	    &pi->first_rxq, 0, "index of first rx queue");
3288 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_txq", CTLFLAG_RD,
3289 	    &pi->first_txq, 0, "index of first tx queue");
3290 
3291 #ifdef TCP_OFFLOAD
3292 	if (is_offload(pi->adapter)) {
3293 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldrxq", CTLFLAG_RD,
3294 		    &pi->nofldrxq, 0,
3295 		    "# of rx queues for offloaded TCP connections");
3296 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldtxq", CTLFLAG_RD,
3297 		    &pi->nofldtxq, 0,
3298 		    "# of tx queues for offloaded TCP connections");
3299 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_rxq",
3300 		    CTLFLAG_RD, &pi->first_ofld_rxq, 0,
3301 		    "index of first TOE rx queue");
3302 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_txq",
3303 		    CTLFLAG_RD, &pi->first_ofld_txq, 0,
3304 		    "index of first TOE tx queue");
3305 	}
3306 #endif
3307 
3308 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx",
3309 	    CTLTYPE_INT | CTLFLAG_RW, pi, 0, sysctl_holdoff_tmr_idx, "I",
3310 	    "holdoff timer index");
3311 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx",
3312 	    CTLTYPE_INT | CTLFLAG_RW, pi, 0, sysctl_holdoff_pktc_idx, "I",
3313 	    "holdoff packet counter index");
3314 
3315 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_rxq",
3316 	    CTLTYPE_INT | CTLFLAG_RW, pi, 0, sysctl_qsize_rxq, "I",
3317 	    "rx queue size");
3318 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_txq",
3319 	    CTLTYPE_INT | CTLFLAG_RW, pi, 0, sysctl_qsize_txq, "I",
3320 	    "tx queue size");
3321 
3322 	/*
3323 	 * dev.cxgbe.X.stats.
3324 	 */
3325 	oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "stats", CTLFLAG_RD,
3326 	    NULL, "port statistics");
3327 	children = SYSCTL_CHILDREN(oid);
3328 
3329 #define SYSCTL_ADD_T4_REG64(pi, name, desc, reg) \
3330 	SYSCTL_ADD_OID(ctx, children, OID_AUTO, name, \
3331 	    CTLTYPE_U64 | CTLFLAG_RD, pi->adapter, reg, \
3332 	    sysctl_handle_t4_reg64, "QU", desc)
3333 
3334 	SYSCTL_ADD_T4_REG64(pi, "tx_octets", "# of octets in good frames",
3335 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BYTES_L));
3336 	SYSCTL_ADD_T4_REG64(pi, "tx_frames", "total # of good frames",
3337 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_FRAMES_L));
3338 	SYSCTL_ADD_T4_REG64(pi, "tx_bcast_frames", "# of broadcast frames",
3339 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BCAST_L));
3340 	SYSCTL_ADD_T4_REG64(pi, "tx_mcast_frames", "# of multicast frames",
3341 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_MCAST_L));
3342 	SYSCTL_ADD_T4_REG64(pi, "tx_ucast_frames", "# of unicast frames",
3343 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_UCAST_L));
3344 	SYSCTL_ADD_T4_REG64(pi, "tx_error_frames", "# of error frames",
3345 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_ERROR_L));
3346 	SYSCTL_ADD_T4_REG64(pi, "tx_frames_64",
3347 	    "# of tx frames in this range",
3348 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_64B_L));
3349 	SYSCTL_ADD_T4_REG64(pi, "tx_frames_65_127",
3350 	    "# of tx frames in this range",
3351 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_65B_127B_L));
3352 	SYSCTL_ADD_T4_REG64(pi, "tx_frames_128_255",
3353 	    "# of tx frames in this range",
3354 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_128B_255B_L));
3355 	SYSCTL_ADD_T4_REG64(pi, "tx_frames_256_511",
3356 	    "# of tx frames in this range",
3357 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_256B_511B_L));
3358 	SYSCTL_ADD_T4_REG64(pi, "tx_frames_512_1023",
3359 	    "# of tx frames in this range",
3360 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_512B_1023B_L));
3361 	SYSCTL_ADD_T4_REG64(pi, "tx_frames_1024_1518",
3362 	    "# of tx frames in this range",
3363 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1024B_1518B_L));
3364 	SYSCTL_ADD_T4_REG64(pi, "tx_frames_1519_max",
3365 	    "# of tx frames in this range",
3366 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1519B_MAX_L));
3367 	SYSCTL_ADD_T4_REG64(pi, "tx_drop", "# of dropped tx frames",
3368 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_DROP_L));
3369 	SYSCTL_ADD_T4_REG64(pi, "tx_pause", "# of pause frames transmitted",
3370 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PAUSE_L));
3371 	SYSCTL_ADD_T4_REG64(pi, "tx_ppp0", "# of PPP prio 0 frames transmitted",
3372 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP0_L));
3373 	SYSCTL_ADD_T4_REG64(pi, "tx_ppp1", "# of PPP prio 1 frames transmitted",
3374 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP1_L));
3375 	SYSCTL_ADD_T4_REG64(pi, "tx_ppp2", "# of PPP prio 2 frames transmitted",
3376 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP2_L));
3377 	SYSCTL_ADD_T4_REG64(pi, "tx_ppp3", "# of PPP prio 3 frames transmitted",
3378 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP3_L));
3379 	SYSCTL_ADD_T4_REG64(pi, "tx_ppp4", "# of PPP prio 4 frames transmitted",
3380 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP4_L));
3381 	SYSCTL_ADD_T4_REG64(pi, "tx_ppp5", "# of PPP prio 5 frames transmitted",
3382 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP5_L));
3383 	SYSCTL_ADD_T4_REG64(pi, "tx_ppp6", "# of PPP prio 6 frames transmitted",
3384 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP6_L));
3385 	SYSCTL_ADD_T4_REG64(pi, "tx_ppp7", "# of PPP prio 7 frames transmitted",
3386 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP7_L));
3387 
3388 	SYSCTL_ADD_T4_REG64(pi, "rx_octets", "# of octets in good frames",
3389 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BYTES_L));
3390 	SYSCTL_ADD_T4_REG64(pi, "rx_frames", "total # of good frames",
3391 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_FRAMES_L));
3392 	SYSCTL_ADD_T4_REG64(pi, "rx_bcast_frames", "# of broadcast frames",
3393 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BCAST_L));
3394 	SYSCTL_ADD_T4_REG64(pi, "rx_mcast_frames", "# of multicast frames",
3395 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MCAST_L));
3396 	SYSCTL_ADD_T4_REG64(pi, "rx_ucast_frames", "# of unicast frames",
3397 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_UCAST_L));
3398 	SYSCTL_ADD_T4_REG64(pi, "rx_too_long", "# of frames exceeding MTU",
3399 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_ERROR_L));
3400 	SYSCTL_ADD_T4_REG64(pi, "rx_jabber", "# of jabber frames",
3401 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_CRC_ERROR_L));
3402 	SYSCTL_ADD_T4_REG64(pi, "rx_fcs_err",
3403 	    "# of frames received with bad FCS",
3404 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_CRC_ERROR_L));
3405 	SYSCTL_ADD_T4_REG64(pi, "rx_len_err",
3406 	    "# of frames received with length error",
3407 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LEN_ERROR_L));
3408 	SYSCTL_ADD_T4_REG64(pi, "rx_symbol_err", "symbol errors",
3409 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_SYM_ERROR_L));
3410 	SYSCTL_ADD_T4_REG64(pi, "rx_runt", "# of short frames received",
3411 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LESS_64B_L));
3412 	SYSCTL_ADD_T4_REG64(pi, "rx_frames_64",
3413 	    "# of rx frames in this range",
3414 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_64B_L));
3415 	SYSCTL_ADD_T4_REG64(pi, "rx_frames_65_127",
3416 	    "# of rx frames in this range",
3417 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_65B_127B_L));
3418 	SYSCTL_ADD_T4_REG64(pi, "rx_frames_128_255",
3419 	    "# of rx frames in this range",
3420 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_128B_255B_L));
3421 	SYSCTL_ADD_T4_REG64(pi, "rx_frames_256_511",
3422 	    "# of rx frames in this range",
3423 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_256B_511B_L));
3424 	SYSCTL_ADD_T4_REG64(pi, "rx_frames_512_1023",
3425 	    "# of rx frames in this range",
3426 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_512B_1023B_L));
3427 	SYSCTL_ADD_T4_REG64(pi, "rx_frames_1024_1518",
3428 	    "# of rx frames in this range",
3429 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1024B_1518B_L));
3430 	SYSCTL_ADD_T4_REG64(pi, "rx_frames_1519_max",
3431 	    "# of rx frames in this range",
3432 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1519B_MAX_L));
3433 	SYSCTL_ADD_T4_REG64(pi, "rx_pause", "# of pause frames received",
3434 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PAUSE_L));
3435 	SYSCTL_ADD_T4_REG64(pi, "rx_ppp0", "# of PPP prio 0 frames received",
3436 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP0_L));
3437 	SYSCTL_ADD_T4_REG64(pi, "rx_ppp1", "# of PPP prio 1 frames received",
3438 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP1_L));
3439 	SYSCTL_ADD_T4_REG64(pi, "rx_ppp2", "# of PPP prio 2 frames received",
3440 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP2_L));
3441 	SYSCTL_ADD_T4_REG64(pi, "rx_ppp3", "# of PPP prio 3 frames received",
3442 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP3_L));
3443 	SYSCTL_ADD_T4_REG64(pi, "rx_ppp4", "# of PPP prio 4 frames received",
3444 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP4_L));
3445 	SYSCTL_ADD_T4_REG64(pi, "rx_ppp5", "# of PPP prio 5 frames received",
3446 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP5_L));
3447 	SYSCTL_ADD_T4_REG64(pi, "rx_ppp6", "# of PPP prio 6 frames received",
3448 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP6_L));
3449 	SYSCTL_ADD_T4_REG64(pi, "rx_ppp7", "# of PPP prio 7 frames received",
3450 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP7_L));
3451 
3452 #undef SYSCTL_ADD_T4_REG64
3453 
3454 #define SYSCTL_ADD_T4_PORTSTAT(name, desc) \
3455 	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, #name, CTLFLAG_RD, \
3456 	    &pi->stats.name, desc)
3457 
3458 	/* We get these from port_stats and they may be stale by up to 1s */
3459 	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow0,
3460 	    "# drops due to buffer-group 0 overflows");
3461 	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow1,
3462 	    "# drops due to buffer-group 1 overflows");
3463 	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow2,
3464 	    "# drops due to buffer-group 2 overflows");
3465 	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow3,
3466 	    "# drops due to buffer-group 3 overflows");
3467 	SYSCTL_ADD_T4_PORTSTAT(rx_trunc0,
3468 	    "# of buffer-group 0 truncated packets");
3469 	SYSCTL_ADD_T4_PORTSTAT(rx_trunc1,
3470 	    "# of buffer-group 1 truncated packets");
3471 	SYSCTL_ADD_T4_PORTSTAT(rx_trunc2,
3472 	    "# of buffer-group 2 truncated packets");
3473 	SYSCTL_ADD_T4_PORTSTAT(rx_trunc3,
3474 	    "# of buffer-group 3 truncated packets");
3475 
3476 #undef SYSCTL_ADD_T4_PORTSTAT
3477 
3478 	return (0);
3479 }
3480 
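/*
 * Render the int array at arg1 (arg2 bytes long) as a space separated list.
 */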
3481 static int
3482 sysctl_int_array(SYSCTL_HANDLER_ARGS)
3483 {
3484 	int rc, *i;
3485 	struct sbuf sb;
3486 
3487 	sbuf_new(&sb, NULL, 32, SBUF_AUTOEXTEND);
3488 	for (i = arg1; arg2; arg2 -= sizeof(int), i++)
3489 		sbuf_printf(&sb, "%d ", *i);
3490 	sbuf_trim(&sb);
3491 	sbuf_finish(&sb);
3492 	rc = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
3493 	sbuf_delete(&sb);
3494 	return (rc);
3495 }
3496 
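/*
 * Render arg2 as a bit field using the %b format description in arg1.
 */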
3497 static int
3498 sysctl_bitfield(SYSCTL_HANDLER_ARGS)
3499 {
3500 	int rc;
3501 	struct sbuf *sb;
3502 
3503 	rc = sysctl_wire_old_buffer(req, 0);
3504 	if (rc != 0)
3505 		return (rc);
3506 
3507 	sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
3508 	if (sb == NULL)
3509 		return (ENOMEM);
3510 
3511 	sbuf_printf(sb, "%b", (int)arg2, (char *)arg1);
3512 	rc = sbuf_finish(sb);
3513 	sbuf_delete(sb);
3514 
3515 	return (rc);
3516 }
3517 
3518 static int
3519 sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS)
3520 {
3521 	struct port_info *pi = arg1;
3522 	struct adapter *sc = pi->adapter;
3523 	int idx, rc, i;
3524 	struct sge_rxq *rxq;
3525 	uint8_t v;
3526 
3527 	idx = pi->tmr_idx;
3528 
3529 	rc = sysctl_handle_int(oidp, &idx, 0, req);
3530 	if (rc != 0 || req->newptr == NULL)
3531 		return (rc);
3532 
3533 	if (idx < 0 || idx >= SGE_NTIMERS)
3534 		return (EINVAL);
3535 
3536 	rc = begin_synchronized_op(sc, pi, HOLD_LOCK | SLEEP_OK | INTR_OK,
3537 	    "t4tmr");
3538 	if (rc)
3539 		return (rc);
3540 
3541 	v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(pi->pktc_idx != -1);
3542 	for_each_rxq(pi, i, rxq) {
3543 #ifdef atomic_store_rel_8
3544 		atomic_store_rel_8(&rxq->iq.intr_params, v);
3545 #else
3546 		rxq->iq.intr_params = v;
3547 #endif
3548 	}
3549 	pi->tmr_idx = idx;
3550 
3551 	end_synchronized_op(sc, LOCK_HELD);
3552 	return (0);
3553 }
3554 
3555 static int
3556 sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS)
3557 {
3558 	struct port_info *pi = arg1;
3559 	struct adapter *sc = pi->adapter;
3560 	int idx, rc;
3561 
3562 	idx = pi->pktc_idx;
3563 
3564 	rc = sysctl_handle_int(oidp, &idx, 0, req);
3565 	if (rc != 0 || req->newptr == NULL)
3566 		return (rc);
3567 
3568 	if (idx < -1 || idx >= SGE_NCOUNTERS)
3569 		return (EINVAL);
3570 
3571 	rc = begin_synchronized_op(sc, pi, HOLD_LOCK | SLEEP_OK | INTR_OK,
3572 	    "t4pktc");
3573 	if (rc)
3574 		return (rc);
3575 
3576 	if (pi->flags & PORT_INIT_DONE)
3577 		rc = EBUSY; /* cannot be changed once the queues are created */
3578 	else
3579 		pi->pktc_idx = idx;
3580 
3581 	end_synchronized_op(sc, LOCK_HELD);
3582 	return (rc);
3583 }
3584 
3585 static int
3586 sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS)
3587 {
3588 	struct port_info *pi = arg1;
3589 	struct adapter *sc = pi->adapter;
3590 	int qsize, rc;
3591 
3592 	qsize = pi->qsize_rxq;
3593 
3594 	rc = sysctl_handle_int(oidp, &qsize, 0, req);
3595 	if (rc != 0 || req->newptr == NULL)
3596 		return (rc);
3597 
3598 	if (qsize < 128 || (qsize & 7))
3599 		return (EINVAL);
3600 
3601 	rc = begin_synchronized_op(sc, pi, HOLD_LOCK | SLEEP_OK | INTR_OK,
3602 	    "t4rxqs");
3603 	if (rc)
3604 		return (rc);
3605 
3606 	if (pi->flags & PORT_INIT_DONE)
3607 		rc = EBUSY; /* cannot be changed once the queues are created */
3608 	else
3609 		pi->qsize_rxq = qsize;
3610 
3611 	end_synchronized_op(sc, LOCK_HELD);
3612 	return (rc);
3613 }
3614 
3615 static int
3616 sysctl_qsize_txq(SYSCTL_HANDLER_ARGS)
3617 {
3618 	struct port_info *pi = arg1;
3619 	struct adapter *sc = pi->adapter;
3620 	int qsize, rc;
3621 
3622 	qsize = pi->qsize_txq;
3623 
3624 	rc = sysctl_handle_int(oidp, &qsize, 0, req);
3625 	if (rc != 0 || req->newptr == NULL)
3626 		return (rc);
3627 
3628 	/* bufring size must be a power of 2 */
3629 	if (qsize < 128 || !powerof2(qsize))
3630 		return (EINVAL);
3631 
3632 	rc = begin_synchronized_op(sc, pi, HOLD_LOCK | SLEEP_OK | INTR_OK,
3633 	    "t4txqs");
3634 	if (rc)
3635 		return (rc);
3636 
3637 	if (pi->flags & PORT_INIT_DONE)
3638 		rc = EBUSY; /* cannot be changed once the queues are created */
3639 	else
3640 		pi->qsize_txq = qsize;
3641 
3642 	end_synchronized_op(sc, LOCK_HELD);
3643 	return (rc);
3644 }
3645 
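/*
 * Read-only sysctl handler for a 64-bit adapter register; arg2 carries the
 * register's offset.
 */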
3646 static int
3647 sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS)
3648 {
3649 	struct adapter *sc = arg1;
3650 	int reg = arg2;
3651 	uint64_t val;
3652 
3653 	val = t4_read_reg64(sc, reg);
3654 
3655 	return (sysctl_handle_64(oidp, &val, 0, req));
3656 }
3657 
3658 #ifdef SBUF_DRAIN
3659 static int
3660 sysctl_cctrl(SYSCTL_HANDLER_ARGS)
3661 {
3662 	struct adapter *sc = arg1;
3663 	struct sbuf *sb;
3664 	int rc, i;
3665 	uint16_t incr[NMTUS][NCCTRL_WIN];
3666 	static const char *dec_fac[] = {
3667 		"0.5", "0.5625", "0.625", "0.6875", "0.75", "0.8125", "0.875",
3668 		"0.9375"
3669 	};
3670 
3671 	rc = sysctl_wire_old_buffer(req, 0);
3672 	if (rc != 0)
3673 		return (rc);
3674 
3675 	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
3676 	if (sb == NULL)
3677 		return (ENOMEM);
3678 
3679 	t4_read_cong_tbl(sc, incr);
3680 
3681 	for (i = 0; i < NCCTRL_WIN; ++i) {
3682 		sbuf_printf(sb, "%2d: %4u %4u %4u %4u %4u %4u %4u %4u\n", i,
3683 		    incr[0][i], incr[1][i], incr[2][i], incr[3][i], incr[4][i],
3684 		    incr[5][i], incr[6][i], incr[7][i]);
3685 		sbuf_printf(sb, "%8u %4u %4u %4u %4u %4u %4u %4u %5u %s\n",
3686 		    incr[8][i], incr[9][i], incr[10][i], incr[11][i],
3687 		    incr[12][i], incr[13][i], incr[14][i], incr[15][i],
3688 		    sc->params.a_wnd[i], dec_fac[sc->params.b_wnd[i]]);
3689 	}
3690 
3691 	rc = sbuf_finish(sb);
3692 	sbuf_delete(sb);
3693 
3694 	return (rc);
3695 }
3696 
3697 static int
3698 sysctl_cpl_stats(SYSCTL_HANDLER_ARGS)
3699 {
3700 	struct adapter *sc = arg1;
3701 	struct sbuf *sb;
3702 	int rc;
3703 	struct tp_cpl_stats stats;
3704 
3705 	rc = sysctl_wire_old_buffer(req, 0);
3706 	if (rc != 0)
3707 		return (rc);
3708 
3709 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
3710 	if (sb == NULL)
3711 		return (ENOMEM);
3712 
3713 	t4_tp_get_cpl_stats(sc, &stats);
3714 
3715 	sbuf_printf(sb, "                 channel 0  channel 1  channel 2  "
3716 	    "channel 3\n");
3717 	sbuf_printf(sb, "CPL requests:   %10u %10u %10u %10u\n",
3718 		   stats.req[0], stats.req[1], stats.req[2], stats.req[3]);
3719 	sbuf_printf(sb, "CPL responses:  %10u %10u %10u %10u",
3720 		   stats.rsp[0], stats.rsp[1], stats.rsp[2], stats.rsp[3]);
3721 
3722 	rc = sbuf_finish(sb);
3723 	sbuf_delete(sb);
3724 
3725 	return (rc);
3726 }
3727 
3728 static int
3729 sysctl_ddp_stats(SYSCTL_HANDLER_ARGS)
3730 {
3731 	struct adapter *sc = arg1;
3732 	struct sbuf *sb;
3733 	int rc;
3734 	struct tp_usm_stats stats;
3735 
3736 	rc = sysctl_wire_old_buffer(req, 0);
3737 	if (rc != 0)
3738 		return (rc);
3739 
3740 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
3741 	if (sb == NULL)
3742 		return (ENOMEM);
3743 
3744 	t4_get_usm_stats(sc, &stats);
3745 
3746 	sbuf_printf(sb, "Frames: %u\n", stats.frames);
3747 	sbuf_printf(sb, "Octets: %ju\n", stats.octets);
3748 	sbuf_printf(sb, "Drops:  %u", stats.drops);
3749 
3750 	rc = sbuf_finish(sb);
3751 	sbuf_delete(sb);
3752 
3753 	return (rc);
3754 }
3755 
3756 const char *devlog_level_strings[] = {
3757 	[FW_DEVLOG_LEVEL_EMERG]		= "EMERG",
3758 	[FW_DEVLOG_LEVEL_CRIT]		= "CRIT",
3759 	[FW_DEVLOG_LEVEL_ERR]		= "ERR",
3760 	[FW_DEVLOG_LEVEL_NOTICE]	= "NOTICE",
3761 	[FW_DEVLOG_LEVEL_INFO]		= "INFO",
3762 	[FW_DEVLOG_LEVEL_DEBUG]		= "DEBUG"
3763 };
3764 
3765 const char *devlog_facility_strings[] = {
3766 	[FW_DEVLOG_FACILITY_CORE]	= "CORE",
3767 	[FW_DEVLOG_FACILITY_SCHED]	= "SCHED",
3768 	[FW_DEVLOG_FACILITY_TIMER]	= "TIMER",
3769 	[FW_DEVLOG_FACILITY_RES]	= "RES",
3770 	[FW_DEVLOG_FACILITY_HW]		= "HW",
3771 	[FW_DEVLOG_FACILITY_FLR]	= "FLR",
3772 	[FW_DEVLOG_FACILITY_DMAQ]	= "DMAQ",
3773 	[FW_DEVLOG_FACILITY_PHY]	= "PHY",
3774 	[FW_DEVLOG_FACILITY_MAC]	= "MAC",
3775 	[FW_DEVLOG_FACILITY_PORT]	= "PORT",
3776 	[FW_DEVLOG_FACILITY_VI]		= "VI",
3777 	[FW_DEVLOG_FACILITY_FILTER]	= "FILTER",
3778 	[FW_DEVLOG_FACILITY_ACL]	= "ACL",
3779 	[FW_DEVLOG_FACILITY_TM]		= "TM",
3780 	[FW_DEVLOG_FACILITY_QFC]	= "QFC",
3781 	[FW_DEVLOG_FACILITY_DCB]	= "DCB",
3782 	[FW_DEVLOG_FACILITY_ETH]	= "ETH",
3783 	[FW_DEVLOG_FACILITY_OFLD]	= "OFLD",
3784 	[FW_DEVLOG_FACILITY_RI]		= "RI",
3785 	[FW_DEVLOG_FACILITY_ISCSI]	= "ISCSI",
3786 	[FW_DEVLOG_FACILITY_FCOE]	= "FCOE",
3787 	[FW_DEVLOG_FACILITY_FOISCSI]	= "FOISCSI",
3788 	[FW_DEVLOG_FACILITY_FOFCOE]	= "FOFCOE"
3789 };
3790 
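/*
 * Read the firmware's device log out of card memory and display it.  The log
 * is a circular buffer, so the first pass over the entries locates the one
 * with the oldest timestamp and the dump starts there.
 */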
3791 static int
3792 sysctl_devlog(SYSCTL_HANDLER_ARGS)
3793 {
3794 	struct adapter *sc = arg1;
3795 	struct devlog_params *dparams = &sc->params.devlog;
3796 	struct fw_devlog_e *buf, *e;
3797 	int i, j, rc, nentries, first = 0;
3798 	struct sbuf *sb;
3799 	uint64_t ftstamp = UINT64_MAX;
3800 
3801 	if (dparams->start == 0)
3802 		return (ENXIO);
3803 
3804 	nentries = dparams->size / sizeof(struct fw_devlog_e);
3805 
3806 	buf = malloc(dparams->size, M_CXGBE, M_NOWAIT);
3807 	if (buf == NULL)
3808 		return (ENOMEM);
3809 
3810 	rc = -t4_mem_read(sc, dparams->memtype, dparams->start, dparams->size,
3811 	    (void *)buf);
3812 	if (rc != 0)
3813 		goto done;
3814 
3815 	for (i = 0; i < nentries; i++) {
3816 		e = &buf[i];
3817 
3818 		if (e->timestamp == 0)
3819 			break;	/* end */
3820 
3821 		e->timestamp = be64toh(e->timestamp);
3822 		e->seqno = be32toh(e->seqno);
3823 		for (j = 0; j < 8; j++)
3824 			e->params[j] = be32toh(e->params[j]);
3825 
3826 		if (e->timestamp < ftstamp) {
3827 			ftstamp = e->timestamp;
3828 			first = i;
3829 		}
3830 	}
3831 
3832 	if (buf[first].timestamp == 0)
3833 		goto done;	/* nothing in the log */
3834 
3835 	rc = sysctl_wire_old_buffer(req, 0);
3836 	if (rc != 0)
3837 		goto done;
3838 
3839 	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
3840 	if (sb == NULL) {
3841 		rc = ENOMEM;
3842 		goto done;
3843 	}
3844 	sbuf_printf(sb, "%10s  %15s  %8s  %8s  %s\n",
3845 	    "Seq#", "Tstamp", "Level", "Facility", "Message");
3846 
3847 	i = first;
3848 	do {
3849 		e = &buf[i];
3850 		if (e->timestamp == 0)
3851 			break;	/* end */
3852 
3853 		sbuf_printf(sb, "%10d  %15ju  %8s  %8s  ",
3854 		    e->seqno, e->timestamp,
3855 		    (e->level < nitems(devlog_level_strings) ?
3856 			devlog_level_strings[e->level] : "UNKNOWN"),
3857 		    (e->facility < nitems(devlog_facility_strings) ?
3858 			devlog_facility_strings[e->facility] : "UNKNOWN"));
3859 		sbuf_printf(sb, e->fmt, e->params[0], e->params[1],
3860 		    e->params[2], e->params[3], e->params[4],
3861 		    e->params[5], e->params[6], e->params[7]);
3862 
3863 		if (++i == nentries)
3864 			i = 0;
3865 	} while (i != first);
3866 
3867 	rc = sbuf_finish(sb);
3868 	sbuf_delete(sb);
3869 done:
3870 	free(buf, M_CXGBE);
3871 	return (rc);
3872 }
3873 
3874 static int
3875 sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS)
3876 {
3877 	struct adapter *sc = arg1;
3878 	struct sbuf *sb;
3879 	int rc;
3880 	struct tp_fcoe_stats stats[4];
3881 
3882 	rc = sysctl_wire_old_buffer(req, 0);
3883 	if (rc != 0)
3884 		return (rc);
3885 
3886 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
3887 	if (sb == NULL)
3888 		return (ENOMEM);
3889 
3890 	t4_get_fcoe_stats(sc, 0, &stats[0]);
3891 	t4_get_fcoe_stats(sc, 1, &stats[1]);
3892 	t4_get_fcoe_stats(sc, 2, &stats[2]);
3893 	t4_get_fcoe_stats(sc, 3, &stats[3]);
3894 
3895 	sbuf_printf(sb, "                   channel 0        channel 1        "
3896 	    "channel 2        channel 3\n");
3897 	sbuf_printf(sb, "octetsDDP:  %16ju %16ju %16ju %16ju\n",
3898 	    stats[0].octetsDDP, stats[1].octetsDDP, stats[2].octetsDDP,
3899 	    stats[3].octetsDDP);
3900 	sbuf_printf(sb, "framesDDP:  %16u %16u %16u %16u\n", stats[0].framesDDP,
3901 	    stats[1].framesDDP, stats[2].framesDDP, stats[3].framesDDP);
3902 	sbuf_printf(sb, "framesDrop: %16u %16u %16u %16u",
3903 	    stats[0].framesDrop, stats[1].framesDrop, stats[2].framesDrop,
3904 	    stats[3].framesDrop);
3905 
3906 	rc = sbuf_finish(sb);
3907 	sbuf_delete(sb);
3908 
3909 	return (rc);
3910 }
3911 
3912 static int
3913 sysctl_hw_sched(SYSCTL_HANDLER_ARGS)
3914 {
3915 	struct adapter *sc = arg1;
3916 	struct sbuf *sb;
3917 	int rc, i;
3918 	unsigned int map, kbps, ipg, mode;
3919 	unsigned int pace_tab[NTX_SCHED];
3920 
3921 	rc = sysctl_wire_old_buffer(req, 0);
3922 	if (rc != 0)
3923 		return (rc);
3924 
3925 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
3926 	if (sb == NULL)
3927 		return (ENOMEM);
3928 
3929 	map = t4_read_reg(sc, A_TP_TX_MOD_QUEUE_REQ_MAP);
3930 	mode = G_TIMERMODE(t4_read_reg(sc, A_TP_MOD_CONFIG));
3931 	t4_read_pace_tbl(sc, pace_tab);
3932 
3933 	sbuf_printf(sb, "Scheduler  Mode   Channel  Rate (Kbps)   "
3934 	    "Class IPG (0.1 ns)   Flow IPG (us)");
3935 
3936 	for (i = 0; i < NTX_SCHED; ++i, map >>= 2) {
3937 		t4_get_tx_sched(sc, i, &kbps, &ipg);
3938 		sbuf_printf(sb, "\n    %u      %-5s     %u     ", i,
3939 		    (mode & (1 << i)) ? "flow" : "class", map & 3);
3940 		if (kbps)
3941 			sbuf_printf(sb, "%9u     ", kbps);
3942 		else
3943 			sbuf_printf(sb, " disabled     ");
3944 
3945 		if (ipg)
3946 			sbuf_printf(sb, "%13u        ", ipg);
3947 		else
3948 			sbuf_printf(sb, "     disabled        ");
3949 
3950 		if (pace_tab[i])
3951 			sbuf_printf(sb, "%10u", pace_tab[i]);
3952 		else
3953 			sbuf_printf(sb, "  disabled");
3954 	}
3955 
3956 	rc = sbuf_finish(sb);
3957 	sbuf_delete(sb);
3958 
3959 	return (rc);
3960 }
3961 
3962 static int
3963 sysctl_lb_stats(SYSCTL_HANDLER_ARGS)
3964 {
3965 	struct adapter *sc = arg1;
3966 	struct sbuf *sb;
3967 	int rc, i, j;
3968 	uint64_t *p0, *p1;
3969 	struct lb_port_stats s[2];
3970 	static const char *stat_name[] = {
3971 		"OctetsOK:", "FramesOK:", "BcastFrames:", "McastFrames:",
3972 		"UcastFrames:", "ErrorFrames:", "Frames64:", "Frames65To127:",
3973 		"Frames128To255:", "Frames256To511:", "Frames512To1023:",
3974 		"Frames1024To1518:", "Frames1519ToMax:", "FramesDropped:",
3975 		"BG0FramesDropped:", "BG1FramesDropped:", "BG2FramesDropped:",
3976 		"BG3FramesDropped:", "BG0FramesTrunc:", "BG1FramesTrunc:",
3977 		"BG2FramesTrunc:", "BG3FramesTrunc:"
3978 	};
3979 
3980 	rc = sysctl_wire_old_buffer(req, 0);
3981 	if (rc != 0)
3982 		return (rc);
3983 
3984 	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
3985 	if (sb == NULL)
3986 		return (ENOMEM);
3987 
3988 	memset(s, 0, sizeof(s));
3989 
3990 	for (i = 0; i < 4; i += 2) {
3991 		t4_get_lb_stats(sc, i, &s[0]);
3992 		t4_get_lb_stats(sc, i + 1, &s[1]);
3993 
3994 		p0 = &s[0].octets;
3995 		p1 = &s[1].octets;
3996 		sbuf_printf(sb, "%s                       Loopback %u"
3997 		    "           Loopback %u", i == 0 ? "" : "\n", i, i + 1);
3998 
3999 		for (j = 0; j < nitems(stat_name); j++)
4000 			sbuf_printf(sb, "\n%-17s %20ju %20ju", stat_name[j],
4001 				   *p0++, *p1++);
4002 	}
4003 
4004 	rc = sbuf_finish(sb);
4005 	sbuf_delete(sb);
4006 
4007 	return (rc);
4008 }
4009 
4010 struct mem_desc {
4011 	unsigned int base;
4012 	unsigned int limit;
4013 	unsigned int idx;
4014 };
4015 
4016 static int
4017 mem_desc_cmp(const void *a, const void *b)
4018 {
4019 	return ((const struct mem_desc *)a)->base -
4020 	       ((const struct mem_desc *)b)->base;
4021 }
4022 
4023 static void
4024 mem_region_show(struct sbuf *sb, const char *name, unsigned int from,
4025     unsigned int to)
4026 {
4027 	unsigned int size;
4028 
4029 	size = to - from + 1;
4030 	if (size == 0)
4031 		return;
4032 
4033 	/* XXX: need humanize_number(3) in libkern for a more readable 'size' */
4034 	sbuf_printf(sb, "%-15s %#x-%#x [%u]\n", name, from, to, size);
4035 }
4036 
4037 static int
4038 sysctl_meminfo(SYSCTL_HANDLER_ARGS)
4039 {
4040 	struct adapter *sc = arg1;
4041 	struct sbuf *sb;
4042 	int rc, i, n;
4043 	uint32_t lo, hi;
4044 	static const char *memory[] = { "EDC0:", "EDC1:", "MC:" };
4045 	static const char *region[] = {
4046 		"DBQ contexts:", "IMSG contexts:", "FLM cache:", "TCBs:",
4047 		"Pstructs:", "Timers:", "Rx FL:", "Tx FL:", "Pstruct FL:",
4048 		"Tx payload:", "Rx payload:", "LE hash:", "iSCSI region:",
4049 		"TDDP region:", "TPT region:", "STAG region:", "RQ region:",
4050 		"RQUDP region:", "PBL region:", "TXPBL region:", "ULPRX state:",
4051 		"ULPTX state:", "On-chip queues:"
4052 	};
4053 	struct mem_desc avail[3];
4054 	struct mem_desc mem[nitems(region) + 3];	/* up to 3 holes */
4055 	struct mem_desc *md = mem;
4056 
4057 	rc = sysctl_wire_old_buffer(req, 0);
4058 	if (rc != 0)
4059 		return (rc);
4060 
4061 	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
4062 	if (sb == NULL)
4063 		return (ENOMEM);
4064 
4065 	for (i = 0; i < nitems(mem); i++) {
4066 		mem[i].limit = 0;
4067 		mem[i].idx = i;
4068 	}
4069 
4070 	/* Find and sort the populated memory ranges */
4071 	i = 0;
4072 	lo = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
4073 	if (lo & F_EDRAM0_ENABLE) {
4074 		hi = t4_read_reg(sc, A_MA_EDRAM0_BAR);
4075 		avail[i].base = G_EDRAM0_BASE(hi) << 20;
4076 		avail[i].limit = avail[i].base + (G_EDRAM0_SIZE(hi) << 20);
4077 		avail[i].idx = 0;
4078 		i++;
4079 	}
4080 	if (lo & F_EDRAM1_ENABLE) {
4081 		hi = t4_read_reg(sc, A_MA_EDRAM1_BAR);
4082 		avail[i].base = G_EDRAM1_BASE(hi) << 20;
4083 		avail[i].limit = avail[i].base + (G_EDRAM1_SIZE(hi) << 20);
4084 		avail[i].idx = 1;
4085 		i++;
4086 	}
4087 	if (lo & F_EXT_MEM_ENABLE) {
4088 		hi = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
4089 		avail[i].base = G_EXT_MEM_BASE(hi) << 20;
4090 		avail[i].limit = avail[i].base + (G_EXT_MEM_SIZE(hi) << 20);
4091 		avail[i].idx = 2;
4092 		i++;
4093 	}
4094 	if (i == 0) {				/* no memory available */
		sbuf_delete(sb);	/* don't leak the sbuf allocated above */
4095 		return (0);
	}
4096 	qsort(avail, i, sizeof(struct mem_desc), mem_desc_cmp);
4097 
4098 	(md++)->base = t4_read_reg(sc, A_SGE_DBQ_CTXT_BADDR);
4099 	(md++)->base = t4_read_reg(sc, A_SGE_IMSG_CTXT_BADDR);
4100 	(md++)->base = t4_read_reg(sc, A_SGE_FLM_CACHE_BADDR);
4101 	(md++)->base = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
4102 	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_BASE);
4103 	(md++)->base = t4_read_reg(sc, A_TP_CMM_TIMER_BASE);
4104 	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_RX_FLST_BASE);
4105 	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_TX_FLST_BASE);
4106 	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_PS_FLST_BASE);
4107 
4108 	/* the next few have explicit upper bounds */
4109 	md->base = t4_read_reg(sc, A_TP_PMM_TX_BASE);
4110 	md->limit = md->base - 1 +
4111 		    t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE) *
4112 		    G_PMTXMAXPAGE(t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE));
4113 	md++;
4114 
4115 	md->base = t4_read_reg(sc, A_TP_PMM_RX_BASE);
4116 	md->limit = md->base - 1 +
4117 		    t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) *
4118 		    G_PMRXMAXPAGE(t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE));
4119 	md++;
4120 
4121 	if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) {
4122 		hi = t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4;
4123 		md->base = t4_read_reg(sc, A_LE_DB_HASH_TID_BASE);
4124 		md->limit = (sc->tids.ntids - hi) * 16 + md->base - 1;
4125 	} else {
4126 		md->base = 0;
4127 		md->idx = nitems(region);  /* hide it */
4128 	}
4129 	md++;
4130 
4131 #define ulp_region(reg) \
4132 	md->base = t4_read_reg(sc, A_ULP_ ## reg ## _LLIMIT);\
4133 	(md++)->limit = t4_read_reg(sc, A_ULP_ ## reg ## _ULIMIT)
4134 
4135 	ulp_region(RX_ISCSI);
4136 	ulp_region(RX_TDDP);
4137 	ulp_region(TX_TPT);
4138 	ulp_region(RX_STAG);
4139 	ulp_region(RX_RQ);
4140 	ulp_region(RX_RQUDP);
4141 	ulp_region(RX_PBL);
4142 	ulp_region(TX_PBL);
4143 #undef ulp_region
4144 
4145 	md->base = t4_read_reg(sc, A_ULP_RX_CTX_BASE);
4146 	md->limit = md->base + sc->tids.ntids - 1;
4147 	md++;
4148 	md->base = t4_read_reg(sc, A_ULP_TX_ERR_TABLE_BASE);
4149 	md->limit = md->base + sc->tids.ntids - 1;
4150 	md++;
4151 
4152 	md->base = sc->vres.ocq.start;
4153 	if (sc->vres.ocq.size)
4154 		md->limit = md->base + sc->vres.ocq.size - 1;
4155 	else
4156 		md->idx = nitems(region);  /* hide it */
4157 	md++;
4158 
4159 	/* add any address-space holes, there can be up to 3 */
4160 	for (n = 0; n < i - 1; n++)
4161 		if (avail[n].limit < avail[n + 1].base)
4162 			(md++)->base = avail[n].limit;
4163 	if (avail[n].limit)
4164 		(md++)->base = avail[n].limit;
4165 
4166 	n = md - mem;
4167 	qsort(mem, n, sizeof(struct mem_desc), mem_desc_cmp);
4168 
4169 	for (lo = 0; lo < i; lo++)
4170 		mem_region_show(sb, memory[avail[lo].idx], avail[lo].base,
4171 				avail[lo].limit - 1);
4172 
4173 	sbuf_printf(sb, "\n");
4174 	for (i = 0; i < n; i++) {
4175 		if (mem[i].idx >= nitems(region))
4176 			continue;                        /* skip holes */
4177 		if (!mem[i].limit)
4178 			mem[i].limit = i < n - 1 ? mem[i + 1].base - 1 : ~0;
4179 		mem_region_show(sb, region[mem[i].idx], mem[i].base,
4180 				mem[i].limit);
4181 	}
4182 
4183 	sbuf_printf(sb, "\n");
4184 	lo = t4_read_reg(sc, A_CIM_SDRAM_BASE_ADDR);
4185 	hi = t4_read_reg(sc, A_CIM_SDRAM_ADDR_SIZE) + lo - 1;
4186 	mem_region_show(sb, "uP RAM:", lo, hi);
4187 
4188 	lo = t4_read_reg(sc, A_CIM_EXTMEM2_BASE_ADDR);
4189 	hi = t4_read_reg(sc, A_CIM_EXTMEM2_ADDR_SIZE) + lo - 1;
4190 	mem_region_show(sb, "uP Extmem2:", lo, hi);
4191 
4192 	lo = t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE);
4193 	sbuf_printf(sb, "\n%u Rx pages of size %uKiB for %u channels\n",
4194 		   G_PMRXMAXPAGE(lo),
4195 		   t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) >> 10,
4196 		   (lo & F_PMRXNUMCHN) ? 2 : 1);
4197 
4198 	lo = t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE);
4199 	hi = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE);
4200 	sbuf_printf(sb, "%u Tx pages of size %u%ciB for %u channels\n",
4201 		   G_PMTXMAXPAGE(lo),
4202 		   hi >= (1 << 20) ? (hi >> 20) : (hi >> 10),
4203 		   hi >= (1 << 20) ? 'M' : 'K', 1 << G_PMTXNUMCHN(lo));
4204 	sbuf_printf(sb, "%u p-structs\n",
4205 		   t4_read_reg(sc, A_TP_CMM_MM_MAX_PSTRUCT));
4206 
4207 	for (i = 0; i < 4; i++) {
4208 		lo = t4_read_reg(sc, A_MPS_RX_PG_RSV0 + i * 4);
4209 		sbuf_printf(sb, "\nPort %d using %u pages out of %u allocated",
4210 			   i, G_USED(lo), G_ALLOC(lo));
4211 	}
4212 	for (i = 0; i < 4; i++) {
4213 		lo = t4_read_reg(sc, A_MPS_RX_PG_RSV4 + i * 4);
4214 		sbuf_printf(sb,
4215 			   "\nLoopback %d using %u pages out of %u allocated",
4216 			   i, G_USED(lo), G_ALLOC(lo));
4217 	}
4218 
4219 	rc = sbuf_finish(sb);
4220 	sbuf_delete(sb);
4221 
4222 	return (rc);
4223 }
4224 
4225 static int
4226 sysctl_path_mtus(SYSCTL_HANDLER_ARGS)
4227 {
4228 	struct adapter *sc = arg1;
4229 	struct sbuf *sb;
4230 	int rc;
4231 	uint16_t mtus[NMTUS];
4232 
4233 	rc = sysctl_wire_old_buffer(req, 0);
4234 	if (rc != 0)
4235 		return (rc);
4236 
4237 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
4238 	if (sb == NULL)
4239 		return (ENOMEM);
4240 
4241 	t4_read_mtu_tbl(sc, mtus, NULL);
4242 
4243 	sbuf_printf(sb, "%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u",
4244 	    mtus[0], mtus[1], mtus[2], mtus[3], mtus[4], mtus[5], mtus[6],
4245 	    mtus[7], mtus[8], mtus[9], mtus[10], mtus[11], mtus[12], mtus[13],
4246 	    mtus[14], mtus[15]);
4247 
4248 	rc = sbuf_finish(sb);
4249 	sbuf_delete(sb);
4250 
4251 	return (rc);
4252 }
4253 
4254 static int
4255 sysctl_pm_stats(SYSCTL_HANDLER_ARGS)
4256 {
4257 	struct adapter *sc = arg1;
4258 	struct sbuf *sb;
4259 	int rc, i;
4260 	uint32_t tx_cnt[PM_NSTATS], rx_cnt[PM_NSTATS];
4261 	uint64_t tx_cyc[PM_NSTATS], rx_cyc[PM_NSTATS];
4262 	static const char *pm_stats[] = {
4263 		"Read:", "Write bypass:", "Write mem:", "Flush:", "FIFO wait:"
4264 	};
4265 
4266 	rc = sysctl_wire_old_buffer(req, 0);
4267 	if (rc != 0)
4268 		return (rc);
4269 
4270 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
4271 	if (sb == NULL)
4272 		return (ENOMEM);
4273 
4274 	t4_pmtx_get_stats(sc, tx_cnt, tx_cyc);
4275 	t4_pmrx_get_stats(sc, rx_cnt, rx_cyc);
4276 
4277 	sbuf_printf(sb, "                Tx count            Tx cycles    "
4278 	    "Rx count            Rx cycles");
4279 	for (i = 0; i < PM_NSTATS; i++)
4280 		sbuf_printf(sb, "\n%-13s %10u %20ju  %10u %20ju",
4281 		    pm_stats[i], tx_cnt[i], tx_cyc[i], rx_cnt[i], rx_cyc[i]);
4282 
4283 	rc = sbuf_finish(sb);
4284 	sbuf_delete(sb);
4285 
4286 	return (rc);
4287 }
4288 
4289 static int
4290 sysctl_rdma_stats(SYSCTL_HANDLER_ARGS)
4291 {
4292 	struct adapter *sc = arg1;
4293 	struct sbuf *sb;
4294 	int rc;
4295 	struct tp_rdma_stats stats;
4296 
4297 	rc = sysctl_wire_old_buffer(req, 0);
4298 	if (rc != 0)
4299 		return (rc);
4300 
4301 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
4302 	if (sb == NULL)
4303 		return (ENOMEM);
4304 
4305 	t4_tp_get_rdma_stats(sc, &stats);
4306 	sbuf_printf(sb, "NoRQEModDefferals: %u\n", stats.rqe_dfr_mod);
4307 	sbuf_printf(sb, "NoRQEPktDefferals: %u", stats.rqe_dfr_pkt);
4308 
4309 	rc = sbuf_finish(sb);
4310 	sbuf_delete(sb);
4311 
4312 	return (rc);
4313 }
4314 
4315 static int
4316 sysctl_tcp_stats(SYSCTL_HANDLER_ARGS)
4317 {
4318 	struct adapter *sc = arg1;
4319 	struct sbuf *sb;
4320 	int rc;
4321 	struct tp_tcp_stats v4, v6;
4322 
4323 	rc = sysctl_wire_old_buffer(req, 0);
4324 	if (rc != 0)
4325 		return (rc);
4326 
4327 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
4328 	if (sb == NULL)
4329 		return (ENOMEM);
4330 
4331 	t4_tp_get_tcp_stats(sc, &v4, &v6);
4332 	sbuf_printf(sb,
4333 	    "                                IP                 IPv6\n");
4334 	sbuf_printf(sb, "OutRsts:      %20u %20u\n",
4335 	    v4.tcpOutRsts, v6.tcpOutRsts);
4336 	sbuf_printf(sb, "InSegs:       %20ju %20ju\n",
4337 	    v4.tcpInSegs, v6.tcpInSegs);
4338 	sbuf_printf(sb, "OutSegs:      %20ju %20ju\n",
4339 	    v4.tcpOutSegs, v6.tcpOutSegs);
4340 	sbuf_printf(sb, "RetransSegs:  %20ju %20ju",
4341 	    v4.tcpRetransSegs, v6.tcpRetransSegs);
4342 
4343 	rc = sbuf_finish(sb);
4344 	sbuf_delete(sb);
4345 
4346 	return (rc);
4347 }
4348 
4349 static int
4350 sysctl_tids(SYSCTL_HANDLER_ARGS)
4351 {
4352 	struct adapter *sc = arg1;
4353 	struct sbuf *sb;
4354 	int rc;
4355 	struct tid_info *t = &sc->tids;
4356 
4357 	rc = sysctl_wire_old_buffer(req, 0);
4358 	if (rc != 0)
4359 		return (rc);
4360 
4361 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
4362 	if (sb == NULL)
4363 		return (ENOMEM);
4364 
4365 	if (t->natids) {
4366 		sbuf_printf(sb, "ATID range: 0-%u, in use: %u\n", t->natids - 1,
4367 		    t->atids_in_use);
4368 	}
4369 
4370 	if (t->ntids) {
4371 		if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) {
4372 			uint32_t b = t4_read_reg(sc, A_LE_DB_SERVER_INDEX) / 4;
4373 
4374 			if (b) {
4375 				sbuf_printf(sb, "TID range: 0-%u, %u-%u", b - 1,
4376 				    t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4,
4377 				    t->ntids - 1);
4378 			} else {
4379 				sbuf_printf(sb, "TID range: %u-%u",
4380 				    t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4,
4381 				    t->ntids - 1);
4382 			}
4383 		} else
4384 			sbuf_printf(sb, "TID range: 0-%u", t->ntids - 1);
4385 		sbuf_printf(sb, ", in use: %u\n",
4386 		    atomic_load_acq_int(&t->tids_in_use));
4387 	}
4388 
4389 	if (t->nstids) {
4390 		sbuf_printf(sb, "STID range: %u-%u, in use: %u\n", t->stid_base,
4391 		    t->stid_base + t->nstids - 1, t->stids_in_use);
4392 	}
4393 
4394 	if (t->nftids) {
4395 		sbuf_printf(sb, "FTID range: %u-%u\n", t->ftid_base,
4396 		    t->ftid_base + t->nftids - 1);
4397 	}
4398 
4399 	sbuf_printf(sb, "HW TID usage: %u IP users, %u IPv6 users",
4400 	    t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV4),
4401 	    t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV6));
4402 
4403 	rc = sbuf_finish(sb);
4404 	sbuf_delete(sb);
4405 
4406 	return (rc);
4407 }
4408 
4409 static int
4410 sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS)
4411 {
4412 	struct adapter *sc = arg1;
4413 	struct sbuf *sb;
4414 	int rc;
4415 	struct tp_err_stats stats;
4416 
4417 	rc = sysctl_wire_old_buffer(req, 0);
4418 	if (rc != 0)
4419 		return (rc);
4420 
4421 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
4422 	if (sb == NULL)
4423 		return (ENOMEM);
4424 
4425 	t4_tp_get_err_stats(sc, &stats);
4426 
4427 	sbuf_printf(sb, "                 channel 0  channel 1  channel 2  "
4428 		      "channel 3\n");
4429 	sbuf_printf(sb, "macInErrs:      %10u %10u %10u %10u\n",
4430 	    stats.macInErrs[0], stats.macInErrs[1], stats.macInErrs[2],
4431 	    stats.macInErrs[3]);
4432 	sbuf_printf(sb, "hdrInErrs:      %10u %10u %10u %10u\n",
4433 	    stats.hdrInErrs[0], stats.hdrInErrs[1], stats.hdrInErrs[2],
4434 	    stats.hdrInErrs[3]);
4435 	sbuf_printf(sb, "tcpInErrs:      %10u %10u %10u %10u\n",
4436 	    stats.tcpInErrs[0], stats.tcpInErrs[1], stats.tcpInErrs[2],
4437 	    stats.tcpInErrs[3]);
4438 	sbuf_printf(sb, "tcp6InErrs:     %10u %10u %10u %10u\n",
4439 	    stats.tcp6InErrs[0], stats.tcp6InErrs[1], stats.tcp6InErrs[2],
4440 	    stats.tcp6InErrs[3]);
4441 	sbuf_printf(sb, "tnlCongDrops:   %10u %10u %10u %10u\n",
4442 	    stats.tnlCongDrops[0], stats.tnlCongDrops[1], stats.tnlCongDrops[2],
4443 	    stats.tnlCongDrops[3]);
4444 	sbuf_printf(sb, "tnlTxDrops:     %10u %10u %10u %10u\n",
4445 	    stats.tnlTxDrops[0], stats.tnlTxDrops[1], stats.tnlTxDrops[2],
4446 	    stats.tnlTxDrops[3]);
4447 	sbuf_printf(sb, "ofldVlanDrops:  %10u %10u %10u %10u\n",
4448 	    stats.ofldVlanDrops[0], stats.ofldVlanDrops[1],
4449 	    stats.ofldVlanDrops[2], stats.ofldVlanDrops[3]);
4450 	sbuf_printf(sb, "ofldChanDrops:  %10u %10u %10u %10u\n\n",
4451 	    stats.ofldChanDrops[0], stats.ofldChanDrops[1],
4452 	    stats.ofldChanDrops[2], stats.ofldChanDrops[3]);
4453 	sbuf_printf(sb, "ofldNoNeigh:    %u\nofldCongDefer:  %u",
4454 	    stats.ofldNoNeigh, stats.ofldCongDefer);
4455 
4456 	rc = sbuf_finish(sb);
4457 	sbuf_delete(sb);
4458 
4459 	return (rc);
4460 }
4461 
4462 static int
4463 sysctl_tx_rate(SYSCTL_HANDLER_ARGS)
4464 {
4465 	struct adapter *sc = arg1;
4466 	struct sbuf *sb;
4467 	int rc;
4468 	u64 nrate[NCHAN], orate[NCHAN];
4469 
4470 	rc = sysctl_wire_old_buffer(req, 0);
4471 	if (rc != 0)
4472 		return (rc);
4473 
4474 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
4475 	if (sb == NULL)
4476 		return (ENOMEM);
4477 
4478 	t4_get_chan_txrate(sc, nrate, orate);
4479 	sbuf_printf(sb, "              channel 0   channel 1   channel 2   "
4480 		 "channel 3\n");
4481 	sbuf_printf(sb, "NIC B/s:     %10ju  %10ju  %10ju  %10ju\n",
4482 	    nrate[0], nrate[1], nrate[2], nrate[3]);
4483 	sbuf_printf(sb, "Offload B/s: %10ju  %10ju  %10ju  %10ju",
4484 	    orate[0], orate[1], orate[2], orate[3]);
4485 
4486 	rc = sbuf_finish(sb);
4487 	sbuf_delete(sb);
4488 
4489 	return (rc);
4490 }
4491 #endif
4492 
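/*
 * Resume transmission on a tx queue: send the held-over mbuf if there is one,
 * otherwise the next frame off the buf_ring.  Called with the txq lock held.
 */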
4493 static inline void
4494 txq_start(struct ifnet *ifp, struct sge_txq *txq)
4495 {
4496 	struct buf_ring *br;
4497 	struct mbuf *m;
4498 
4499 	TXQ_LOCK_ASSERT_OWNED(txq);
4500 
4501 	br = txq->br;
4502 	m = txq->m ? txq->m : drbr_dequeue(ifp, br);
4503 	if (m)
4504 		t4_eth_tx(ifp, txq, m);
4505 }
4506 
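/*
 * Callout handler for a stalled eq.  If the eq still can't resume (or its
 * lock couldn't be acquired) the callout reschedules itself for the next
 * tick; otherwise the real work is punted to the eq's tx task.
 */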
4507 void
4508 t4_tx_callout(void *arg)
4509 {
4510 	struct sge_eq *eq = arg;
4511 	struct adapter *sc;
4512 
4513 	if (EQ_TRYLOCK(eq) == 0)
4514 		goto reschedule;
4515 
4516 	if (eq->flags & EQ_STALLED && !can_resume_tx(eq)) {
4517 		EQ_UNLOCK(eq);
4518 reschedule:
4519 		if (__predict_true(!(eq->flags && EQ_DOOMED)))
4520 			callout_schedule(&eq->tx_callout, 1);
4521 		return;
4522 	}
4523 
4524 	EQ_LOCK_ASSERT_OWNED(eq);
4525 
4526 	if (__predict_true((eq->flags & EQ_DOOMED) == 0)) {
4527 
4528 		if ((eq->flags & EQ_TYPEMASK) == EQ_ETH) {
4529 			struct sge_txq *txq = arg;
4530 			struct port_info *pi = txq->ifp->if_softc;
4531 
4532 			sc = pi->adapter;
4533 		} else {
4534 			struct sge_wrq *wrq = arg;
4535 
4536 			sc = wrq->adapter;
4537 		}
4538 
4539 		taskqueue_enqueue(sc->tq[eq->tx_chan], &eq->tx_task);
4540 	}
4541 
4542 	EQ_UNLOCK(eq);
4543 }
4544 
4545 void
4546 t4_tx_task(void *arg, int count)
4547 {
4548 	struct sge_eq *eq = arg;
4549 
4550 	EQ_LOCK(eq);
4551 	if ((eq->flags & EQ_TYPEMASK) == EQ_ETH) {
4552 		struct sge_txq *txq = arg;
4553 		txq_start(txq->ifp, txq);
4554 	} else {
4555 		struct sge_wrq *wrq = arg;
4556 		t4_wrq_tx_locked(wrq->adapter, wrq, NULL);
4557 	}
4558 	EQ_UNLOCK(eq);
4559 }
4560 
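/*
 * Translate the hardware filter configuration (the TP_VLAN_PRI_MAP fields in
 * use) to the T4_FILTER_xxx mode bits reported to userland, and vice versa.
 */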
4561 static uint32_t
4562 fconf_to_mode(uint32_t fconf)
4563 {
4564 	uint32_t mode;
4565 
4566 	mode = T4_FILTER_IPv4 | T4_FILTER_IPv6 | T4_FILTER_IP_SADDR |
4567 	    T4_FILTER_IP_DADDR | T4_FILTER_IP_SPORT | T4_FILTER_IP_DPORT;
4568 
4569 	if (fconf & F_FRAGMENTATION)
4570 		mode |= T4_FILTER_IP_FRAGMENT;
4571 
4572 	if (fconf & F_MPSHITTYPE)
4573 		mode |= T4_FILTER_MPS_HIT_TYPE;
4574 
4575 	if (fconf & F_MACMATCH)
4576 		mode |= T4_FILTER_MAC_IDX;
4577 
4578 	if (fconf & F_ETHERTYPE)
4579 		mode |= T4_FILTER_ETH_TYPE;
4580 
4581 	if (fconf & F_PROTOCOL)
4582 		mode |= T4_FILTER_IP_PROTO;
4583 
4584 	if (fconf & F_TOS)
4585 		mode |= T4_FILTER_IP_TOS;
4586 
4587 	if (fconf & F_VLAN)
4588 		mode |= T4_FILTER_VLAN;
4589 
4590 	if (fconf & F_VNIC_ID)
4591 		mode |= T4_FILTER_VNIC;
4592 
4593 	if (fconf & F_PORT)
4594 		mode |= T4_FILTER_PORT;
4595 
4596 	if (fconf & F_FCOE)
4597 		mode |= T4_FILTER_FCoE;
4598 
4599 	return (mode);
4600 }
4601 
4602 static uint32_t
4603 mode_to_fconf(uint32_t mode)
4604 {
4605 	uint32_t fconf = 0;
4606 
4607 	if (mode & T4_FILTER_IP_FRAGMENT)
4608 		fconf |= F_FRAGMENTATION;
4609 
4610 	if (mode & T4_FILTER_MPS_HIT_TYPE)
4611 		fconf |= F_MPSHITTYPE;
4612 
4613 	if (mode & T4_FILTER_MAC_IDX)
4614 		fconf |= F_MACMATCH;
4615 
4616 	if (mode & T4_FILTER_ETH_TYPE)
4617 		fconf |= F_ETHERTYPE;
4618 
4619 	if (mode & T4_FILTER_IP_PROTO)
4620 		fconf |= F_PROTOCOL;
4621 
4622 	if (mode & T4_FILTER_IP_TOS)
4623 		fconf |= F_TOS;
4624 
4625 	if (mode & T4_FILTER_VLAN)
4626 		fconf |= F_VLAN;
4627 
4628 	if (mode & T4_FILTER_VNIC)
4629 		fconf |= F_VNIC_ID;
4630 
4631 	if (mode & T4_FILTER_PORT)
4632 		fconf |= F_PORT;
4633 
4634 	if (mode & T4_FILTER_FCoE)
4635 		fconf |= F_FCOE;
4636 
4637 	return (fconf);
4638 }
4639 
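/*
 * Determine which TP_VLAN_PRI_MAP fields a filter specification relies on, so
 * that it can be checked for compatibility with the global filter mode.
 */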
4640 static uint32_t
4641 fspec_to_fconf(struct t4_filter_specification *fs)
4642 {
4643 	uint32_t fconf = 0;
4644 
4645 	if (fs->val.frag || fs->mask.frag)
4646 		fconf |= F_FRAGMENTATION;
4647 
4648 	if (fs->val.matchtype || fs->mask.matchtype)
4649 		fconf |= F_MPSHITTYPE;
4650 
4651 	if (fs->val.macidx || fs->mask.macidx)
4652 		fconf |= F_MACMATCH;
4653 
4654 	if (fs->val.ethtype || fs->mask.ethtype)
4655 		fconf |= F_ETHERTYPE;
4656 
4657 	if (fs->val.proto || fs->mask.proto)
4658 		fconf |= F_PROTOCOL;
4659 
4660 	if (fs->val.tos || fs->mask.tos)
4661 		fconf |= F_TOS;
4662 
4663 	if (fs->val.vlan_vld || fs->mask.vlan_vld)
4664 		fconf |= F_VLAN;
4665 
4666 	if (fs->val.vnic_vld || fs->mask.vnic_vld)
4667 		fconf |= F_VNIC_ID;
4668 
4669 	if (fs->val.iport || fs->mask.iport)
4670 		fconf |= F_PORT;
4671 
4672 	if (fs->val.fcoe || fs->mask.fcoe)
4673 		fconf |= F_FCOE;
4674 
4675 	return (fconf);
4676 }
4677 
4678 static int
4679 get_filter_mode(struct adapter *sc, uint32_t *mode)
4680 {
4681 	int rc;
4682 	uint32_t fconf;
4683 
4684 	rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK,
4685 	    "t4getfm");
4686 	if (rc)
4687 		return (rc);
4688 
4689 	t4_read_indirect(sc, A_TP_PIO_ADDR, A_TP_PIO_DATA, &fconf, 1,
4690 	    A_TP_VLAN_PRI_MAP);
4691 
4692 	if (sc->filter_mode != fconf) {
4693 		log(LOG_WARNING, "%s: cached filter mode out of sync %x %x.\n",
4694 		    device_get_nameunit(sc->dev), sc->filter_mode, fconf);
4695 		sc->filter_mode = fconf;
4696 	}
4697 
4698 	*mode = fconf_to_mode(sc->filter_mode);
4699 
4700 	end_synchronized_op(sc, LOCK_HELD);
4701 	return (0);
4702 }
4703 
4704 static int
4705 set_filter_mode(struct adapter *sc, uint32_t mode)
4706 {
4707 	uint32_t fconf;
4708 	int rc;
4709 
4710 	fconf = mode_to_fconf(mode);
4711 
4712 	rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK,
4713 	    "t4setfm");
4714 	if (rc)
4715 		return (rc);
4716 
4717 	if (sc->tids.ftids_in_use > 0) {
4718 		rc = EBUSY;
4719 		goto done;
4720 	}
4721 
4722 #ifdef TCP_OFFLOAD
4723 	if (sc->offload_map) {
4724 		rc = EBUSY;
4725 		goto done;
4726 	}
4727 #endif
4728 
4729 #ifdef notyet
4730 	rc = -t4_set_filter_mode(sc, fconf);
4731 	if (rc == 0)
4732 		sc->filter_mode = fconf;
4733 #else
4734 	rc = ENOTSUP;
4735 #endif
4736 
4737 done:
4738 	end_synchronized_op(sc, LOCK_HELD);
4739 	return (rc);
4740 }
4741 
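/*
 * Retrieve a filter's hit count from its TCB: memory window 0 is pointed at
 * the TCB and the 64-bit count is read back through the window.
 */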
4742 static inline uint64_t
4743 get_filter_hits(struct adapter *sc, uint32_t fid)
4744 {
4745 	uint32_t tcb_base = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
4746 	uint64_t hits;
4747 
4748 	t4_write_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 0),
4749 	    tcb_base + (fid + sc->tids.ftid_base) * TCB_SIZE);
4750 	t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 0));
4751 	hits = t4_read_reg64(sc, MEMWIN0_BASE + 16);
4752 
4753 	return (be64toh(hits));
4754 }
4755 
4756 static int
4757 get_filter(struct adapter *sc, struct t4_filter *t)
4758 {
4759 	int i, rc, nfilters = sc->tids.nftids;
4760 	struct filter_entry *f;
4761 
4762 	rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK,
4763 	    "t4getf");
4764 	if (rc)
4765 		return (rc);
4766 
4767 	if (sc->tids.ftids_in_use == 0 || sc->tids.ftid_tab == NULL ||
4768 	    t->idx >= nfilters) {
4769 		t->idx = 0xffffffff;
4770 		goto done;
4771 	}
4772 
4773 	f = &sc->tids.ftid_tab[t->idx];
4774 	for (i = t->idx; i < nfilters; i++, f++) {
4775 		if (f->valid) {
4776 			t->idx = i;
4777 			t->l2tidx = f->l2t ? f->l2t->idx : 0;
4778 			t->smtidx = f->smtidx;
4779 			if (f->fs.hitcnts)
4780 				t->hits = get_filter_hits(sc, t->idx);
4781 			else
4782 				t->hits = UINT64_MAX;
4783 			t->fs = f->fs;
4784 
4785 			goto done;
4786 		}
4787 	}
4788 
4789 	t->idx = 0xffffffff;
4790 done:
4791 	end_synchronized_op(sc, LOCK_HELD);
4792 	return (0);
4793 }
4794 
4795 static int
4796 set_filter(struct adapter *sc, struct t4_filter *t)
4797 {
4798 	unsigned int nfilters, nports;
4799 	struct filter_entry *f;
4800 	int i, rc;
4801 
4802 	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setf");
4803 	if (rc)
4804 		return (rc);
4805 
4806 	nfilters = sc->tids.nftids;
4807 	nports = sc->params.nports;
4808 
4809 	if (nfilters == 0) {
4810 		rc = ENOTSUP;
4811 		goto done;
4812 	}
4813 
4814 	if (!(sc->flags & FULL_INIT_DONE)) {
4815 		rc = EAGAIN;
4816 		goto done;
4817 	}
4818 
4819 	if (t->idx >= nfilters) {
4820 		rc = EINVAL;
4821 		goto done;
4822 	}
4823 
4824 	/* Validate against the global filter mode */
4825 	if ((sc->filter_mode | fspec_to_fconf(&t->fs)) != sc->filter_mode) {
4826 		rc = E2BIG;
4827 		goto done;
4828 	}
4829 
4830 	if (t->fs.action == FILTER_SWITCH && t->fs.eport >= nports) {
4831 		rc = EINVAL;
4832 		goto done;
4833 	}
4834 
4835 	if (t->fs.val.iport >= nports) {
4836 		rc = EINVAL;
4837 		goto done;
4838 	}
4839 
4840 	/* Can't specify an iq if not steering to it */
4841 	if (!t->fs.dirsteer && t->fs.iq) {
4842 		rc = EINVAL;
4843 		goto done;
4844 	}
4845 
4846 	/* An IPv6 filter occupies 4 slots, so its index must be 4-aligned */
4847 	if (t->fs.type == 1 &&
4848 	    ((t->idx & 0x3) || t->idx + 4 >= nfilters)) {
4849 		rc = EINVAL;
4850 		goto done;
4851 	}
4852 
4853 	if (sc->tids.ftid_tab == NULL) {
4854 		KASSERT(sc->tids.ftids_in_use == 0,
4855 		    ("%s: no memory allocated but filters_in_use > 0",
4856 		    __func__));
4857 
4858 		sc->tids.ftid_tab = malloc(sizeof (struct filter_entry) *
4859 		    nfilters, M_CXGBE, M_NOWAIT | M_ZERO);
4860 		if (sc->tids.ftid_tab == NULL) {
4861 			rc = ENOMEM;
4862 			goto done;
4863 		}
4864 		mtx_init(&sc->tids.ftid_lock, "T4 filters", 0, MTX_DEF);
4865 	}
4866 
4867 	for (i = 0; i < 4; i++) {
4868 		f = &sc->tids.ftid_tab[t->idx + i];
4869 
4870 		if (f->pending || f->valid) {
4871 			rc = EBUSY;
4872 			goto done;
4873 		}
4874 		if (f->locked) {
4875 			rc = EPERM;
4876 			goto done;
4877 		}
4878 
4879 		if (t->fs.type == 0)
4880 			break;
4881 	}
4882 
4883 	f = &sc->tids.ftid_tab[t->idx];
4884 	f->fs = t->fs;
4885 
4886 	rc = set_filter_wr(sc, t->idx);
4887 done:
4888 	end_synchronized_op(sc, 0);
4889 
4890 	if (rc == 0) {
4891 		mtx_lock(&sc->tids.ftid_lock);
4892 		for (;;) {
4893 			if (f->pending == 0) {
4894 				rc = f->valid ? 0 : EIO;
4895 				break;
4896 			}
4897 
4898 			if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock,
4899 			    PCATCH, "t4setfw", 0)) {
4900 				rc = EINPROGRESS;
4901 				break;
4902 			}
4903 		}
4904 		mtx_unlock(&sc->tids.ftid_lock);
4905 	}
4906 	return (rc);
4907 }
4908 
4909 static int
4910 del_filter(struct adapter *sc, struct t4_filter *t)
4911 {
4912 	unsigned int nfilters;
4913 	struct filter_entry *f;
4914 	int rc;
4915 
4916 	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4delf");
4917 	if (rc)
4918 		return (rc);
4919 
4920 	nfilters = sc->tids.nftids;
4921 
4922 	if (nfilters == 0) {
4923 		rc = ENOTSUP;
4924 		goto done;
4925 	}
4926 
4927 	if (sc->tids.ftid_tab == NULL || sc->tids.ftids_in_use == 0 ||
4928 	    t->idx >= nfilters) {
4929 		rc = EINVAL;
4930 		goto done;
4931 	}
4932 
4933 	if (!(sc->flags & FULL_INIT_DONE)) {
4934 		rc = EAGAIN;
4935 		goto done;
4936 	}
4937 
4938 	f = &sc->tids.ftid_tab[t->idx];
4939 
4940 	if (f->pending) {
4941 		rc = EBUSY;
4942 		goto done;
4943 	}
4944 	if (f->locked) {
4945 		rc = EPERM;
4946 		goto done;
4947 	}
4948 
4949 	if (f->valid) {
4950 		t->fs = f->fs;	/* extra info for the caller */
4951 		rc = del_filter_wr(sc, t->idx);
4952 	}
4953 
4954 done:
4955 	end_synchronized_op(sc, 0);
4956 
4957 	if (rc == 0) {
4958 		mtx_lock(&sc->tids.ftid_lock);
4959 		for (;;) {
4960 			if (f->pending == 0) {
4961 				rc = f->valid ? EIO : 0;
4962 				break;
4963 			}
4964 
4965 			if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock,
4966 			    PCATCH, "t4delfw", 0)) {
4967 				rc = EINPROGRESS;
4968 				break;
4969 			}
4970 		}
4971 		mtx_unlock(&sc->tids.ftid_lock);
4972 	}
4973 
4974 	return (rc);
4975 }
4976 
4977 static void
4978 clear_filter(struct filter_entry *f)
4979 {
4980 	if (f->l2t)
4981 		t4_l2t_release(f->l2t);
4982 
4983 	bzero(f, sizeof (*f));
4984 }
4985 
4986 static int
4987 set_filter_wr(struct adapter *sc, int fidx)
4988 {
4989 	struct filter_entry *f = &sc->tids.ftid_tab[fidx];
4990 	struct wrqe *wr;
4991 	struct fw_filter_wr *fwr;
4992 	unsigned int ftid;
4993 
4994 	ASSERT_SYNCHRONIZED_OP(sc);
4995 
4996 	if (f->fs.newdmac || f->fs.newvlan) {
4997 		/* This filter needs an L2T entry; allocate one. */
4998 		f->l2t = t4_l2t_alloc_switching(sc->l2t);
4999 		if (f->l2t == NULL)
5000 			return (EAGAIN);
5001 		if (t4_l2t_set_switching(sc, f->l2t, f->fs.vlan, f->fs.eport,
5002 		    f->fs.dmac)) {
5003 			t4_l2t_release(f->l2t);
5004 			f->l2t = NULL;
5005 			return (ENOMEM);
5006 		}
5007 	}
5008 
5009 	ftid = sc->tids.ftid_base + fidx;
5010 
5011 	wr = alloc_wrqe(sizeof(*fwr), &sc->sge.mgmtq);
5012 	if (wr == NULL)
5013 		return (ENOMEM);
5014 
5015 	fwr = wrtod(wr);
5016 	bzero(fwr, sizeof (*fwr));
5017 
5018 	fwr->op_pkd = htobe32(V_FW_WR_OP(FW_FILTER_WR));
5019 	fwr->len16_pkd = htobe32(FW_LEN16(*fwr));
5020 	fwr->tid_to_iq =
5021 	    htobe32(V_FW_FILTER_WR_TID(ftid) |
5022 		V_FW_FILTER_WR_RQTYPE(f->fs.type) |
5023 		V_FW_FILTER_WR_NOREPLY(0) |
5024 		V_FW_FILTER_WR_IQ(f->fs.iq));
5025 	fwr->del_filter_to_l2tix =
5026 	    htobe32(V_FW_FILTER_WR_RPTTID(f->fs.rpttid) |
5027 		V_FW_FILTER_WR_DROP(f->fs.action == FILTER_DROP) |
5028 		V_FW_FILTER_WR_DIRSTEER(f->fs.dirsteer) |
5029 		V_FW_FILTER_WR_MASKHASH(f->fs.maskhash) |
5030 		V_FW_FILTER_WR_DIRSTEERHASH(f->fs.dirsteerhash) |
5031 		V_FW_FILTER_WR_LPBK(f->fs.action == FILTER_SWITCH) |
5032 		V_FW_FILTER_WR_DMAC(f->fs.newdmac) |
5033 		V_FW_FILTER_WR_SMAC(f->fs.newsmac) |
5034 		V_FW_FILTER_WR_INSVLAN(f->fs.newvlan == VLAN_INSERT ||
5035 		    f->fs.newvlan == VLAN_REWRITE) |
5036 		V_FW_FILTER_WR_RMVLAN(f->fs.newvlan == VLAN_REMOVE ||
5037 		    f->fs.newvlan == VLAN_REWRITE) |
5038 		V_FW_FILTER_WR_HITCNTS(f->fs.hitcnts) |
5039 		V_FW_FILTER_WR_TXCHAN(f->fs.eport) |
5040 		V_FW_FILTER_WR_PRIO(f->fs.prio) |
5041 		V_FW_FILTER_WR_L2TIX(f->l2t ? f->l2t->idx : 0));
5042 	fwr->ethtype = htobe16(f->fs.val.ethtype);
5043 	fwr->ethtypem = htobe16(f->fs.mask.ethtype);
5044 	fwr->frag_to_ovlan_vldm =
5045 	    (V_FW_FILTER_WR_FRAG(f->fs.val.frag) |
5046 		V_FW_FILTER_WR_FRAGM(f->fs.mask.frag) |
5047 		V_FW_FILTER_WR_IVLAN_VLD(f->fs.val.vlan_vld) |
5048 		V_FW_FILTER_WR_OVLAN_VLD(f->fs.val.vnic_vld) |
5049 		V_FW_FILTER_WR_IVLAN_VLDM(f->fs.mask.vlan_vld) |
5050 		V_FW_FILTER_WR_OVLAN_VLDM(f->fs.mask.vnic_vld));
5051 	fwr->smac_sel = 0;
5052 	fwr->rx_chan_rx_rpl_iq = htobe16(V_FW_FILTER_WR_RX_CHAN(0) |
5053 	    V_FW_FILTER_WR_RX_RPL_IQ(sc->sge.fwq.abs_id));
5054 	fwr->maci_to_matchtypem =
5055 	    htobe32(V_FW_FILTER_WR_MACI(f->fs.val.macidx) |
5056 		V_FW_FILTER_WR_MACIM(f->fs.mask.macidx) |
5057 		V_FW_FILTER_WR_FCOE(f->fs.val.fcoe) |
5058 		V_FW_FILTER_WR_FCOEM(f->fs.mask.fcoe) |
5059 		V_FW_FILTER_WR_PORT(f->fs.val.iport) |
5060 		V_FW_FILTER_WR_PORTM(f->fs.mask.iport) |
5061 		V_FW_FILTER_WR_MATCHTYPE(f->fs.val.matchtype) |
5062 		V_FW_FILTER_WR_MATCHTYPEM(f->fs.mask.matchtype));
5063 	fwr->ptcl = f->fs.val.proto;
5064 	fwr->ptclm = f->fs.mask.proto;
5065 	fwr->ttyp = f->fs.val.tos;
5066 	fwr->ttypm = f->fs.mask.tos;
5067 	fwr->ivlan = htobe16(f->fs.val.vlan);
5068 	fwr->ivlanm = htobe16(f->fs.mask.vlan);
5069 	fwr->ovlan = htobe16(f->fs.val.vnic);
5070 	fwr->ovlanm = htobe16(f->fs.mask.vnic);
5071 	bcopy(f->fs.val.dip, fwr->lip, sizeof (fwr->lip));
5072 	bcopy(f->fs.mask.dip, fwr->lipm, sizeof (fwr->lipm));
5073 	bcopy(f->fs.val.sip, fwr->fip, sizeof (fwr->fip));
5074 	bcopy(f->fs.mask.sip, fwr->fipm, sizeof (fwr->fipm));
5075 	fwr->lp = htobe16(f->fs.val.dport);
5076 	fwr->lpm = htobe16(f->fs.mask.dport);
5077 	fwr->fp = htobe16(f->fs.val.sport);
5078 	fwr->fpm = htobe16(f->fs.mask.sport);
5079 	if (f->fs.newsmac)
5080 		bcopy(f->fs.smac, fwr->sma, sizeof (fwr->sma));
5081 
5082 	f->pending = 1;
5083 	sc->tids.ftids_in_use++;
5084 
5085 	t4_wrq_tx(sc, wr);
5086 	return (0);
5087 }
5088 
5089 static int
5090 del_filter_wr(struct adapter *sc, int fidx)
5091 {
5092 	struct filter_entry *f = &sc->tids.ftid_tab[fidx];
5093 	struct wrqe *wr;
5094 	struct fw_filter_wr *fwr;
5095 	unsigned int ftid;
5096 
5097 	ftid = sc->tids.ftid_base + fidx;
5098 
5099 	wr = alloc_wrqe(sizeof(*fwr), &sc->sge.mgmtq);
5100 	if (wr == NULL)
5101 		return (ENOMEM);
5102 	fwr = wrtod(wr);
5103 	bzero(fwr, sizeof (*fwr));
5104 
5105 	t4_mk_filtdelwr(ftid, fwr, sc->sge.fwq.abs_id);
5106 
5107 	f->pending = 1;
5108 	t4_wrq_tx(sc, wr);
5109 	return (0);
5110 }
5111 
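/*
 * Reply handler for filter work requests.  Marks the filter entry valid (or
 * clears it on failure) and wakes up any thread sleeping in set_filter or
 * del_filter waiting for the operation to complete.
 */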
5112 int
5113 t4_filter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
5114 {
5115 	struct adapter *sc = iq->adapter;
5116 	const struct cpl_set_tcb_rpl *rpl = (const void *)(rss + 1);
5117 	unsigned int idx = GET_TID(rpl);
5118 
5119 	KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
5120 	    rss->opcode));
5121 
5122 	if (idx >= sc->tids.ftid_base &&
5123 	    (idx -= sc->tids.ftid_base) < sc->tids.nftids) {
5124 		unsigned int rc = G_COOKIE(rpl->cookie);
5125 		struct filter_entry *f = &sc->tids.ftid_tab[idx];
5126 
5127 		mtx_lock(&sc->tids.ftid_lock);
5128 		if (rc == FW_FILTER_WR_FLT_ADDED) {
5129 			KASSERT(f->pending, ("%s: filter[%u] isn't pending.",
5130 			    __func__, idx));
5131 			f->smtidx = (be64toh(rpl->oldval) >> 24) & 0xff;
5132 			f->pending = 0;  /* asynchronous setup completed */
5133 			f->valid = 1;
5134 		} else {
5135 			if (rc != FW_FILTER_WR_FLT_DELETED) {
5136 				/* Add or delete failed, display an error */
5137 				log(LOG_ERR,
5138 				    "filter %u setup failed with error %u\n",
5139 				    idx, rc);
5140 			}
5141 
5142 			clear_filter(f);
5143 			sc->tids.ftids_in_use--;
5144 		}
5145 		wakeup(&sc->tids.ftid_tab);
5146 		mtx_unlock(&sc->tids.ftid_lock);
5147 	}
5148 
5149 	return (0);
5150 }
5151 
5152 static int
5153 get_sge_context(struct adapter *sc, struct t4_sge_context *cntxt)
5154 {
5155 	int rc;
5156 
5157 	if (cntxt->cid > M_CTXTQID)
5158 		return (EINVAL);
5159 
5160 	if (cntxt->mem_id != CTXT_EGRESS && cntxt->mem_id != CTXT_INGRESS &&
5161 	    cntxt->mem_id != CTXT_FLM && cntxt->mem_id != CTXT_CNM)
5162 		return (EINVAL);
5163 
5164 	if (sc->flags & FW_OK) {
5165 		rc = begin_synchronized_op(sc, NULL, HOLD_LOCK, "t4ctxt");
5166 		if (rc == 0) {
5167 			rc = -t4_sge_ctxt_rd(sc, sc->mbox, cntxt->cid,
5168 			    cntxt->mem_id, &cntxt->data[0]);
5169 			end_synchronized_op(sc, LOCK_HELD);
5170 			if (rc == 0)
5171 				return (0);
5172 		}
5173 	}
5174 
5175 	/*
5176 	 * Read via firmware failed or wasn't even attempted.  Read directly via
5177 	 * the backdoor.
5178 	 */
5179 	rc = -t4_sge_ctxt_rd_bd(sc, cntxt->cid, cntxt->mem_id,
5180 	    &cntxt->data[0]);
5181 	return (rc);
5182 }
5183 
5184 static int
5185 load_fw(struct adapter *sc, struct t4_data *fw)
5186 {
5187 	int rc;
5188 	uint8_t *fw_data;
5189 
5190 	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldfw");
5191 	if (rc)
5192 		return (rc);
5193 
5194 	if (sc->flags & FULL_INIT_DONE) {
5195 		rc = EBUSY;
5196 		goto done;
5197 	}
5198 
5199 	fw_data = malloc(fw->len, M_CXGBE, M_WAITOK);
5200 	if (fw_data == NULL) {
5201 		rc = ENOMEM;
5202 		goto done;
5203 	}
5204 
5205 	rc = copyin(fw->data, fw_data, fw->len);
5206 	if (rc == 0)
5207 		rc = -t4_load_fw(sc, fw_data, fw->len);
5208 
5209 	free(fw_data, M_CXGBE);
5210 done:
5211 	end_synchronized_op(sc, 0);
5212 	return (rc);
5213 }
5214 
5215 static int
5216 read_card_mem(struct adapter *sc, struct t4_mem_range *mr)
5217 {
5218 	uint32_t base, size, lo, hi, win, off, remaining, i, n;
5219 	uint32_t *buf, *b;
5220 	int rc;
5221 
5222 	/* reads are in multiples of 32 bits */
5223 	if (mr->addr & 3 || mr->len & 3 || mr->len == 0)
5224 		return (EINVAL);
5225 
5226 	/*
5227 	 * We don't want to deal with potential holes so we mandate that the
5228 	 * requested region must lie entirely within one of the 3 memories.
5229 	 */
5230 	lo = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
5231 	if (lo & F_EDRAM0_ENABLE) {
5232 		hi = t4_read_reg(sc, A_MA_EDRAM0_BAR);
5233 		base = G_EDRAM0_BASE(hi) << 20;
5234 		size = G_EDRAM0_SIZE(hi) << 20;
5235 		if (size > 0 &&
5236 		    mr->addr >= base && mr->addr < base + size &&
5237 		    mr->addr + mr->len <= base + size)
5238 			goto proceed;
5239 	}
5240 	if (lo & F_EDRAM1_ENABLE) {
5241 		hi = t4_read_reg(sc, A_MA_EDRAM1_BAR);
5242 		base = G_EDRAM1_BASE(hi) << 20;
5243 		size = G_EDRAM1_SIZE(hi) << 20;
5244 		if (size > 0 &&
5245 		    mr->addr >= base && mr->addr < base + size &&
5246 		    mr->addr + mr->len <= base + size)
5247 			goto proceed;
5248 	}
5249 	if (lo & F_EXT_MEM_ENABLE) {
5250 		hi = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
5251 		base = G_EXT_MEM_BASE(hi) << 20;
5252 		size = G_EXT_MEM_SIZE(hi) << 20;
5253 		if (size > 0 &&
5254 		    mr->addr >= base && mr->addr < base + size &&
5255 		    mr->addr + mr->len <= base + size)
5256 			goto proceed;
5257 	}
5258 	return (ENXIO);
5259 
5260 proceed:
5261 	buf = b = malloc(mr->len, M_CXGBE, M_WAITOK);
5262 
5263 	/*
5264 	 * Position the PCIe window (we use memwin2) to the 16B aligned area
5265 	 * just at/before the requested region.
5266 	 */
5267 	win = mr->addr & ~0xf;
5268 	off = mr->addr - win;  /* offset of the requested region in the win */
5269 	remaining = mr->len;
5270 
5271 	while (remaining) {
5272 		t4_write_reg(sc,
5273 		    PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2), win);
5274 		t4_read_reg(sc,
5275 		    PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2));
5276 
5277 		/* number of bytes that we'll copy in the inner loop */
5278 		n = min(remaining, MEMWIN2_APERTURE - off);
5279 
5280 		for (i = 0; i < n; i += 4, remaining -= 4)
5281 			*b++ = t4_read_reg(sc, MEMWIN2_BASE + off + i);
5282 
5283 		win += MEMWIN2_APERTURE;
5284 		off = 0;
5285 	}
5286 
5287 	rc = copyout(buf, mr->data, mr->len);
5288 	free(buf, M_CXGBE);
5289 
5290 	return (rc);
5291 }
5292 
5293 static int
5294 read_i2c(struct adapter *sc, struct t4_i2c_data *i2cd)
5295 {
5296 	int rc;
5297 
5298 	if (i2cd->len == 0 || i2cd->port_id >= sc->params.nports)
5299 		return (EINVAL);
5300 
5301 	if (i2cd->len > 1) {
5302 		/* XXX: need fw support for longer reads in one go */
5303 		return (ENOTSUP);
5304 	}
5305 
5306 	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4i2crd");
5307 	if (rc)
5308 		return (rc);
5309 	rc = -t4_i2c_rd(sc, sc->mbox, i2cd->port_id, i2cd->dev_addr,
5310 	    i2cd->offset, &i2cd->data[0]);
5311 	end_synchronized_op(sc, 0);
5312 
5313 	return (rc);
5314 }
5315 
5316 int
5317 t4_os_find_pci_capability(struct adapter *sc, int cap)
5318 {
5319 	int i;
5320 
5321 	return (pci_find_cap(sc->dev, cap, &i) == 0 ? i : 0);
5322 }
5323 
5324 int
5325 t4_os_pci_save_state(struct adapter *sc)
5326 {
5327 	device_t dev;
5328 	struct pci_devinfo *dinfo;
5329 
5330 	dev = sc->dev;
5331 	dinfo = device_get_ivars(dev);
5332 
5333 	pci_cfg_save(dev, dinfo, 0);
5334 	return (0);
5335 }
5336 
5337 int
5338 t4_os_pci_restore_state(struct adapter *sc)
5339 {
5340 	device_t dev;
5341 	struct pci_devinfo *dinfo;
5342 
5343 	dev = sc->dev;
5344 	dinfo = device_get_ivars(dev);
5345 
5346 	pci_cfg_restore(dev, dinfo);
5347 	return (0);
5348 }
5349 
5350 void
5351 t4_os_portmod_changed(const struct adapter *sc, int idx)
5352 {
5353 	struct port_info *pi = sc->port[idx];
5354 	static const char *mod_str[] = {
5355 		NULL, "LR", "SR", "ER", "TWINAX", "active TWINAX", "LRM"
5356 	};
5357 
5358 	if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
5359 		if_printf(pi->ifp, "transceiver unplugged.\n");
5360 	else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
5361 		if_printf(pi->ifp, "unknown transceiver inserted.\n");
5362 	else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
5363 		if_printf(pi->ifp, "unsupported transceiver inserted.\n");
5364 	else if (pi->mod_type > 0 && pi->mod_type < nitems(mod_str)) {
5365 		if_printf(pi->ifp, "%s transceiver inserted.\n",
5366 		    mod_str[pi->mod_type]);
5367 	} else {
5368 		if_printf(pi->ifp, "transceiver (type %d) inserted.\n",
5369 		    pi->mod_type);
5370 	}
5371 }
5372 
5373 void
5374 t4_os_link_changed(struct adapter *sc, int idx, int link_stat)
5375 {
5376 	struct port_info *pi = sc->port[idx];
5377 	struct ifnet *ifp = pi->ifp;
5378 
5379 	if (link_stat) {
5380 		ifp->if_baudrate = IF_Mbps(pi->link_cfg.speed);
5381 		if_link_state_change(ifp, LINK_STATE_UP);
5382 	} else
5383 		if_link_state_change(ifp, LINK_STATE_DOWN);
5384 }
5385 
5386 void
5387 t4_iterate(void (*func)(struct adapter *, void *), void *arg)
5388 {
5389 	struct adapter *sc;
5390 
5391 	mtx_lock(&t4_list_lock);
5392 	SLIST_FOREACH(sc, &t4_list, link) {
5393 		/*
5394 		 * func should not make any assumptions about what state sc is
5395 		 * in - the only guarantee is that sc->sc_lock is a valid lock.
5396 		 */
5397 		func(sc, arg);
5398 	}
5399 	mtx_unlock(&t4_list_lock);
5400 }
5401 
5402 static int
5403 t4_open(struct cdev *dev, int flags, int type, struct thread *td)
5404 {
5405 	return (0);
5406 }
5407 
5408 static int
5409 t4_close(struct cdev *dev, int flags, int type, struct thread *td)
5410 {
5411 	return (0);
5412 }
5413 
5414 static int
5415 t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag,
5416     struct thread *td)
5417 {
5418 	int rc;
5419 	struct adapter *sc = dev->si_drv1;
5420 
5421 	rc = priv_check(td, PRIV_DRIVER);
5422 	if (rc != 0)
5423 		return (rc);
5424 
5425 	switch (cmd) {
5426 	case CHELSIO_T4_GETREG: {
5427 		struct t4_reg *edata = (struct t4_reg *)data;
5428 
5429 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
5430 			return (EFAULT);
5431 
5432 		if (edata->size == 4)
5433 			edata->val = t4_read_reg(sc, edata->addr);
5434 		else if (edata->size == 8)
5435 			edata->val = t4_read_reg64(sc, edata->addr);
5436 		else
5437 			return (EINVAL);
5438 
5439 		break;
5440 	}
5441 	case CHELSIO_T4_SETREG: {
5442 		struct t4_reg *edata = (struct t4_reg *)data;
5443 
5444 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
5445 			return (EFAULT);
5446 
5447 		if (edata->size == 4) {
5448 			if (edata->val & 0xffffffff00000000)
5449 				return (EINVAL);
5450 			t4_write_reg(sc, edata->addr, (uint32_t) edata->val);
5451 		} else if (edata->size == 8)
5452 			t4_write_reg64(sc, edata->addr, edata->val);
5453 		else
5454 			return (EINVAL);
5455 		break;
5456 	}
5457 	case CHELSIO_T4_REGDUMP: {
5458 		struct t4_regdump *regs = (struct t4_regdump *)data;
5459 		int reglen = T4_REGDUMP_SIZE;
5460 		uint8_t *buf;
5461 
5462 		if (regs->len < reglen) {
5463 			regs->len = reglen; /* hint to the caller */
5464 			return (ENOBUFS);
5465 		}
5466 
5467 		regs->len = reglen;
5468 		buf = malloc(reglen, M_CXGBE, M_WAITOK | M_ZERO);
5469 		t4_get_regs(sc, regs, buf);
5470 		rc = copyout(buf, regs->data, reglen);
5471 		free(buf, M_CXGBE);
5472 		break;
5473 	}
5474 	case CHELSIO_T4_GET_FILTER_MODE:
5475 		rc = get_filter_mode(sc, (uint32_t *)data);
5476 		break;
5477 	case CHELSIO_T4_SET_FILTER_MODE:
5478 		rc = set_filter_mode(sc, *(uint32_t *)data);
5479 		break;
5480 	case CHELSIO_T4_GET_FILTER:
5481 		rc = get_filter(sc, (struct t4_filter *)data);
5482 		break;
5483 	case CHELSIO_T4_SET_FILTER:
5484 		rc = set_filter(sc, (struct t4_filter *)data);
5485 		break;
5486 	case CHELSIO_T4_DEL_FILTER:
5487 		rc = del_filter(sc, (struct t4_filter *)data);
5488 		break;
5489 	case CHELSIO_T4_GET_SGE_CONTEXT:
5490 		rc = get_sge_context(sc, (struct t4_sge_context *)data);
5491 		break;
5492 	case CHELSIO_T4_LOAD_FW:
5493 		rc = load_fw(sc, (struct t4_data *)data);
5494 		break;
5495 	case CHELSIO_T4_GET_MEM:
5496 		rc = read_card_mem(sc, (struct t4_mem_range *)data);
5497 		break;
5498 	case CHELSIO_T4_GET_I2C:
5499 		rc = read_i2c(sc, (struct t4_i2c_data *)data);
5500 		break;
5501 	case CHELSIO_T4_CLEAR_STATS: {
5502 		int i;
5503 		u_int port_id = *(uint32_t *)data;
5504 		struct port_info *pi;
5505 
5506 		if (port_id >= sc->params.nports)
5507 			return (EINVAL);
5508 
5509 		/* MAC stats */
5510 		t4_clr_port_stats(sc, port_id);
5511 
5512 		pi = sc->port[port_id];
5513 		if (pi->flags & PORT_INIT_DONE) {
5514 			struct sge_rxq *rxq;
5515 			struct sge_txq *txq;
5516 			struct sge_wrq *wrq;
5517 
5518 			for_each_rxq(pi, i, rxq) {
5519 #if defined(INET) || defined(INET6)
5520 				rxq->lro.lro_queued = 0;
5521 				rxq->lro.lro_flushed = 0;
5522 #endif
5523 				rxq->rxcsum = 0;
5524 				rxq->vlan_extraction = 0;
5525 			}
5526 
5527 			for_each_txq(pi, i, txq) {
5528 				txq->txcsum = 0;
5529 				txq->tso_wrs = 0;
5530 				txq->vlan_insertion = 0;
5531 				txq->imm_wrs = 0;
5532 				txq->sgl_wrs = 0;
5533 				txq->txpkt_wrs = 0;
5534 				txq->txpkts_wrs = 0;
5535 				txq->txpkts_pkts = 0;
5536 				txq->br->br_drops = 0;
5537 				txq->no_dmamap = 0;
5538 				txq->no_desc = 0;
5539 			}
5540 
5541 #ifdef TCP_OFFLOAD
5542 			/* nothing to clear for each ofld_rxq */
5543 
5544 			for_each_ofld_txq(pi, i, wrq) {
5545 				wrq->tx_wrs = 0;
5546 				wrq->no_desc = 0;
5547 			}
5548 #endif
5549 			wrq = &sc->sge.ctrlq[pi->port_id];
5550 			wrq->tx_wrs = 0;
5551 			wrq->no_desc = 0;
5552 		}
5553 		break;
5554 	}
5555 	default:
5556 		rc = EINVAL;
5557 	}
5558 
5559 	return (rc);
5560 }
5561 
5562 #ifdef TCP_OFFLOAD
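/*
 * Enable or disable the TOE capability on a port.  On first enable this
 * brings the port fully up if needed and activates the TOM ULD, which must
 * already be registered (i.e. t4_tom must be loaded).
 */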
5563 static int
5564 toe_capability(struct port_info *pi, int enable)
5565 {
5566 	int rc;
5567 	struct adapter *sc = pi->adapter;
5568 
5569 	ASSERT_SYNCHRONIZED_OP(sc);
5570 
5571 	if (!is_offload(sc))
5572 		return (ENODEV);
5573 
5574 	if (enable) {
5575 		if (!(sc->flags & FULL_INIT_DONE)) {
5576 			rc = cxgbe_init_synchronized(pi);
5577 			if (rc)
5578 				return (rc);
5579 		}
5580 
5581 		if (isset(&sc->offload_map, pi->port_id))
5582 			return (0);
5583 
5584 		if (!(sc->flags & TOM_INIT_DONE)) {
5585 			rc = t4_activate_uld(sc, ULD_TOM);
5586 			if (rc == EAGAIN) {
5587 				log(LOG_WARNING,
5588 				    "You must kldload t4_tom.ko before trying "
5589 				    "to enable TOE on a cxgbe interface.\n");
5590 			}
5591 			if (rc != 0)
5592 				return (rc);
5593 			KASSERT(sc->tom_softc != NULL,
5594 			    ("%s: TOM activated but softc NULL", __func__));
5595 			KASSERT(sc->flags & TOM_INIT_DONE,
5596 			    ("%s: TOM activated but flag not set", __func__));
5597 		}
5598 
5599 		setbit(&sc->offload_map, pi->port_id);
5600 	} else {
5601 		if (!isset(&sc->offload_map, pi->port_id))
5602 			return (0);
5603 
5604 		KASSERT(sc->flags & TOM_INIT_DONE,
5605 		    ("%s: TOM never initialized?", __func__));
5606 		clrbit(&sc->offload_map, pi->port_id);
5607 	}
5608 
5609 	return (0);
5610 }
5611 
5612 /*
5613  * Add an upper layer driver to the global list.
5614  */
5615 int
5616 t4_register_uld(struct uld_info *ui)
5617 {
5618 	int rc = 0;
5619 	struct uld_info *u;
5620 
5621 	mtx_lock(&t4_uld_list_lock);
5622 	SLIST_FOREACH(u, &t4_uld_list, link) {
5623 		if (u->uld_id == ui->uld_id) {
5624 			rc = EEXIST;
5625 			goto done;
5626 		}
5627 	}
5628 
5629 	SLIST_INSERT_HEAD(&t4_uld_list, ui, link);
5630 	ui->refcount = 0;
5631 done:
5632 	mtx_unlock(&t4_uld_list_lock);
5633 	return (rc);
5634 }
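
/*
 * Sketch of typical usage (handler names hypothetical): a ULD such as
 * t4_tom would register itself once at module load time, e.g.
 *
 *	static struct uld_info tom_uld_info = {
 *		.uld_id = ULD_TOM,
 *		.activate = t4_tom_activate,
 *		.deactivate = t4_tom_deactivate,
 *	};
 *
 *	error = t4_register_uld(&tom_uld_info);
 *
 * and call t4_unregister_uld(&tom_uld_info) on unload.
 */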
5635 
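/*
 * Remove an upper layer driver from the global list.  Fails with EBUSY if
 * the driver is still in use (its refcount is non-zero) and with EINVAL if
 * it was never registered.
 */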
5636 int
5637 t4_unregister_uld(struct uld_info *ui)
5638 {
5639 	int rc = EINVAL;
5640 	struct uld_info *u;
5641 
5642 	mtx_lock(&t4_uld_list_lock);
5643 
5644 	SLIST_FOREACH(u, &t4_uld_list, link) {
5645 		if (u == ui) {
5646 			if (ui->refcount > 0) {
5647 				rc = EBUSY;
5648 				goto done;
5649 			}
5650 
5651 			SLIST_REMOVE(&t4_uld_list, ui, uld_info, link);
5652 			rc = 0;
5653 			goto done;
5654 		}
5655 	}
5656 done:
5657 	mtx_unlock(&t4_uld_list_lock);
5658 	return (rc);
5659 }
5660 
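/*
 * Activate the ULD with the given id on this adapter.  Returns EAGAIN if
 * no such ULD has registered itself (e.g. its module isn't loaded).
 */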
5661 int
5662 t4_activate_uld(struct adapter *sc, int id)
5663 {
5664 	int rc = EAGAIN;
5665 	struct uld_info *ui;
5666 
5667 	ASSERT_SYNCHRONIZED_OP(sc);
5668 
5669 	mtx_lock(&t4_uld_list_lock);
5670 
5671 	SLIST_FOREACH(ui, &t4_uld_list, link) {
5672 		if (ui->uld_id == id) {
5673 			rc = ui->activate(sc);
5674 			if (rc == 0)
5675 				ui->refcount++;
5676 			goto done;
5677 		}
5678 	}
5679 done:
5680 	mtx_unlock(&t4_uld_list_lock);
5681 
5682 	return (rc);
5683 }
5684 
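/*
 * Deactivate the ULD with the given id on this adapter and drop its
 * refcount.  Returns EINVAL if no such ULD is registered.
 */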
5685 int
5686 t4_deactivate_uld(struct adapter *sc, int id)
5687 {
5688 	int rc = EINVAL;
5689 	struct uld_info *ui;
5690 
5691 	ASSERT_SYNCHRONIZED_OP(sc);
5692 
5693 	mtx_lock(&t4_uld_list_lock);
5694 
5695 	SLIST_FOREACH(ui, &t4_uld_list, link) {
5696 		if (ui->uld_id == id) {
5697 			rc = ui->deactivate(sc);
5698 			if (rc == 0)
5699 				ui->refcount--;
5700 			goto done;
5701 		}
5702 	}
5703 done:
5704 	mtx_unlock(&t4_uld_list_lock);
5705 
5706 	return (rc);
5707 }
5708 #endif
5709 
5710 /*
5711  * Come up with reasonable defaults for some of the tunables, provided they're
5712  * not set by the user (in which case we'll use the values as is).
5713  */
5714 static void
5715 tweak_tunables(void)
5716 {
5717 	int nc = mp_ncpus;	/* our snapshot of the number of CPUs */
5718 
5719 	if (t4_ntxq10g < 1)
5720 		t4_ntxq10g = min(nc, NTXQ_10G);
5721 
5722 	if (t4_ntxq1g < 1)
5723 		t4_ntxq1g = min(nc, NTXQ_1G);
5724 
5725 	if (t4_nrxq10g < 1)
5726 		t4_nrxq10g = min(nc, NRXQ_10G);
5727 
5728 	if (t4_nrxq1g < 1)
5729 		t4_nrxq1g = min(nc, NRXQ_1G);
5730 
5731 #ifdef TCP_OFFLOAD
5732 	if (t4_nofldtxq10g < 1)
5733 		t4_nofldtxq10g = min(nc, NOFLDTXQ_10G);
5734 
5735 	if (t4_nofldtxq1g < 1)
5736 		t4_nofldtxq1g = min(nc, NOFLDTXQ_1G);
5737 
5738 	if (t4_nofldrxq10g < 1)
5739 		t4_nofldrxq10g = min(nc, NOFLDRXQ_10G);
5740 
5741 	if (t4_nofldrxq1g < 1)
5742 		t4_nofldrxq1g = min(nc, NOFLDRXQ_1G);
5743 
5744 	if (t4_toecaps_allowed == -1)
5745 		t4_toecaps_allowed = FW_CAPS_CONFIG_TOE;
5746 #else
5747 	if (t4_toecaps_allowed == -1)
5748 		t4_toecaps_allowed = 0;
5749 #endif
5750 
5751 	if (t4_tmr_idx_10g < 0 || t4_tmr_idx_10g >= SGE_NTIMERS)
5752 		t4_tmr_idx_10g = TMR_IDX_10G;
5753 
5754 	if (t4_pktc_idx_10g < -1 || t4_pktc_idx_10g >= SGE_NCOUNTERS)
5755 		t4_pktc_idx_10g = PKTC_IDX_10G;
5756 
5757 	if (t4_tmr_idx_1g < 0 || t4_tmr_idx_1g >= SGE_NTIMERS)
5758 		t4_tmr_idx_1g = TMR_IDX_1G;
5759 
5760 	if (t4_pktc_idx_1g < -1 || t4_pktc_idx_1g >= SGE_NCOUNTERS)
5761 		t4_pktc_idx_1g = PKTC_IDX_1G;
5762 
5763 	if (t4_qsize_txq < 128)
5764 		t4_qsize_txq = 128;
5765 
5766 	if (t4_qsize_rxq < 128)
5767 		t4_qsize_rxq = 128;
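	/* Round the rx queue size up to the next multiple of 8. */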
5768 	while (t4_qsize_rxq & 7)
5769 		t4_qsize_rxq++;
5770 
5771 	t4_intr_types &= INTR_MSIX | INTR_MSI | INTR_INTX;
5772 }
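
/*
 * Sketch (tunable names assumed from the t4_* variables above): the knobs
 * adjusted by tweak_tunables() can be pinned from /boot/loader.conf, e.g.
 *
 *	hw.cxgbe.ntxq10g="8"
 *	hw.cxgbe.nrxq10g="8"
 *	hw.cxgbe.qsize_rxq="1024"
 *
 * in which case the user-supplied values are used as-is.
 */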
5773 
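/*
 * Module load/unload handler: sets up the global adapter (and, with
 * TCP_OFFLOAD, ULD) lists on MOD_LOAD, and refuses MOD_UNLOAD with EBUSY
 * while either list is non-empty.
 */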
5774 static int
5775 t4_mod_event(module_t mod, int cmd, void *arg)
5776 {
5777 	int rc = 0;
5778 
5779 	switch (cmd) {
5780 	case MOD_LOAD:
5781 		t4_sge_modload();
5782 		mtx_init(&t4_list_lock, "T4 adapters", 0, MTX_DEF);
5783 		SLIST_INIT(&t4_list);
5784 #ifdef TCP_OFFLOAD
5785 		mtx_init(&t4_uld_list_lock, "T4 ULDs", 0, MTX_DEF);
5786 		SLIST_INIT(&t4_uld_list);
5787 #endif
5788 		tweak_tunables();
5789 		break;
5790 
5791 	case MOD_UNLOAD:
5792 #ifdef TCP_OFFLOAD
5793 		mtx_lock(&t4_uld_list_lock);
5794 		if (!SLIST_EMPTY(&t4_uld_list)) {
5795 			rc = EBUSY;
5796 			mtx_unlock(&t4_uld_list_lock);
5797 			break;
5798 		}
5799 		mtx_unlock(&t4_uld_list_lock);
5800 		mtx_destroy(&t4_uld_list_lock);
5801 #endif
5802 		mtx_lock(&t4_list_lock);
5803 		if (!SLIST_EMPTY(&t4_list)) {
5804 			rc = EBUSY;
5805 			mtx_unlock(&t4_list_lock);
5806 			break;
5807 		}
5808 		mtx_unlock(&t4_list_lock);
5809 		mtx_destroy(&t4_list_lock);
5810 		break;
5811 	}
5812 
5813 	return (rc);
5814 }
5815 
5816 static devclass_t t4_devclass;
5817 static devclass_t cxgbe_devclass;
5818 
5819 DRIVER_MODULE(t4nex, pci, t4_driver, t4_devclass, t4_mod_event, 0);
5820 MODULE_VERSION(t4nex, 1);
5821 
5822 DRIVER_MODULE(cxgbe, t4nex, cxgbe_driver, cxgbe_devclass, 0, 0);
5823 MODULE_VERSION(cxgbe, 1);
5824